Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	390
1 file changed, 229 insertions(+), 161 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f72b5e52451..795e525afab 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly;  #define MEM_CGROUP_RECLAIM_RETRIES	5  static struct mem_cgroup *root_mem_cgroup __read_mostly; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */  int do_swap_account __read_mostly;  /* for remember boot option*/ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED +#ifdef CONFIG_MEMCG_SWAP_ENABLED  static int really_do_swap_account __initdata = 1;  #else  static int really_do_swap_account __initdata = 0; @@ -87,7 +87,7 @@ enum mem_cgroup_stat_index {  	MEM_CGROUP_STAT_CACHE, 	   /* # of pages charged as cache */  	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as anon rss */  	MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */ -	MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ +	MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */  	MEM_CGROUP_STAT_NSTATS,  }; @@ -378,9 +378,7 @@ static bool move_file(void)  enum charge_type {  	MEM_CGROUP_CHARGE_TYPE_CACHE = 0, -	MEM_CGROUP_CHARGE_TYPE_MAPPED, -	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */ -	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */ +	MEM_CGROUP_CHARGE_TYPE_ANON,  	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */  	MEM_CGROUP_CHARGE_TYPE_DROP,	/* a page was unused swap cache */  	NR_CHARGE_TYPE, @@ -407,8 +405,14 @@ enum charge_type {  static void mem_cgroup_get(struct mem_cgroup *memcg);  static void mem_cgroup_put(struct mem_cgroup *memcg); +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) +{ +	return container_of(s, struct mem_cgroup, css); +} +  /* Writing them here to avoid exposing memcg's inner layout */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM  #include <net/sock.h>  #include <net/ip.h> @@ -467,9 +471,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)  }  EXPORT_SYMBOL(tcp_proto_cgroup);  #endif /* CONFIG_INET */ -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ -#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)  static void disarm_sock_keys(struct mem_cgroup *memcg)  {  	if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) @@ -703,7 +707,7 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,  					 bool charge)  {  	int val = (charge) ? 
1 : -1; -	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); +	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);  }  static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, @@ -864,9 +868,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)  struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)  { -	return container_of(cgroup_subsys_state(cont, -				mem_cgroup_subsys_id), struct mem_cgroup, -				css); +	return mem_cgroup_from_css( +		cgroup_subsys_state(cont, mem_cgroup_subsys_id));  }  struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) @@ -879,8 +882,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)  	if (unlikely(!p))  		return NULL; -	return container_of(task_subsys_state(p, mem_cgroup_subsys_id), -				struct mem_cgroup, css); +	return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));  }  struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) @@ -966,8 +968,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,  		css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);  		if (css) {  			if (css == &root->css || css_tryget(css)) -				memcg = container_of(css, -						     struct mem_cgroup, css); +				memcg = mem_cgroup_from_css(css);  		} else  			id = 0;  		rcu_read_unlock(); @@ -1454,7 +1455,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)  /*   * Return the memory (and swap, if configured) limit for a memcg.   */ -u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) +static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)  {  	u64 limit;  	u64 memsw; @@ -1470,6 +1471,73 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)  	return min(limit, memsw);  } +void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +			      int order) +{ +	struct mem_cgroup *iter; +	unsigned long chosen_points = 0; +	unsigned long totalpages; +	unsigned int points = 0; +	struct task_struct *chosen = NULL; + +	/* +	 * If current has a pending SIGKILL, then automatically select it.  The +	 * goal is to allow it to allocate so that it may quickly exit and free +	 * its memory. +	 */ +	if (fatal_signal_pending(current)) { +		set_thread_flag(TIF_MEMDIE); +		return; +	} + +	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL); +	totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? 
: 1; +	for_each_mem_cgroup_tree(iter, memcg) { +		struct cgroup *cgroup = iter->css.cgroup; +		struct cgroup_iter it; +		struct task_struct *task; + +		cgroup_iter_start(cgroup, &it); +		while ((task = cgroup_iter_next(cgroup, &it))) { +			switch (oom_scan_process_thread(task, totalpages, NULL, +							false)) { +			case OOM_SCAN_SELECT: +				if (chosen) +					put_task_struct(chosen); +				chosen = task; +				chosen_points = ULONG_MAX; +				get_task_struct(chosen); +				/* fall through */ +			case OOM_SCAN_CONTINUE: +				continue; +			case OOM_SCAN_ABORT: +				cgroup_iter_end(cgroup, &it); +				mem_cgroup_iter_break(memcg, iter); +				if (chosen) +					put_task_struct(chosen); +				return; +			case OOM_SCAN_OK: +				break; +			}; +			points = oom_badness(task, memcg, NULL, totalpages); +			if (points > chosen_points) { +				if (chosen) +					put_task_struct(chosen); +				chosen = task; +				chosen_points = points; +				get_task_struct(chosen); +			} +		} +		cgroup_iter_end(cgroup, &it); +	} + +	if (!chosen) +		return; +	points = chosen_points * 1000 / totalpages; +	oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg, +			 NULL, "Memory cgroup out of memory"); +} +  static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,  					gfp_t gfp_mask,  					unsigned long flags) @@ -1899,7 +1967,7 @@ again:  		return;  	/*  	 * If this memory cgroup is not under account moving, we don't -	 * need to take move_lock_page_cgroup(). Because we already hold +	 * need to take move_lock_mem_cgroup(). Because we already hold  	 * rcu_read_lock(), any calls to move_account will be delayed until  	 * rcu_read_unlock() if mem_cgroup_stolen() == true.  	 */ @@ -1921,7 +1989,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)  	/*  	 * It's guaranteed that pc->mem_cgroup never changes while  	 * lock is held because a routine modifies pc->mem_cgroup -	 * should take move_lock_page_cgroup(). +	 * should take move_lock_mem_cgroup().  	 */  	move_unlock_mem_cgroup(pc->mem_cgroup, flags);  } @@ -2268,7 +2336,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,  	 * We always charge the cgroup the mm_struct belongs to.  	 * The mm_struct's mem_cgroup changes on task migration if the  	 * thread group leader migrates. It's possible that mm is not -	 * set, if so charge the init_mm (happens for pagecache usage). +	 * set, if so charge the root memcg (happens for pagecache usage).  	 */  	if (!*ptr && !mm)  		*ptr = root_mem_cgroup; @@ -2429,7 +2497,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)  	css = css_lookup(&mem_cgroup_subsys, id);  	if (!css)  		return NULL; -	return container_of(css, struct mem_cgroup, css); +	return mem_cgroup_from_css(css);  }  struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) @@ -2473,11 +2541,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,  	bool anon;  	lock_page_cgroup(pc); -	if (unlikely(PageCgroupUsed(pc))) { -		unlock_page_cgroup(pc); -		__mem_cgroup_cancel_charge(memcg, nr_pages); -		return; -	} +	VM_BUG_ON(PageCgroupUsed(pc));  	/*  	 * we don't need page_cgroup_lock about tail pages, becase they are not  	 * accessed by any other context at this point. 
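The hunk at @@ -407,8 +405,14 @@ introduces mem_cgroup_from_css(), which wraps the container_of() conversion that was previously open-coded at each call site. Below is a minimal, self-contained userspace sketch of the same idiom; struct css_like, struct memcg_like and memcg_from_css() are made-up stand-ins for cgroup_subsys_state, mem_cgroup and the new helper, not the kernel definitions.

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-in for the kernel macro: recover the enclosing
 * structure from a pointer to one of its members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical stand-ins for cgroup_subsys_state / mem_cgroup. */
struct css_like {
	int refcnt;
};

struct memcg_like {
	long usage;
	struct css_like css;	/* embedded member, like memcg->css */
};

/* Same shape as mem_cgroup_from_css(): member pointer -> container. */
static inline struct memcg_like *memcg_from_css(struct css_like *s)
{
	return container_of(s, struct memcg_like, css);
}

int main(void)
{
	struct memcg_like m = { .usage = 42, .css = { .refcnt = 1 } };
	struct css_like *css = &m.css;

	/* Recovers &m from &m.css and prints 42. */
	printf("%ld\n", memcg_from_css(css)->usage);
	return 0;
}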
@@ -2519,7 +2583,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,  		spin_unlock_irq(&zone->lru_lock);  	} -	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) +	if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)  		anon = true;  	else  		anon = false; @@ -2644,8 +2708,7 @@ out:  static int mem_cgroup_move_parent(struct page *page,  				  struct page_cgroup *pc, -				  struct mem_cgroup *child, -				  gfp_t gfp_mask) +				  struct mem_cgroup *child)  {  	struct mem_cgroup *parent;  	unsigned int nr_pages; @@ -2728,38 +2791,7 @@ int mem_cgroup_newpage_charge(struct page *page,  	VM_BUG_ON(page->mapping && !PageAnon(page));  	VM_BUG_ON(!mm);  	return mem_cgroup_charge_common(page, mm, gfp_mask, -					MEM_CGROUP_CHARGE_TYPE_MAPPED); -} - -static void -__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, -					enum charge_type ctype); - -int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, -				gfp_t gfp_mask) -{ -	struct mem_cgroup *memcg = NULL; -	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; -	int ret; - -	if (mem_cgroup_disabled()) -		return 0; -	if (PageCompound(page)) -		return 0; - -	if (unlikely(!mm)) -		mm = &init_mm; -	if (!page_is_file_cache(page)) -		type = MEM_CGROUP_CHARGE_TYPE_SHMEM; - -	if (!PageSwapCache(page)) -		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type); -	else { /* page is swapcache/shmem */ -		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg); -		if (!ret) -			__mem_cgroup_commit_charge_swapin(page, memcg, type); -	} -	return ret; +					MEM_CGROUP_CHARGE_TYPE_ANON);  }  /* @@ -2768,27 +2800,26 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,   * struct page_cgroup is acquired. This refcnt will be consumed by   * "commit()" or removed by "cancel()"   */ -int mem_cgroup_try_charge_swapin(struct mm_struct *mm, -				 struct page *page, -				 gfp_t mask, struct mem_cgroup **memcgp) +static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, +					  struct page *page, +					  gfp_t mask, +					  struct mem_cgroup **memcgp)  {  	struct mem_cgroup *memcg; +	struct page_cgroup *pc;  	int ret; -	*memcgp = NULL; - -	if (mem_cgroup_disabled()) -		return 0; - -	if (!do_swap_account) -		goto charge_cur_mm; +	pc = lookup_page_cgroup(page);  	/* -	 * A racing thread's fault, or swapoff, may have already updated -	 * the pte, and even removed page from swap cache: in those cases -	 * do_swap_page()'s pte_same() test will fail; but there's also a -	 * KSM case which does need to charge the page. +	 * Every swap fault against a single page tries to charge the +	 * page, bail as early as possible.  shmem_unuse() encounters +	 * already charged pages, too.  The USED bit is protected by +	 * the page lock, which serializes swap cache removal, which +	 * in turn serializes uncharging.  	 
*/ -	if (!PageSwapCache(page)) +	if (PageCgroupUsed(pc)) +		return 0; +	if (!do_swap_account)  		goto charge_cur_mm;  	memcg = try_get_mem_cgroup_from_page(page);  	if (!memcg) @@ -2800,14 +2831,44 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,  		ret = 0;  	return ret;  charge_cur_mm: -	if (unlikely(!mm)) -		mm = &init_mm;  	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);  	if (ret == -EINTR)  		ret = 0;  	return ret;  } +int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, +				 gfp_t gfp_mask, struct mem_cgroup **memcgp) +{ +	*memcgp = NULL; +	if (mem_cgroup_disabled()) +		return 0; +	/* +	 * A racing thread's fault, or swapoff, may have already +	 * updated the pte, and even removed page from swap cache: in +	 * those cases unuse_pte()'s pte_same() test will fail; but +	 * there's also a KSM case which does need to charge the page. +	 */ +	if (!PageSwapCache(page)) { +		int ret; + +		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true); +		if (ret == -EINTR) +			ret = 0; +		return ret; +	} +	return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); +} + +void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) +{ +	if (mem_cgroup_disabled()) +		return; +	if (!memcg) +		return; +	__mem_cgroup_cancel_charge(memcg, 1); +} +  static void  __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,  					enum charge_type ctype) @@ -2842,16 +2903,30 @@ void mem_cgroup_commit_charge_swapin(struct page *page,  				     struct mem_cgroup *memcg)  {  	__mem_cgroup_commit_charge_swapin(page, memcg, -					  MEM_CGROUP_CHARGE_TYPE_MAPPED); +					  MEM_CGROUP_CHARGE_TYPE_ANON);  } -void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) +int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, +				gfp_t gfp_mask)  { +	struct mem_cgroup *memcg = NULL; +	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; +	int ret; +  	if (mem_cgroup_disabled()) -		return; -	if (!memcg) -		return; -	__mem_cgroup_cancel_charge(memcg, 1); +		return 0; +	if (PageCompound(page)) +		return 0; + +	if (!PageSwapCache(page)) +		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type); +	else { /* page is swapcache/shmem */ +		ret = __mem_cgroup_try_charge_swapin(mm, page, +						     gfp_mask, &memcg); +		if (!ret) +			__mem_cgroup_commit_charge_swapin(page, memcg, type); +	} +	return ret;  }  static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, @@ -2911,7 +2986,8 @@ direct_uncharge:   * uncharge if !page_mapped(page)   */  static struct mem_cgroup * -__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) +__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, +			     bool end_migration)  {  	struct mem_cgroup *memcg = NULL;  	unsigned int nr_pages = 1; @@ -2921,8 +2997,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)  	if (mem_cgroup_disabled())  		return NULL; -	if (PageSwapCache(page)) -		return NULL; +	VM_BUG_ON(PageSwapCache(page));  	if (PageTransHuge(page)) {  		nr_pages <<= compound_order(page); @@ -2945,7 +3020,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)  	anon = PageAnon(page);  	switch (ctype) { -	case MEM_CGROUP_CHARGE_TYPE_MAPPED: +	case MEM_CGROUP_CHARGE_TYPE_ANON:  		/*  		 * Generally PageAnon tells if it's the anon statistics to be  		 * updated; but sometimes e.g. 
mem_cgroup_uncharge_page() is @@ -2955,7 +3030,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)  		/* fallthrough */  	case MEM_CGROUP_CHARGE_TYPE_DROP:  		/* See mem_cgroup_prepare_migration() */ -		if (page_mapped(page) || PageCgroupMigration(pc)) +		if (page_mapped(page)) +			goto unlock_out; +		/* +		 * Pages under migration may not be uncharged.  But +		 * end_migration() /must/ be the one uncharging the +		 * unused post-migration page and so it has to call +		 * here with the migration bit still set.  See the +		 * res_counter handling below. +		 */ +		if (!end_migration && PageCgroupMigration(pc))  			goto unlock_out;  		break;  	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: @@ -2989,7 +3073,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)  		mem_cgroup_swap_statistics(memcg, true);  		mem_cgroup_get(memcg);  	} -	if (!mem_cgroup_is_root(memcg)) +	/* +	 * Migration does not charge the res_counter for the +	 * replacement page, so leave it alone when phasing out the +	 * page that is unused after the migration. +	 */ +	if (!end_migration && !mem_cgroup_is_root(memcg))  		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);  	return memcg; @@ -3005,14 +3094,16 @@ void mem_cgroup_uncharge_page(struct page *page)  	if (page_mapped(page))  		return;  	VM_BUG_ON(page->mapping && !PageAnon(page)); -	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); +	if (PageSwapCache(page)) +		return; +	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);  }  void mem_cgroup_uncharge_cache_page(struct page *page)  {  	VM_BUG_ON(page_mapped(page));  	VM_BUG_ON(page->mapping); -	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); +	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);  }  /* @@ -3076,7 +3167,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)  	if (!swapout) /* this was a swap cache but the swap is unused ! */  		ctype = MEM_CGROUP_CHARGE_TYPE_DROP; -	memcg = __mem_cgroup_uncharge_common(page, ctype); +	memcg = __mem_cgroup_uncharge_common(page, ctype, false);  	/*  	 * record memcg information,  if swapout && memcg != NULL, @@ -3087,7 +3178,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)  }  #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  /*   * called from swap_entry_free(). remove record in swap_cgroup and   * uncharge "memsw" account. @@ -3166,19 +3257,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,   * Before starting migration, account PAGE_SIZE to mem_cgroup that the old   * page belongs to.   */ -int mem_cgroup_prepare_migration(struct page *page, -	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask) +void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, +				  struct mem_cgroup **memcgp)  {  	struct mem_cgroup *memcg = NULL;  	struct page_cgroup *pc;  	enum charge_type ctype; -	int ret = 0;  	*memcgp = NULL;  	VM_BUG_ON(PageTransHuge(page));  	if (mem_cgroup_disabled()) -		return 0; +		return;  	pc = lookup_page_cgroup(page);  	lock_page_cgroup(pc); @@ -3223,24 +3313,9 @@ int mem_cgroup_prepare_migration(struct page *page,  	 * we return here.  	 
*/  	if (!memcg) -		return 0; +		return;  	*memcgp = memcg; -	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false); -	css_put(&memcg->css);/* drop extra refcnt */ -	if (ret) { -		if (PageAnon(page)) { -			lock_page_cgroup(pc); -			ClearPageCgroupMigration(pc); -			unlock_page_cgroup(pc); -			/* -			 * The old page may be fully unmapped while we kept it. -			 */ -			mem_cgroup_uncharge_page(page); -		} -		/* we'll need to revisit this error code (we have -EINTR) */ -		return -ENOMEM; -	}  	/*  	 * We charge new page before it's used/mapped. So, even if unlock_page()  	 * is called before end_migration, we can catch all events on this new @@ -3248,13 +3323,15 @@ int mem_cgroup_prepare_migration(struct page *page,  	 * mapcount will be finally 0 and we call uncharge in end_migration().  	 */  	if (PageAnon(page)) -		ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; -	else if (page_is_file_cache(page)) -		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; +		ctype = MEM_CGROUP_CHARGE_TYPE_ANON;  	else -		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; +		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; +	/* +	 * The page is committed to the memcg, but it's not actually +	 * charged to the res_counter since we plan on replacing the +	 * old one and only one page is going to be left afterwards. +	 */  	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false); -	return ret;  }  /* remove redundant charge if migration failed*/ @@ -3276,6 +3353,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,  		used = newpage;  		unused = oldpage;  	} +	anon = PageAnon(used); +	__mem_cgroup_uncharge_common(unused, +				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON +				     : MEM_CGROUP_CHARGE_TYPE_CACHE, +				     true); +	css_put(&memcg->css);  	/*  	 * We disallowed uncharge of pages under migration because mapcount  	 * of the page goes down to zero, temporarly. @@ -3285,10 +3368,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,  	lock_page_cgroup(pc);  	ClearPageCgroupMigration(pc);  	unlock_page_cgroup(pc); -	anon = PageAnon(used); -	__mem_cgroup_uncharge_common(unused, -		anon ? MEM_CGROUP_CHARGE_TYPE_MAPPED -		     : MEM_CGROUP_CHARGE_TYPE_CACHE);  	/*  	 * If a page is a file cache, radix-tree replacement is very atomic @@ -3340,10 +3419,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,  	 */  	if (!memcg)  		return; - -	if (PageSwapBacked(oldpage)) -		type = MEM_CGROUP_CHARGE_TYPE_SHMEM; -  	/*  	 * Even if newpage->mapping was NULL before starting replacement,  	 * the newpage may be on LRU(or pagevec for LRU) already. We lock @@ -3418,7 +3493,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,  		/*  		 * Rather than hide all in some function, I do this in  		 * open coded manner. You see what this really does. -		 * We have to guarantee memcg->res.limit < memcg->memsw.limit. +		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.  		 */  		mutex_lock(&set_limit_mutex);  		memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); @@ -3479,7 +3554,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,  		/*  		 * Rather than hide all in some function, I do this in  		 * open coded manner. You see what this really does. -		 * We have to guarantee memcg->res.limit < memcg->memsw.limit. +		 * We have to guarantee memcg->res.limit <= memcg->memsw.limit.  		 
*/  		mutex_lock(&set_limit_mutex);  		memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); @@ -3611,10 +3686,12 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,  }  /* - * This routine traverse page_cgroup in given list and drop them all. - * *And* this routine doesn't reclaim page itself, just removes page_cgroup. + * Traverse a specified page_cgroup list and try to drop them all.  This doesn't + * reclaim the pages page themselves - it just removes the page_cgroups. + * Returns true if some page_cgroups were not freed, indicating that the caller + * must retry this operation.   */ -static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, +static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,  				int node, int zid, enum lru_list lru)  {  	struct mem_cgroup_per_zone *mz; @@ -3622,7 +3699,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,  	struct list_head *list;  	struct page *busy;  	struct zone *zone; -	int ret = 0;  	zone = &NODE_DATA(node)->node_zones[zid];  	mz = mem_cgroup_zoneinfo(memcg, node, zid); @@ -3636,7 +3712,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,  		struct page_cgroup *pc;  		struct page *page; -		ret = 0;  		spin_lock_irqsave(&zone->lru_lock, flags);  		if (list_empty(list)) {  			spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -3653,21 +3728,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,  		pc = lookup_page_cgroup(page); -		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL); -		if (ret == -ENOMEM || ret == -EINTR) -			break; - -		if (ret == -EBUSY || ret == -EINVAL) { +		if (mem_cgroup_move_parent(page, pc, memcg)) {  			/* found lock contention or "pc" is obsolete. */  			busy = page;  			cond_resched();  		} else  			busy = NULL;  	} - -	if (!ret && !list_empty(list)) -		return -EBUSY; -	return ret; +	return !list_empty(list);  }  /* @@ -3692,9 +3760,6 @@ move_account:  		ret = -EBUSY;  		if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))  			goto out; -		ret = -EINTR; -		if (signal_pending(current)) -			goto out;  		/* This is for making all *used* pages to be on LRU. */  		lru_add_drain_all();  		drain_all_stock_sync(memcg); @@ -3715,9 +3780,6 @@ move_account:  		}  		mem_cgroup_end_move(memcg);  		memcg_oom_recover(memcg); -		/* it seems parent cgroup doesn't have enough mem */ -		if (ret == -ENOMEM) -			goto try_to_free;  		cond_resched();  	/* "ret" should also be checked to ensure all lists are empty. */  	} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); @@ -3779,6 +3841,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,  		parent_memcg = mem_cgroup_from_cont(parent);  	cgroup_lock(); + +	if (memcg->use_hierarchy == val) +		goto out; +  	/*  	 * If parent's use_hierarchy is set, we can't make any modifications  	 * in the child subtrees. 
If it is unset, then the change can @@ -3795,6 +3861,8 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,  			retval = -EBUSY;  	} else  		retval = -EINVAL; + +out:  	cgroup_unlock();  	return retval; @@ -3831,7 +3899,7 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)  	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);  	if (swap) -		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); +		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);  	return val << PAGE_SHIFT;  } @@ -4015,7 +4083,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,  #endif  #ifdef CONFIG_NUMA -static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft, +static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,  				      struct seq_file *m)  {  	int nid; @@ -4074,7 +4142,7 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)  	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);  } -static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, +static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,  				 struct seq_file *m)  {  	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); @@ -4082,7 +4150,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,  	unsigned int i;  	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { -		if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) +		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)  			continue;  		seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],  			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); @@ -4109,7 +4177,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,  	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {  		long long val = 0; -		if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) +		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)  			continue;  		for_each_mem_cgroup_tree(mi, memcg)  			val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; @@ -4533,7 +4601,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,  	return 0;  } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM  static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)  {  	return mem_cgroup_sockets_init(memcg, ss); @@ -4588,7 +4656,7 @@ static struct cftype mem_cgroup_files[] = {  	},  	{  		.name = "stat", -		.read_seq_string = mem_control_stat_show, +		.read_seq_string = memcg_stat_show,  	},  	{  		.name = "force_empty", @@ -4620,10 +4688,10 @@ static struct cftype mem_cgroup_files[] = {  #ifdef CONFIG_NUMA  	{  		.name = "numa_stat", -		.read_seq_string = mem_control_numa_stat_show, +		.read_seq_string = memcg_numa_stat_show,  	},  #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  	{  		.name = "memsw.usage_in_bytes",  		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), @@ -4810,7 +4878,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)  }  EXPORT_SYMBOL(parent_mem_cgroup); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  static void __init enable_swap_cgroup(void)  {  	if (!mem_cgroup_disabled() && really_do_swap_account) @@ -5541,7 +5609,7 @@ struct cgroup_subsys mem_cgroup_subsys = {  	.__DEPRECATED_clear_css_refs = true,  }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  static int __init enable_swap_account(char *s)  {  	/* consider enabled if no parameter or 1 is given */  |
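The new mem_cgroup_out_of_memory() (hunk @@ -1470,6 +1471,73 @@) walks every task in the memcg hierarchy, honours the SELECT/CONTINUE/ABORT verdicts from oom_scan_process_thread(), scores the remaining candidates with oom_badness(), and kills the highest scorer. The following is a simplified userspace sketch of just that selection loop; struct fake_task, enum scan_verdict and pick_victim() are invented for illustration, and task refcounting plus the cgroup iterators are deliberately omitted.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for a task and the verdict that
 * oom_scan_process_thread() would return; none of these names are the
 * kernel's. */
enum scan_verdict { SCAN_OK, SCAN_CONTINUE, SCAN_SELECT, SCAN_ABORT };

struct fake_task {
	const char *name;
	unsigned long badness;	/* stand-in for oom_badness() points */
	enum scan_verdict verdict;
};

/* Same selection shape as the new mem_cgroup_out_of_memory() loop:
 * remember the highest-scoring candidate, let SELECT force a choice,
 * and bail out entirely on ABORT. Returns NULL if nothing is chosen. */
static const struct fake_task *pick_victim(const struct fake_task *tasks,
					   size_t n)
{
	const struct fake_task *chosen = NULL;
	unsigned long chosen_points = 0;

	for (size_t i = 0; i < n; i++) {
		switch (tasks[i].verdict) {
		case SCAN_SELECT:
			chosen = &tasks[i];
			chosen_points = (unsigned long)-1;
			continue;
		case SCAN_CONTINUE:
			continue;
		case SCAN_ABORT:
			return NULL;	/* someone is already exiting */
		case SCAN_OK:
			break;
		}
		if (tasks[i].badness > chosen_points) {
			chosen = &tasks[i];
			chosen_points = tasks[i].badness;
		}
	}
	return chosen;
}

int main(void)
{
	const struct fake_task tasks[] = {
		{ "small",  10, SCAN_OK },
		{ "kernel", 99, SCAN_CONTINUE },	/* e.g. unkillable */
		{ "hog",    80, SCAN_OK },
	};
	const struct fake_task *victim =
		pick_victim(tasks, sizeof(tasks) / sizeof(tasks[0]));

	printf("victim: %s\n", victim ? victim->name : "(none)");	/* hog */
	return 0;
}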
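The reworked swap-in paths keep memcg's two-phase charging protocol: mem_cgroup_try_charge_swapin() reserves the charge, and the caller later either commits it (mem_cgroup_commit_charge_swapin) or hands it back (mem_cgroup_cancel_charge_swapin) when the fault loses a race. Below is a toy sketch of that reserve/commit/cancel pattern against a plain counter with a limit; struct counter and the three functions are illustrations of the protocol, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a res_counter: usage capped by limit, in pages. */
struct counter {
	long usage;
	long limit;
};

/* Phase 1: reserve the charge, fail if it would exceed the limit. */
static bool try_charge(struct counter *c, long nr_pages)
{
	if (c->usage + nr_pages > c->limit)
		return false;
	c->usage += nr_pages;
	return true;
}

/* Phase 2a: commit. Nothing further to account in this toy model; in the
 * kernel this is where the page_cgroup gets bound to the memcg. */
static void commit_charge(struct counter *c, long nr_pages)
{
	(void)c;
	(void)nr_pages;
}

/* Phase 2b: cancel. Return the reservation, mirroring
 * mem_cgroup_cancel_charge_swapin(). */
static void cancel_charge(struct counter *c, long nr_pages)
{
	c->usage -= nr_pages;
}

int main(void)
{
	struct counter memcg = { .usage = 0, .limit = 4 };

	if (try_charge(&memcg, 1)) {
		bool fault_succeeded = false;	/* pretend the fault raced and lost */

		if (fault_succeeded)
			commit_charge(&memcg, 1);
		else
			cancel_charge(&memcg, 1);
	}
	printf("usage after racing fault: %ld\n", memcg.usage);	/* 0 */
	return 0;
}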
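The comment fix in the limit-resize hunks (memcg->res.limit < memcg->memsw.limit becoming <=) reflects the invariant that the memory limit may equal, but never exceed, the memory+swap limit, which in turn dictates the order in which the two limits have to be raised. A small sketch of that ordering rule, using stand-in globals (res_limit, memsw_limit) instead of res_counters and the set_limit_mutex:

#include <stdbool.h>
#include <stdio.h>

/* Toy limits, in pages; the invariant is res_limit <= memsw_limit. */
static long res_limit = 100;	/* memory limit */
static long memsw_limit = 200;	/* memory+swap limit */

/* Mirrors the check in mem_cgroup_resize_limit(): refuse a memory limit
 * that would exceed the current memory+swap limit. */
static bool set_res_limit(long val)
{
	if (val > memsw_limit)
		return false;
	res_limit = val;
	return true;
}

/* Mirrors mem_cgroup_resize_memsw_limit(): memsw may never drop below
 * the plain memory limit. */
static bool set_memsw_limit(long val)
{
	if (val < res_limit)
		return false;
	memsw_limit = val;
	return true;
}

int main(void)
{
	/* Raising res above the old memsw limit fails... */
	printf("res -> 300:   %s\n", set_res_limit(300) ? "ok" : "rejected");
	/* ...so memsw has to be raised first, then res can follow. */
	printf("memsw -> 400: %s\n", set_memsw_limit(400) ? "ok" : "rejected");
	printf("res -> 300:   %s\n", set_res_limit(300) ? "ok" : "rejected");
	return 0;
}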