| author | Johannes Weiner <hannes@cmpxchg.org> | 2011-03-23 16:42:30 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 19:46:28 -0700 | 
| commit | 6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch) | |
| tree | 6460e4e1ce206d391b862a3d398a9e22e33ecb3c | |
| parent | 5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff) | |
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved.  Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.
This saves a full word for every page in a system with memory cgroups
enabled.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
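To make the encoding concrete, here is a minimal userspace sketch (not the kernel code itself) of the scheme the commit message describes: the array id, i.e. the node or section number, is packed into the otherwise unused upper bits of the flags word and recovered with a shift and mask. The helper names (`set_array_id`, `get_array_id`) and the widths are illustrative assumptions, not identifiers from the patch.

```c
/*
 * Userspace sketch of the flags-word packing used by this patch.
 * ARRAYID_WIDTH stands in for SECTIONS_SHIFT/NODES_SHIFT and is an
 * assumed value; the real code derives it from the kernel config.
 */
#include <assert.h>
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define ARRAYID_WIDTH	10	/* assumed width of the node/section id */

#define ARRAYID_MASK	((1UL << ARRAYID_WIDTH) - 1)
#define ARRAYID_SHIFT	(BITS_PER_LONG - ARRAYID_WIDTH)

static void set_array_id(unsigned long *flags, unsigned long id)
{
	*flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);	/* clear the old id */
	*flags |= (id & ARRAYID_MASK) << ARRAYID_SHIFT;	/* store the new id */
}

static unsigned long get_array_id(unsigned long flags)
{
	return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
}

int main(void)
{
	unsigned long flags = (1UL << 2) | (1UL << 0);	/* some low flag bits set */

	set_array_id(&flags, 42);		/* e.g. section number 42 */
	assert(get_array_id(flags) == 42);	/* id survives the round trip */
	assert(flags & (1UL << 2));		/* low flag bits are untouched */
	printf("flags=%#lx id=%lu\n", flags, get_array_id(flags));
	return 0;
}
```

Storing the id in the top bits rather than the low bits keeps the existing PCG_* bit numbers unchanged; the `#error` check added by the patch catches configurations where the id would not fit above the flag bits.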
| -rw-r--r-- | include/linux/page_cgroup.h | 75 |
| -rw-r--r-- | kernel/bounds.c | 2 |
| -rw-r--r-- | mm/memcontrol.c | 4 |
| -rw-r--r-- | mm/page_cgroup.c | 91 |

4 files changed, 117 insertions, 55 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 6b63679ce8a..f5de21de31d 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,8 +1,26 @@
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+	__NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
+
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct page *page;
 	struct list_head lru;		/* per cgroup LRU list */
 };
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-	PCG_CACHE, /* charged as cache */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existant fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+					    unsigned long id)
+{
+	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 #include <linux/swap.h>
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }
-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c13..0c9b862292b 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e286e1603e4..660dfc27d97 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageCgroupUsed(pc)))
 			continue;
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 		if (unlikely(!PageLRU(page)))
 			continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 		ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
 		if (ret == -ENOMEM)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 59a3cd4c799..6c3f7a6a481 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
 	pc->flags = 0;
+	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	pc->page = pfn_to_page(pfn);
 	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	unsigned long pfn;
+	struct page *page;
+	pg_data_t *pgdat;
+
+	pgdat = NODE_DATA(page_cgroup_array_id(pc));
+	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+	page = pfn_to_page(pfn);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
 	struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
 		return -ENOMEM;
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, start_pfn + index);
+		init_page_cgroup(pc, nid);
 	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	struct mem_section *section;
+	struct page *page;
+	unsigned long nr;
+
+	nr = page_cgroup_array_id(pc);
+	section = __nr_to_section(nr);
+	page = pfn_to_page(pc - section->page_cgroup);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
+	struct mem_section *section;
 	unsigned long table_size;
+	unsigned long nr;
 	int nid, index;
-	if (!section->page_cgroup) {
-		nid = page_to_nid(pfn_to_page(pfn));
-		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		VM_BUG_ON(!slab_is_available());
-		if (node_state(nid, N_HIGH_MEMORY)) {
-			base = kmalloc_node(table_size,
-				GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-			if (!base)
-				base = vmalloc(table_size);
-		}
-		/*
-		 * The value stored in section->page_cgroup is (base - pfn)
-		 * and it does not point to the memory block allocated above,
-		 * causing kmemleak false positives.
-		 */
-		kmemleak_not_leak(base);
+	nr = pfn_to_section_nr(pfn);
+	section = __nr_to_section(nr);
+
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	VM_BUG_ON(!slab_is_available());
+	if (node_state(nid, N_HIGH_MEMORY)) {
+		base = kmalloc_node(table_size,
+				    GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
 	} else {
-		/*
-		 * We don't have to allocate page_cgroup again, but
-		 * address of memmap may be changed. So, we have to initialize
-		 * again.
-		 */
-		base = section->page_cgroup + pfn;
-		table_size = 0;
-		/* check address of memmap is changed or not. */
-		if (base->page == pfn_to_page(pfn))
-			return 0;
+		base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+		if (!base)
+			base = vmalloc(table_size);
 	}
+	/*
+	 * The value stored in section->page_cgroup is (base - pfn)
+	 * and it does not point to the memory block allocated above,
+	 * causing kmemleak false positives.
+	 */
+	kmemleak_not_leak(base);
 	if (!base) {
 		printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	for (index = 0; index < PAGES_PER_SECTION; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, pfn + index);
+		init_page_cgroup(pc, nr);
 	}
 	section->page_cgroup = base - pfn;
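The flatmem variant of lookup_cgroup_page() in the diff recovers the pfn purely from pointer arithmetic: each node keeps a page_cgroup array parallel to its pfn range, so the offset of pc within that array plus the node's start pfn gives the pfn. Below is a toy userspace model of that arithmetic; the names (`toy_node`, `toy_lookup_pfn`) are hypothetical stand-ins for pg_data_t and the real helper.

```c
/*
 * Toy model of the per-node reverse lookup: mirrors
 * pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn
 * from the patch, with a plain array in place of the kernel structures.
 */
#include <assert.h>

struct toy_pc { unsigned long flags; };

struct toy_node {
	unsigned long start_pfn;	/* first pfn covered by this node */
	struct toy_pc pcs[128];		/* node_page_cgroup analogue */
};

static unsigned long toy_lookup_pfn(const struct toy_node *node,
				    const struct toy_pc *pc)
{
	/* index of pc within the node's array, shifted by the node's base pfn */
	return (unsigned long)(pc - node->pcs) + node->start_pfn;
}

int main(void)
{
	struct toy_node node = { .start_pfn = 4096 };
	struct toy_pc *pc = &node.pcs[17];	/* descriptor for pfn 4096 + 17 */

	assert(toy_lookup_pfn(&node, pc) == 4113);
	return 0;
}
```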