diff options
| author | Christoph Lameter <christoph@graphe.net> | 2005-06-21 17:15:00 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 18:46:18 -0700 | 
| commit | 2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4 (patch) | |
| tree | 4ce8426bf3a85d92efc5a0f6c981f54963d472e8 | |
| parent | 4ae7c03943fca73f23bc0cdb938070f41b98101f (diff) | |
| download | olio-linux-3.10-2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4.tar.xz olio-linux-3.10-2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4.zip  | |
[PATCH] Reduce size of huge boot per_cpu_pageset
Reduce size of the huge per_cpu_pageset structure in __initdata introduced
into mm1 with the pageset localization patchset.  Use one specially
configured pageset per cpu for all zones and nodes during bootup.
- Avoid duplication of pageset initialization code.
- Add the page to the pageset list before the potential free_pages_bulk
  call in free_hot_cold_page (otherwise we would have to hold a page
  in a pageset during the period that the boot pagesets are in use).
- Remove the mistaken __cpuinitdata attribute and revert to __initdata
  for the boot pageset. A boot pageset is not necessary for cpu hotplug.
Tested on x86_64 (2.6.12-rc6-mm1): UP, SMP, NUMA.
Tested on IA64 (2.6.12-rc5-mm2): NUMA (2.6.12-rc6-mm1 is broken for IA64
because of the sparsemem patches).
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | mm/page_alloc.c | 108 | 
1 files changed, 42 insertions, 66 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a95e72d7f94..418102a0292 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -71,11 +71,6 @@ EXPORT_SYMBOL(nr_swap_pages);  struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];  EXPORT_SYMBOL(zone_table); -#ifdef CONFIG_NUMA -static struct per_cpu_pageset -	pageset_table[MAX_NR_ZONES*MAX_NUMNODES*NR_CPUS] __initdata; -#endif -  static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };  int min_free_kbytes = 1024; @@ -652,10 +647,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)  	free_pages_check(__FUNCTION__, page);  	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];  	local_irq_save(flags); -	if (pcp->count >= pcp->high) -		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);  	list_add(&page->lru, &pcp->list);  	pcp->count++; +	if (pcp->count >= pcp->high) +		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);  	local_irq_restore(flags);  	put_cpu();  } @@ -1714,57 +1709,55 @@ static int __devinit zone_batchsize(struct zone *zone)  	return batch;  } +inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) +{ +	struct per_cpu_pages *pcp; + +	pcp = &p->pcp[0];		/* hot */ +	pcp->count = 0; +	pcp->low = 2 * batch; +	pcp->high = 6 * batch; +	pcp->batch = max(1UL, 1 * batch); +	INIT_LIST_HEAD(&pcp->list); + +	pcp = &p->pcp[1];		/* cold*/ +	pcp->count = 0; +	pcp->low = 0; +	pcp->high = 2 * batch; +	pcp->batch = max(1UL, 1 * batch); +	INIT_LIST_HEAD(&pcp->list); +} +  #ifdef CONFIG_NUMA  /* - * Dynamicaly allocate memory for the + * Boot pageset table. One per cpu which is going to be used for all + * zones and all nodes. The parameters will be set in such a way + * that an item put on a list will immediately be handed over to + * the buddy list. This is safe since pageset manipulation is done + * with interrupts disabled. + * + * Some NUMA counter updates may also be caught by the boot pagesets. 
+ * These will be discarded when bootup is complete. + */ +static struct per_cpu_pageset +	boot_pageset[NR_CPUS] __initdata; + +/* + * Dynamically allocate memory for the   * per cpu pageset array in struct zone.   */  static int __devinit process_zones(int cpu)  {  	struct zone *zone, *dzone; -	int i;  	for_each_zone(zone) { -		struct per_cpu_pageset *npageset = NULL; -		npageset = kmalloc_node(sizeof(struct per_cpu_pageset), +		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),  					 GFP_KERNEL, cpu_to_node(cpu)); -		if (!npageset) { -			zone->pageset[cpu] = NULL; +		if (!zone->pageset[cpu])  			goto bad; -		} - -		if (zone->pageset[cpu]) { -			memcpy(npageset, zone->pageset[cpu], -					sizeof(struct per_cpu_pageset)); - -			/* Relocate lists */ -			for (i = 0; i < 2; i++) { -				INIT_LIST_HEAD(&npageset->pcp[i].list); -				list_splice(&zone->pageset[cpu]->pcp[i].list, -					&npageset->pcp[i].list); -			} - 		} else { -			struct per_cpu_pages *pcp; -			unsigned long batch; - -			batch = zone_batchsize(zone); -			pcp = &npageset->pcp[0];		/* hot */ -			pcp->count = 0; -			pcp->low = 2 * batch; -			pcp->high = 6 * batch; -			pcp->batch = 1 * batch; -			INIT_LIST_HEAD(&pcp->list); - -			pcp = &npageset->pcp[1];		/* cold*/ -			pcp->count = 0; -			pcp->low = 0; -			pcp->high = 2 * batch; -			pcp->batch = 1 * batch; -			INIT_LIST_HEAD(&pcp->list); -		} -		zone->pageset[cpu] = npageset; +		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));  	}  	return 0; @@ -1878,30 +1871,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat,  		batch = zone_batchsize(zone);  		for (cpu = 0; cpu < NR_CPUS; cpu++) { -			struct per_cpu_pages *pcp;  #ifdef CONFIG_NUMA -			struct per_cpu_pageset *pgset; -			pgset = &pageset_table[nid*MAX_NR_ZONES*NR_CPUS + -					(j * NR_CPUS) + cpu]; - -			zone->pageset[cpu] = pgset; +			/* Early boot. 
Slab allocator not functional yet */ +			zone->pageset[cpu] = &boot_pageset[cpu]; +			setup_pageset(&boot_pageset[cpu],0);  #else -			struct per_cpu_pageset *pgset = zone_pcp(zone, cpu); +			setup_pageset(zone_pcp(zone,cpu), batch);  #endif - -			pcp = &pgset->pcp[0];			/* hot */ -			pcp->count = 0; -			pcp->low = 2 * batch; -			pcp->high = 6 * batch; -			pcp->batch = 1 * batch; -			INIT_LIST_HEAD(&pcp->list); - -			pcp = &pgset->pcp[1];			/* cold */ -			pcp->count = 0; -			pcp->low = 0; -			pcp->high = 2 * batch; -			pcp->batch = 1 * batch; -			INIT_LIST_HEAD(&pcp->list);  		}  		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",  				zone_names[j], realsize, batch);  |