Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	115
1 file changed, 80 insertions, 35 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eee3efa58c9..83637dfba11 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = { { [0] = 1UL } },
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = { { [0] = 1UL } },
+#endif
 	[N_CPU] = { { [0] = 1UL } },
 #endif	/* NUMA */
 };
@@ -732,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() except put_page_bootmem() should only be used
+ * at boot time. So for shorter boot time, we shift the burden to
+ * put_page_bootmem() to serialize writers.
+ */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
@@ -747,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 		set_page_count(p, 0);
 	}
 
+	page_zone(page)->managed_pages += 1 << order;
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -1695,7 +1706,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * tasks mems_allowed, or node_states[N_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1724,7 +1735,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
 					&cpuset_current_mems_allowed :
-					&node_states[N_HIGH_MEMORY];
+					&node_states[N_MEMORY];
 	return allowednodes;
 }
@@ -2981,6 +2992,7 @@ void show_free_areas(unsigned int filter)
 			" isolated(anon):%lukB"
 			" isolated(file):%lukB"
 			" present:%lukB"
+			" managed:%lukB"
 			" mlocked:%lukB"
 			" dirty:%lukB"
 			" writeback:%lukB"
@@ -3010,6 +3022,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_ISOLATED_ANON)),
 			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
 			K(zone_page_state(zone, NR_WRITEBACK)),
@@ -3238,7 +3251,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		return node;
 	}
 
-	for_each_node_state(n, N_HIGH_MEMORY) {
+	for_each_node_state(n, N_MEMORY) {
 
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
@@ -3380,7 +3393,7 @@ static int default_zonelist_order(void)
 	 * local memory, NODE_ORDER may be suitable.
 	 */
 	average_size = total_size /
-				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+				(nodes_weight(node_states[N_MEMORY]) + 1);
 	for_each_online_node(nid) {
 		low_kmem_size = 0;
 		total_size = 0;
@@ -4476,6 +4489,26 @@ void __init set_pageblock_order(void)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
+static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
+						   unsigned long present_pages)
+{
+	unsigned long pages = spanned_pages;
+
+	/*
+	 * Provide a more accurate estimation if there are holes within
+	 * the zone and SPARSEMEM is in use. If there are holes within the
+	 * zone, each populated memory region may cost us one or two extra
+	 * memmap pages due to alignment because memmap pages for each
+	 * populated regions may not naturally algined on page boundary.
+	 * So the (present_pages >> 4) heuristic is a tradeoff for that.
+	 */
+	if (spanned_pages > present_pages + (present_pages >> 4) &&
+	    IS_ENABLED(CONFIG_SPARSEMEM))
+		pages = present_pages;
+
+	return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -4499,48 +4532,56 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize, memmap_pages;
+		unsigned long size, realsize, freesize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
-		realsize = size - zone_absent_pages_in_node(nid, j,
+		realsize = freesize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
 		/*
-		 * Adjust realsize so that it accounts for how much memory
+		 * Adjust freesize so that it accounts for how much memory
		 * is used by this zone for memmap. This affects the watermark
 		 * and per-cpu initialisations
 		 */
-		memmap_pages =
-			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
-		if (realsize >= memmap_pages) {
-			realsize -= memmap_pages;
+		memmap_pages = calc_memmap_size(size, realsize);
+		if (freesize >= memmap_pages) {
+			freesize -= memmap_pages;
 			if (memmap_pages)
 				printk(KERN_DEBUG
 				       "  %s zone: %lu pages used for memmap\n",
 				       zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
-				"  %s zone: %lu pages exceeds realsize %lu\n",
-				zone_names[j], memmap_pages, realsize);
+				"  %s zone: %lu pages exceeds freesize %lu\n",
+				zone_names[j], memmap_pages, freesize);
 
 		/* Account for reserved pages */
-		if (j == 0 && realsize > dma_reserve) {
-			realsize -= dma_reserve;
+		if (j == 0 && freesize > dma_reserve) {
+			freesize -= dma_reserve;
 			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
 		if (!is_highmem_idx(j))
-			nr_kernel_pages += realsize;
-		nr_all_pages += realsize;
+			nr_kernel_pages += freesize;
+		/* Charge for highmem memmap if there are enough kernel pages */
+		else if (nr_kernel_pages > memmap_pages * 2)
+			nr_kernel_pages -= memmap_pages;
+		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = realsize;
+		zone->present_pages = freesize;
+		/*
+		 * Set an approximate value for lowmem here, it will be adjusted
+		 * when the bootmem allocator frees pages into the buddy system.
+		 * And all highmem pages will be managed by the buddy system.
+		 */
+		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
 						/ 100;
-		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -4731,7 +4772,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
@@ -4744,7 +4785,7 @@ static unsigned long __init early_calculate_totalpages(void)
 		totalpages += pages;
 		if (pages)
-			node_set_state(nid, N_HIGH_MEMORY);
+			node_set_state(nid, N_MEMORY);
 	}
 	return totalpages;
 }
@@ -4761,9 +4802,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
-	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
-	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
 
 	/*
 	 * If movablecore was specified, calculate what size of
@@ -4798,7 +4839,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
-	for_each_node_state(nid, N_HIGH_MEMORY) {
+	for_each_node_state(nid, N_MEMORY) {
 		unsigned long start_pfn, end_pfn;
 
 		/*
@@ -4890,23 +4931,27 @@ restart:
 
 out:
 	/* restore the node_state */
-	node_states[N_HIGH_MEMORY] = saved_node_state;
+	node_states[N_MEMORY] = saved_node_state;
 }
 
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node ? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
 {
-#ifdef CONFIG_HIGHMEM
 	enum zone_type zone_type;
 
-	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+	if (N_MEMORY == N_NORMAL_MEMORY)
+		return;
+
+	for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 		if (zone->present_pages) {
-			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			node_set_state(nid, N_HIGH_MEMORY);
+			if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+			    zone_type <= ZONE_NORMAL)
+				node_set_state(nid, N_NORMAL_MEMORY);
 			break;
 		}
 	}
-#endif
 }
 
 /**
@@ -4989,8 +5034,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 		/* Any memory on that node */
 		if (pgdat->node_present_pages)
-			node_set_state(nid, N_HIGH_MEMORY);
-		check_for_regular_memory(pgdat);
+			node_set_state(nid, N_MEMORY);
+		check_for_memory(pgdat, nid);
 	}
 }
@@ -5727,7 +5772,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	unsigned int tries = 0;
 	int ret = 0;
 
-	migrate_prep_local();
+	migrate_prep();
 
 	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
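
Note on the (present_pages >> 4) heuristic: the standalone sketch below reproduces the calc_memmap_size() estimate outside the kernel so the arithmetic can be checked in isolation. The PAGE_SHIFT/PAGE_ALIGN definitions and the 64-byte struct page size are assumed stand-ins for a common 64-bit configuration, a plain flag replaces the IS_ENABLED(CONFIG_SPARSEMEM) test, and the function is renamed memmap_size() to make clear it is an illustration rather than kernel code.

/*
 * Standalone illustration of the calc_memmap_size() heuristic added above.
 * PAGE_SHIFT and the 64-byte "struct page" size are assumed values for a
 * typical 64-bit build; the real kernel takes them from the architecture.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define STRUCT_PAGE_SZ	64UL	/* stand-in for sizeof(struct page) */

static unsigned long memmap_size(unsigned long spanned_pages,
				 unsigned long present_pages, int sparsemem)
{
	unsigned long pages = spanned_pages;

	/* Base the estimate on present_pages when the holes exceed 1/16
	 * of the present memory, mirroring the patch's condition. */
	if (spanned_pages > present_pages + (present_pages >> 4) && sparsemem)
		pages = present_pages;

	return PAGE_ALIGN(pages * STRUCT_PAGE_SZ) >> PAGE_SHIFT;
}

int main(void)
{
	/* A zone spanning 1M pages with a 256K-page hole in the middle. */
	unsigned long spanned = 1UL << 20;
	unsigned long present = spanned - (1UL << 18);

	printf("estimate from spanned pages: %lu memmap pages\n",
	       memmap_size(spanned, present, 0));
	printf("estimate from present pages: %lu memmap pages\n",
	       memmap_size(spanned, present, 1));
	return 0;
}

With these numbers the hole (262144 pages) is larger than present/16 (49152 pages), so the SPARSEMEM path switches the estimate from 16384 to 12288 memmap pages; that difference is overhead free_area_init_core() no longer subtracts from freesize.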