diff options
| author | Lai Jiangshan <laijs@cn.fujitsu.com> | 2012-12-11 16:03:16 -0800 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-11 17:22:28 -0800 | 
| commit | 511c2aba8f07fc45bdcba548cb63f7b8a450c6dc (patch) | |
| tree | 6a1a853e60e0004f5895d78231ed1bea33fecaac | |
| parent | fcf07d22f089856631b52a75c35ba3c33b70a1b4 (diff) | |
| download | olio-linux-3.10-511c2aba8f07fc45bdcba548cb63f7b8a450c6dc.tar.xz olio-linux-3.10-511c2aba8f07fc45bdcba548cb63f7b8a450c6dc.zip  | |
mm, memory-hotplug: dynamic configure movable memory and portion memory
Add online_movable and online_kernel for logical memory hotplug.  This is
the dynamic version of "movablecore" & "kernelcore".
The motivation for introducing it is the same as for "movablecore" &
"kernelcore", but the configuration is dynamic, i.e. it is done at
run time:
o We can configure memory as kernelcore or movablecore after boot.
  When userspace workload increases and we need more hugepages, we can use
  "online_movable" to add memory and allow the system to use more
  THP (transparent huge pages), and vice versa when kernel workload increases.
  It also helps virtualization to dynamically configure host/guest memory,
  to save memory (reduce waste).
  Memory capacity on demand.
o When a new node is physically onlined after boot, we need to use
  "online_movable" or "online_kernel" to configure/partition it as
  expected when we logically online it.
  This configuration also helps with physical memory migration.
o All the same benefits as the existing "movablecore" & "kernelcore".
o Preparing for movable-node, which is very important for power saving,
  hardware partitioning and high-availability systems (hardware fault
  management).
(Note, we don't introduce movable-node here.)
Action behavior:
When a memoryblock/memorysection is onlined by "online_movable", the kernel
will not hold direct references to the pages of the memoryblock,
thus we can remove that memory at any time when needed.
When it is onlined by "online_kernel", the kernel can use it.
When it is onlined by "online", the zone type is not changed.
Current constraints:
Only a memoryblock which is adjacent to ZONE_MOVABLE
can be onlined from ZONE_NORMAL to ZONE_MOVABLE.
[akpm@linux-foundation.org: use min_t, cleanups]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/memory-hotplug.txt | 14 | ||||
| -rw-r--r-- | drivers/base/memory.c | 33 | ||||
| -rw-r--r-- | include/linux/memory_hotplug.h | 13 | ||||
| -rw-r--r-- | mm/memory_hotplug.c | 100 | 
4 files changed, 146 insertions, 14 deletions
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 6e6cbc78f32..c6f993d491b 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -161,7 +161,8 @@ a recent addition and not present on older kernels.  		    in the memory block.  'state'           : read-write                      at read:  contains online/offline state of memory. -                    at write: user can specify "online", "offline" command +                    at write: user can specify "online_kernel", +                    "online_movable", "online", "offline" command                      which will be performed on al sections in the block.  'phys_device'     : read-only: designed to show the name of physical memory                      device.  This is not well implemented now. @@ -255,6 +256,17 @@ For onlining, you have to write "online" to the section's state file as:  % echo online > /sys/devices/system/memory/memoryXXX/state +This onlining will not change the ZONE type of the target memory section, +If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE: + +% echo online_movable > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE) + +And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL: + +% echo online_kernel > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL) +  After this, section memoryXXX's state will be 'online' and the amount of  available memory will be increased. diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 7eb1211ab68..987604d56c8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -254,7 +254,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn,   * OK to have direct references to sparsemem variables in here.   
*/  static int -memory_block_action(unsigned long phys_index, unsigned long action) +memory_block_action(unsigned long phys_index, unsigned long action, int online_type)  {  	unsigned long start_pfn;  	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; @@ -269,7 +269,7 @@ memory_block_action(unsigned long phys_index, unsigned long action)  			if (!pages_correctly_reserved(start_pfn, nr_pages))  				return -EBUSY; -			ret = online_pages(start_pfn, nr_pages); +			ret = online_pages(start_pfn, nr_pages, online_type);  			break;  		case MEM_OFFLINE:  			ret = offline_pages(start_pfn, nr_pages); @@ -284,7 +284,8 @@ memory_block_action(unsigned long phys_index, unsigned long action)  }  static int __memory_block_change_state(struct memory_block *mem, -		unsigned long to_state, unsigned long from_state_req) +		unsigned long to_state, unsigned long from_state_req, +		int online_type)  {  	int ret = 0; @@ -296,7 +297,7 @@ static int __memory_block_change_state(struct memory_block *mem,  	if (to_state == MEM_OFFLINE)  		mem->state = MEM_GOING_OFFLINE; -	ret = memory_block_action(mem->start_section_nr, to_state); +	ret = memory_block_action(mem->start_section_nr, to_state, online_type);  	if (ret) {  		mem->state = from_state_req; @@ -319,12 +320,14 @@ out:  }  static int memory_block_change_state(struct memory_block *mem, -		unsigned long to_state, unsigned long from_state_req) +		unsigned long to_state, unsigned long from_state_req, +		int online_type)  {  	int ret;  	mutex_lock(&mem->state_mutex); -	ret = __memory_block_change_state(mem, to_state, from_state_req); +	ret = __memory_block_change_state(mem, to_state, from_state_req, +					  online_type);  	mutex_unlock(&mem->state_mutex);  	return ret; @@ -338,10 +341,18 @@ store_mem_state(struct device *dev,  	mem = container_of(dev, struct memory_block, dev); -	if (!strncmp(buf, "online", min((int)count, 6))) -		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); -	else if(!strncmp(buf, "offline", 
min((int)count, 7))) -		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); +	if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) +		ret = memory_block_change_state(mem, MEM_ONLINE, +						MEM_OFFLINE, ONLINE_KERNEL); +	else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) +		ret = memory_block_change_state(mem, MEM_ONLINE, +						MEM_OFFLINE, ONLINE_MOVABLE); +	else if (!strncmp(buf, "online", min_t(int, count, 6))) +		ret = memory_block_change_state(mem, MEM_ONLINE, +						MEM_OFFLINE, ONLINE_KEEP); +	else if(!strncmp(buf, "offline", min_t(int, count, 7))) +		ret = memory_block_change_state(mem, MEM_OFFLINE, +						MEM_ONLINE, -1);  	if (ret)  		return ret; @@ -676,7 +687,7 @@ int offline_memory_block(struct memory_block *mem)  	mutex_lock(&mem->state_mutex);  	if (mem->state != MEM_OFFLINE) -		ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); +		ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1);  	mutex_unlock(&mem->state_mutex);  	return ret; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 95573ec4ee6..4a45c4e5002 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -26,6 +26,13 @@ enum {  	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,  }; +/* Types for control the zone type of onlined memory */ +enum { +	ONLINE_KEEP, +	ONLINE_KERNEL, +	ONLINE_MOVABLE, +}; +  /*   * pgdat resizing functions   */ @@ -46,6 +53,10 @@ void pgdat_resize_init(struct pglist_data *pgdat)  }  /*   * Zone resizing functions + * + * Note: any attempt to resize a zone should has pgdat_resize_lock() + * zone_span_writelock() both held. This ensure the size of a zone + * can't be changed while pgdat_resize_lock() held.   
*/  static inline unsigned zone_span_seqbegin(struct zone *zone)  { @@ -71,7 +82,7 @@ extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);  extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);  extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);  /* VM interface that may be used by firmware interface */ -extern int online_pages(unsigned long, unsigned long); +extern int online_pages(unsigned long, unsigned long, int);  extern void __offline_isolated_pages(unsigned long, unsigned long);  typedef void (*online_page_callback_t)(struct page *page); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 571130ee66d..5c1f4959e6b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -214,6 +214,88 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,  	zone_span_writeunlock(zone);  } +static void resize_zone(struct zone *zone, unsigned long start_pfn, +		unsigned long end_pfn) +{ +	zone_span_writelock(zone); + +	zone->zone_start_pfn = start_pfn; +	zone->spanned_pages = end_pfn - start_pfn; + +	zone_span_writeunlock(zone); +} + +static void fix_zone_id(struct zone *zone, unsigned long start_pfn, +		unsigned long end_pfn) +{ +	enum zone_type zid = zone_idx(zone); +	int nid = zone->zone_pgdat->node_id; +	unsigned long pfn; + +	for (pfn = start_pfn; pfn < end_pfn; pfn++) +		set_page_links(pfn_to_page(pfn), zid, nid, pfn); +} + +static int move_pfn_range_left(struct zone *z1, struct zone *z2, +		unsigned long start_pfn, unsigned long end_pfn) +{ +	unsigned long flags; + +	pgdat_resize_lock(z1->zone_pgdat, &flags); + +	/* can't move pfns which are higher than @z2 */ +	if (end_pfn > z2->zone_start_pfn + z2->spanned_pages) +		goto out_fail; +	/* the move out part mast at the left most of @z2 */ +	if (start_pfn > z2->zone_start_pfn) +		goto out_fail; +	/* must included/overlap */ +	if (end_pfn <= z2->zone_start_pfn) +		goto out_fail; + +	resize_zone(z1, z1->zone_start_pfn, 
end_pfn); +	resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages); + +	pgdat_resize_unlock(z1->zone_pgdat, &flags); + +	fix_zone_id(z1, start_pfn, end_pfn); + +	return 0; +out_fail: +	pgdat_resize_unlock(z1->zone_pgdat, &flags); +	return -1; +} + +static int move_pfn_range_right(struct zone *z1, struct zone *z2, +		unsigned long start_pfn, unsigned long end_pfn) +{ +	unsigned long flags; + +	pgdat_resize_lock(z1->zone_pgdat, &flags); + +	/* can't move pfns which are lower than @z1 */ +	if (z1->zone_start_pfn > start_pfn) +		goto out_fail; +	/* the move out part mast at the right most of @z1 */ +	if (z1->zone_start_pfn + z1->spanned_pages >  end_pfn) +		goto out_fail; +	/* must included/overlap */ +	if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages) +		goto out_fail; + +	resize_zone(z1, z1->zone_start_pfn, start_pfn); +	resize_zone(z2, start_pfn, z2->zone_start_pfn + z2->spanned_pages); + +	pgdat_resize_unlock(z1->zone_pgdat, &flags); + +	fix_zone_id(z2, start_pfn, end_pfn); + +	return 0; +out_fail: +	pgdat_resize_unlock(z1->zone_pgdat, &flags); +	return -1; +} +  static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,  			    unsigned long end_pfn)  { @@ -508,7 +590,7 @@ static void node_states_set_node(int node, struct memory_notify *arg)  } -int __ref online_pages(unsigned long pfn, unsigned long nr_pages) +int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)  {  	unsigned long onlined_pages = 0;  	struct zone *zone; @@ -525,6 +607,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)  	 */  	zone = page_zone(pfn_to_page(pfn)); +	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { +		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) { +			unlock_memory_hotplug(); +			return -1; +		} +	} +	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { +		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) { +			
unlock_memory_hotplug(); +			return -1; +		} +	} + +	/* Previous code may changed the zone of the pfn range */ +	zone = page_zone(pfn_to_page(pfn)); +  	arg.start_pfn = pfn;  	arg.nr_pages = nr_pages;  	node_states_check_changes_online(nr_pages, zone, &arg);  |