diff options
Diffstat (limited to 'kernel/cpuset.c')
| -rw-r--r-- | kernel/cpuset.c | 130 | 
1 file changed, 92 insertions, 38 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8c8bd652dd1..f33c7153b6d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -147,6 +147,12 @@ typedef enum {  	CS_SPREAD_SLAB,  } cpuset_flagbits_t; +/* the type of hotplug event */ +enum hotplug_event { +	CPUSET_CPU_OFFLINE, +	CPUSET_MEM_OFFLINE, +}; +  /* convenient tests for these bits */  static inline int is_cpu_exclusive(const struct cpuset *cs)  { @@ -1990,8 +1996,36 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)  }  /* - * Walk the specified cpuset subtree and look for empty cpusets. - * The tasks of such cpuset must be moved to a parent cpuset. + * Helper function to traverse cpusets. + * It can be used to walk the cpuset tree from top to bottom, completing + * one layer before dropping down to the next (thus always processing a + * node before any of its children). + */ +static struct cpuset *cpuset_next(struct list_head *queue) +{ +	struct cpuset *cp; +	struct cpuset *child;	/* scans child cpusets of cp */ +	struct cgroup *cont; + +	if (list_empty(queue)) +		return NULL; + +	cp = list_first_entry(queue, struct cpuset, stack_list); +	list_del(queue->next); +	list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { +		child = cgroup_cs(cont); +		list_add_tail(&child->stack_list, queue); +	} + +	return cp; +} + + +/* + * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory + * online/offline) and update the cpusets accordingly. + * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such + * cpuset must be moved to a parent cpuset.   *   * Called with cgroup_mutex held.  We take callback_mutex to modify   * cpus_allowed and mems_allowed. @@ -2000,50 +2034,61 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)   * before dropping down to the next.  It always processes a node before   * any of its children.   * - * For now, since we lack memory hot unplug, we'll never see a cpuset - * that has tasks along with an empty 'mems'.  
But if we did see such - * a cpuset, we'd handle it just like we do if its 'cpus' was empty. + * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY + * if all present pages from a node are offlined.   */ -static void scan_for_empty_cpusets(struct cpuset *root) +static void +scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)  {  	LIST_HEAD(queue); -	struct cpuset *cp;	/* scans cpusets being updated */ -	struct cpuset *child;	/* scans child cpusets of cp */ -	struct cgroup *cont; +	struct cpuset *cp;		/* scans cpusets being updated */  	static nodemask_t oldmems;	/* protected by cgroup_mutex */  	list_add_tail((struct list_head *)&root->stack_list, &queue); -	while (!list_empty(&queue)) { -		cp = list_first_entry(&queue, struct cpuset, stack_list); -		list_del(queue.next); -		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { -			child = cgroup_cs(cont); -			list_add_tail(&child->stack_list, &queue); +	switch (event) { +	case CPUSET_CPU_OFFLINE: +		while ((cp = cpuset_next(&queue)) != NULL) { + +			/* Continue past cpusets with all cpus online */ +			if (cpumask_subset(cp->cpus_allowed, cpu_active_mask)) +				continue; + +			/* Remove offline cpus from this cpuset. 
*/ +			mutex_lock(&callback_mutex); +			cpumask_and(cp->cpus_allowed, cp->cpus_allowed, +							cpu_active_mask); +			mutex_unlock(&callback_mutex); + +			/* Move tasks from the empty cpuset to a parent */ +			if (cpumask_empty(cp->cpus_allowed)) +				remove_tasks_in_empty_cpuset(cp); +			else +				update_tasks_cpumask(cp, NULL);  		} +		break; -		/* Continue past cpusets with all cpus, mems online */ -		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) && -		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) -			continue; +	case CPUSET_MEM_OFFLINE: +		while ((cp = cpuset_next(&queue)) != NULL) { -		oldmems = cp->mems_allowed; +			/* Continue past cpusets with all mems online */ +			if (nodes_subset(cp->mems_allowed, +					node_states[N_HIGH_MEMORY])) +				continue; -		/* Remove offline cpus and mems from this cpuset. */ -		mutex_lock(&callback_mutex); -		cpumask_and(cp->cpus_allowed, cp->cpus_allowed, -			    cpu_active_mask); -		nodes_and(cp->mems_allowed, cp->mems_allowed, +			oldmems = cp->mems_allowed; + +			/* Remove offline mems from this cpuset. */ +			mutex_lock(&callback_mutex); +			nodes_and(cp->mems_allowed, cp->mems_allowed,  						node_states[N_HIGH_MEMORY]); -		mutex_unlock(&callback_mutex); +			mutex_unlock(&callback_mutex); -		/* Move tasks from the empty cpuset to a parent */ -		if (cpumask_empty(cp->cpus_allowed) || -		     nodes_empty(cp->mems_allowed)) -			remove_tasks_in_empty_cpuset(cp); -		else { -			update_tasks_cpumask(cp, NULL); -			update_tasks_nodemask(cp, &oldmems, NULL); +			/* Move tasks from the empty cpuset to a parent */ +			if (nodes_empty(cp->mems_allowed)) +				remove_tasks_in_empty_cpuset(cp); +			else +				update_tasks_nodemask(cp, &oldmems, NULL);  		}  	}  } @@ -2054,13 +2099,19 @@ static void scan_for_empty_cpusets(struct cpuset *root)   * (of no affect) on systems that are actively using CPU hotplug   * but making no active use of cpusets.   
* + * The only exception to this is suspend/resume, where we don't + * modify cpusets at all. + *   * This routine ensures that top_cpuset.cpus_allowed tracks   * cpu_active_mask on each CPU hotplug (cpuhp) event.   *   * Called within get_online_cpus().  Needs to call cgroup_lock()   * before calling generate_sched_domains(). + * + * @cpu_online: Indicates whether this is a CPU online event (true) or + * a CPU offline event (false).   */ -void cpuset_update_active_cpus(void) +void cpuset_update_active_cpus(bool cpu_online)  {  	struct sched_domain_attr *attr;  	cpumask_var_t *doms; @@ -2070,7 +2121,10 @@ void cpuset_update_active_cpus(void)  	mutex_lock(&callback_mutex);  	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);  	mutex_unlock(&callback_mutex); -	scan_for_empty_cpusets(&top_cpuset); + +	if (!cpu_online) +		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE); +  	ndoms = generate_sched_domains(&doms, &attr);  	cgroup_unlock(); @@ -2082,7 +2136,7 @@ void cpuset_update_active_cpus(void)  /*   * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].   * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. - * See also the previous routine cpuset_track_online_cpus(). + * See cpuset_update_active_cpus() for CPU hotplug handling.   */  static int cpuset_track_online_nodes(struct notifier_block *self,  				unsigned long action, void *arg) @@ -2101,9 +2155,9 @@ static int cpuset_track_online_nodes(struct notifier_block *self,  	case MEM_OFFLINE:  		/*  		 * needn't update top_cpuset.mems_allowed explicitly because -		 * scan_for_empty_cpusets() will update it. +		 * scan_cpusets_upon_hotplug() will update it.  		 */ -		scan_for_empty_cpusets(&top_cpuset); +		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);  		break;  	default:  		break;  |