Diffstat (limited to 'kernel/sched_fair.c')
 -rw-r--r--  kernel/sched_fair.c | 159
 1 file changed, 126 insertions(+), 33 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7..8a39fa3e3c6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 		list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+	long tg_weight;
+
+	/*
+	 * Use this CPU's actual weight instead of the last load_contribution
+	 * to gain a more accurate current total weight. See
+	 * update_cfs_rq_load_contribution().
+	 */
+	tg_weight = atomic_read(&tg->load_weight);
+	tg_weight -= cfs_rq->load_contribution;
+	tg_weight += cfs_rq->load.weight;
+
+	return tg_weight;
+}
+
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-	long load_weight, load, shares;
+	long tg_weight, load, shares;
 
+	tg_weight = calc_tg_weight(tg, cfs_rq);
 	load = cfs_rq->load.weight;
 
-	load_weight = atomic_read(&tg->load_weight);
-	load_weight += load;
-	load_weight -= cfs_rq->load_contribution;
-
 	shares = (tg->shares * load);
-	if (load_weight)
-		shares /= load_weight;
+	if (tg_weight)
+		shares /= tg_weight;
 
 	if (shares < MIN_SHARES)
 		shares = MIN_SHARES;
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
 		return;
 
 	__return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j						(1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)						(3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
  */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
 
-	if (!tg->parent)
+	if (!tg->parent)	/* the trivial, non-cgroup case */
 		return wl;
 
 	for_each_sched_entity(se) {
-		long lw, w;
+		long w, W;
 
 		tg = se->my_q->tg;
-		w = se->my_q->load.weight;
 
-		/* use this cpu's instantaneous contribution */
-		lw = atomic_read(&tg->load_weight);
-		lw -= se->my_q->load_contribution;
-		lw += w + wg;
+		/*
+		 * W = @wg + \Sum rw_j
+		 */
+		W = wg + calc_tg_weight(tg, se->my_q);
 
-		wl += w;
+		/*
+		 * w = rw_i + @wl
+		 */
+		w = se->my_q->load.weight + wl;
 
-		if (lw > 0 && wl < lw)
-			wl = (wl * tg->shares) / lw;
+		/*
+		 * wl = S * s'_i; see (2)
+		 */
+		if (W > 0 && w < W)
+			wl = (w * tg->shares) / W;
 		else
 			wl = tg->shares;
 
-		/* zero point is MIN_SHARES */
+		/*
+		 * Per the above, wl is the new se->load.weight value; since
+		 * those are clipped to [MIN_SHARES, ...) do so now. See
+		 * calc_cfs_shares().
+		 */
 		if (wl < MIN_SHARES)
 			wl = MIN_SHARES;
+
+		/*
+		 * wl = dw_i = S * (s'_i - s_i); see (3)
+		 */
 		wl -= se->load.weight;
+
+		/*
+		 * Recursively apply this logic to all parent groups to compute
+		 * the final effective load change on the root group. Since
+		 * only the @tg group gets extra weight, all parent groups can
+		 * only redistribute existing shares. @wl is the shift in shares
+		 * resulting from this level per the above.
		 */
 		wg = 0;
 	}
 
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
-	int i;
+	struct sched_group *sg;
+	int i, smt = 0;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,40 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	rcu_read_lock();
+again:
 	for_each_domain(target, sd) {
+		if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+			continue;
+
+		if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
+			break;
+
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
 
-		for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-			if (idle_cpu(i)) {
-				target = i;
-				break;
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
+
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
 			}
-		}
 
-		/*
-		 * Lets stop looking for an idle sibling when we reached
-		 * the domain that spans the current cpu and prev_cpu.
-		 */
-		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-			break;
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+	if (!smt) {
+		smt = 1;
+		goto again;
 	}
+done:
 	rcu_read_unlock();
 
 	return target;
@@ -3511,7 +3604,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 }
 
 /**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
 * @sd: sched_domain whose statistics are to be updated.
 * @this_cpu: Cpu for which load balance is currently performed.
 * @idle: Idle status of this_cpu
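
For reference, the arithmetic in the new effective_load() comment can be checked with a small userspace program. The sketch below only illustrates equations (1)-(3) from that comment, using its example runqueue weights rw_i = { 2, 4, 1, 0 }; the group weight S = 56 is an assumed value (chosen so the results come out whole), and nothing in this snippet is part of the patch itself.

#include <stdio.h>

/*
 * Worked check of equations (1)-(3) from the effective_load() comment:
 * waking one task (wl = wg = 1) on CPU 0 of a group whose per-CPU runqueue
 * weights are rw_i = { 2, 4, 1, 0 }. S, the group weight as seen by its
 * parent, is arbitrarily set to 56 for this example.
 */
int main(void)
{
	double rw[4] = { 2, 4, 1, 0 };
	double S = 56, wl = 1, wg = 1;
	double sum = rw[0] + rw[1] + rw[2] + rw[3];
	int i;

	for (i = 0; i < 4; i++) {
		double s_old = rw[i] / sum;				/* (1) */
		double s_new = (rw[i] + (i == 0 ? wl : 0)) / (sum + wg);/* (2) */
		double dw = S * (s_new - s_old);			/* (3) */

		printf("cpu%d: s_i=%.3f s'_i=%.3f dw_i=%+.3f\n",
		       i, s_old, s_new, dw);
	}
	return 0;
}

Running it prints dw_0 = +5 and dw_1 = -4 for S = 56, i.e. 5/56 and -4/56 of the group weight, matching the figures quoted in the comment (CPU 2 loses 1/56 and CPU 3 is unaffected).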
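
The calc_tg_weight()/calc_cfs_shares() refactoring above can also be modelled in isolation. The snippet below is a simplified stand-alone sketch rather than kernel code: the atomic and per-CPU bookkeeping is flattened into plain long parameters, and the MIN_SHARES value and the example numbers are illustrative assumptions only.

#include <stdio.h>

#define MIN_SHARES	2	/* stand-in for the kernel's lower clamp */

/*
 * Simplified model of the patched calc_tg_weight()/calc_cfs_shares():
 * the group-wide weight is corrected by replacing this CPU's stale
 * load_contribution with its current runqueue weight before the group's
 * shares are divided among the CPUs.
 */
static long calc_shares(long tg_shares, long tg_load_weight,
			long cpu_load_contribution, long cpu_load_weight)
{
	long tg_weight, shares;

	/* calc_tg_weight(): swap the stale contribution for the live weight */
	tg_weight = tg_load_weight - cpu_load_contribution + cpu_load_weight;

	shares = tg_shares * cpu_load_weight;
	if (tg_weight)
		shares /= tg_weight;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	return shares;
}

int main(void)
{
	/*
	 * Hypothetical numbers: group shares 1024, summed contributions 3072,
	 * this CPU last contributed 1024 but currently runs 2048 of weight.
	 */
	printf("shares = %ld\n", calc_shares(1024, 3072, 1024, 2048));
	return 0;
}

With these numbers the corrected group weight is 3072 - 1024 + 2048 = 4096, so this CPU gets 1024 * 2048 / 4096 = 512 shares; dividing by the uncorrected total of 3072 would have given roughly 682 and overstated this CPU's share.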