Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c  288
1 file changed, 98 insertions(+), 190 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 89fa32b4edf..08ae848b71d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 
 /*
- * delta *= w / rw
- */
-static inline unsigned long
-calc_delta_weight(unsigned long delta, struct sched_entity *se)
-{
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				se->load.weight, &cfs_rq_of(se)->load);
-	}
-
-	return delta;
-}
-
-/*
- * delta *= rw / w
- */
-static inline unsigned long
-calc_delta_fair(unsigned long delta, struct sched_entity *se)
-{
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, &se->load);
-	}
-
-	return delta;
-}
-
-/*
  * The idea is to set a period in which each task runs once.
  *
  * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running)
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+	u64 slice = __sched_period(cfs_rq->nr_running);
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		slice *= se->load.weight;
+		do_div(slice, cfs_rq->load.weight);
+	}
+
+
+	return slice;
 }
 
 /*
  * We calculate the vruntime slice of a to be inserted task
  *
- * vs = s*rw/w = p
+ * vs = s/w = p/rw
  */
 static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long nr_running = cfs_rq->nr_running;
+	unsigned long weight;
+	u64 vslice;
 
 	if (!se->on_rq)
 		nr_running++;
 
-	return __sched_period(nr_running);
-}
-
-/*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-	struct load_weight lw = {
-		.weight = NICE_0_LOAD,
-		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-	};
+	vslice = __sched_period(nr_running);
 
 	for_each_sched_entity(se) {
-		struct load_weight *se_lw = &se->load;
+		cfs_rq = cfs_rq_of(se);
 
-		if (se->load.weight < NICE_0_LOAD)
-			se_lw = &lw;
+		weight = cfs_rq->load.weight;
+		if (!se->on_rq)
+			weight += se->load.weight;
 
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, se_lw);
+		vslice *= NICE_0_LOAD;
+		do_div(vslice, weight);
 	}
 
-	return delta;
+	return vslice;
 }
 
 /*
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
-	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+	delta_exec_weighted = delta_exec;
+	if (unlikely(curr->load.weight != NICE_0_LOAD)) {
+		delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
+							&curr->load);
+	}
 	curr->vruntime += delta_exec_weighted;
 }
 
@@ -541,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-	cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
 	list_add(&se->group_node, &cfs_rq->tasks);
@@ -571,10 +523,6 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, -se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
 	list_del_init(&se->group_node);
@@ -661,12 +609,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
-			if (sched_feat(NORMALIZED_SLEEPER))
-				vruntime -= calc_delta_weight(sysctl_sched_latency, se);
-			else
-				vruntime -= sysctl_sched_latency;
-		}
+		if (sched_feat(NEW_FAIR_SLEEPERS))
+			vruntime -= sysctl_sched_latency;
 
 		/* ensure we never gain time by being placed backwards. */
 		vruntime = max_vruntime(se->vruntime, vruntime);
@@ -682,6 +626,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+	account_entity_enqueue(cfs_rq, se);
 
 	if (wakeup) {
 		place_entity(cfs_rq, se, 0);
@@ -692,7 +637,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	check_spread(cfs_rq, se);
 	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
-	account_entity_enqueue(cfs_rq, se);
 }
 
 static void update_avg(u64 *avg, u64 sample)
@@ -841,8 +785,10 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 * queued ticks are scheduled to match the slice, so don't bother
 	 * validating it and just reschedule.
 	 */
-	if (queued)
-		return resched_task(rq_of(cfs_rq)->curr);
+	if (queued) {
+		resched_task(rq_of(cfs_rq)->curr);
+		return;
+	}
 	/*
 	 * don't let the period tick interfere with the hrtick preemption
 	 */
@@ -957,7 +903,7 @@ static void yield_task_fair(struct rq *rq)
 		return;
 
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
-		__update_rq_clock(rq);
+		update_rq_clock(rq);
 		/*
 		 * Update run-time statistics of the 'current'.
 		 */
@@ -1007,7 +953,7 @@ static int wake_idle(int cpu, struct task_struct *p)
 	 * sibling runqueue info. This will avoid the checks and cache miss
 	 * penalities associated with that.
 	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
+	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
 		return cpu;
 
 	for_each_domain(cpu, sd) {
@@ -1050,16 +996,27 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	struct task_struct *curr = this_rq->curr;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
+	int balanced;
 
-	if (!(this_sd->flags & SD_WAKE_AFFINE))
+	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
 	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync)
+		tl -= current->se.load.weight;
+
+	balanced = 100*(tl + p->se.load.weight) <= imbalance*load;
+
+	/*
 	 * If the currently running task will sleep within
 	 * a reasonable amount of time then attract this newly
	 * woken task:
 	 */
-	if (sync && curr->sched_class == &fair_sched_class) {
+	if (sync && balanced && curr->sched_class == &fair_sched_class) {
 		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
 				p->se.avg_overlap < sysctl_sched_migration_cost)
 			return 1;
@@ -1068,16 +1025,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current CPU:
-	 */
-	if (sync)
-		tl -= current->se.load.weight;
-
 	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
-			100*(tl + p->se.load.weight) <= imbalance*load) {
+			balanced) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
@@ -1162,10 +1111,11 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	unsigned long gran = sysctl_sched_wakeup_granularity;
 
 	/*
-	 * More easily preempt - nice tasks, while not making it harder for
-	 * + nice tasks.
+	 * More easily preempt - nice tasks, while not making
+	 * it harder for + nice tasks.
 	 */
-	gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
+	if (unlikely(se->load.weight > NICE_0_LOAD))
+		gran = calc_delta_fair(gran, &se->load);
 
 	return gran;
 }
@@ -1359,90 +1309,75 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
 
-static unsigned long
-__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move, struct sched_domain *sd,
-		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
-		struct cfs_rq *cfs_rq)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 {
-	struct rq_iterator cfs_rq_iterator;
+	struct sched_entity *curr;
+	struct task_struct *p;
 
-	cfs_rq_iterator.start = load_balance_start_fair;
-	cfs_rq_iterator.next = load_balance_next_fair;
-	cfs_rq_iterator.arg = cfs_rq;
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
 
-	return balance_tasks(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &cfs_rq_iterator);
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
+	p = task_of(curr);
+
+	return p->prio;
 }
+#endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
+	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
-
-	rcu_read_lock();
-	list_for_each_entry(tg, &task_groups, list) {
-		long imbalance;
-		unsigned long this_weight, busiest_weight;
-		long rem_load, max_load, moved_load;
-
-		/*
-		 * empty group
-		 */
-		if (!aggregate(tg, sd)->task_weight)
-			continue;
-
-		rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
-		rem_load /= aggregate(tg, sd)->load + 1;
-
-		this_weight = tg->cfs_rq[this_cpu]->task_weight;
-		busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
+	struct rq_iterator cfs_rq_iterator;
 
-		imbalance = (busiest_weight - this_weight) / 2;
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
 
-		if (imbalance < 0)
-			imbalance = busiest_weight;
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		max_load = max(rem_load, imbalance);
-		moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
-				max_load, sd, idle, all_pinned, this_best_prio,
-				tg->cfs_rq[busiest_cpu]);
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		if (!moved_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		move_group_shares(tg, sd, busiest_cpu, this_cpu);
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		moved_load *= aggregate(tg, sd)->load;
-		moved_load /= aggregate(tg, sd)->rq_weight + 1;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		rem_load_move -= moved_load;
-		if (rem_load_move < 0)
+		if (rem_load_move <= 0)
			break;
 	}
-	rcu_read_unlock();
 
 	return max_load_move - rem_load_move;
 }
-#else
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_load_move,
-		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
-{
-	return __load_balance_fair(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
-}
-#endif
 
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
@@ -1611,30 +1546,6 @@ static const struct sched_class fair_sched_class = {
 };
 
 #ifdef CONFIG_SCHED_DEBUG
-static void
-print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth)
-{
-	struct sched_entity *se;
-
-	if (!cfs_rq)
-		return;
-
-	list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) {
-		int i;
-
-		for (i = depth; i; i--)
-			seq_puts(m, "  ");
-
-		seq_printf(m, "%lu %s %lu\n",
-				se->load.weight,
-				entity_is_task(se) ? "T" : "G",
-				calc_delta_weight(SCHED_LOAD_SCALE, se)
-				);
-		if (!entity_is_task(se))
-			print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1);
-	}
-}
-
 static void print_cfs_stats(struct seq_file *m, int cpu)
 {
 	struct cfs_rq *cfs_rq;
@@ -1642,9 +1553,6 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
 	rcu_read_lock();
 	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
 		print_cfs_rq(m, cpu, cfs_rq);
-
-	seq_printf(m, "\nWeight tree:\n");
-	print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1);
 	rcu_read_unlock();
 }
 #endif
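
To make the sched_slice()/sched_vslice_add() rework in the hunks above easier to follow: the restored code walks up the scheduling-entity hierarchy and rescales the period by each level's share of its cfs_rq weight. The stand-alone C sketch below mirrors only that arithmetic under simplified assumptions; toy_cfs_rq, toy_entity, the NICE_0_LOAD value and the 20 ms period are illustrative stand-ins, not the kernel's definitions, so this is a sketch of the idea rather than buildable kernel code.

/*
 * User-space sketch of the arithmetic done by the reworked sched_slice()
 * and sched_vslice_add() above.  Not kernel code: toy_cfs_rq/toy_entity
 * are simplified stand-ins, and the NICE_0_LOAD/period values are
 * illustrative assumptions.
 */
#include <stdint.h>
#include <stdio.h>

#define NICE_0_LOAD 1024ULL	/* weight of a nice-0 task (assumed value) */

struct toy_cfs_rq {
	uint64_t load_weight;	/* sum of the weights queued on this cfs_rq */
};

struct toy_entity {
	uint64_t load_weight;
	int on_rq;			/* already counted in cfs_rq load_weight? */
	struct toy_cfs_rq *cfs_rq;	/* queue this entity runs on */
	struct toy_entity *parent;	/* group entity one level up, or NULL */
};

/* slice = period, scaled by w(se) / w(cfs_rq) at every level of the tree */
static uint64_t toy_sched_slice(uint64_t period, const struct toy_entity *se)
{
	uint64_t slice = period;

	for (; se; se = se->parent)
		slice = slice * se->load_weight / se->cfs_rq->load_weight;

	return slice;
}

/*
 * vslice = period expressed in nice-0 units against the queue weight,
 * counting a not-yet-enqueued entity's weight as if it were queued.
 */
static uint64_t toy_sched_vslice_add(uint64_t period, const struct toy_entity *se)
{
	uint64_t vslice = period;

	for (; se; se = se->parent) {
		uint64_t weight = se->cfs_rq->load_weight;

		if (!se->on_rq)
			weight += se->load_weight;

		vslice = vslice * NICE_0_LOAD / weight;
	}

	return vslice;
}

int main(void)
{
	/* one nice-0 task about to join a queue already holding 3*NICE_0_LOAD */
	struct toy_cfs_rq rq = { .load_weight = 3 * NICE_0_LOAD };
	struct toy_entity se = { .load_weight = NICE_0_LOAD, .on_rq = 0,
				 .cfs_rq = &rq, .parent = NULL };
	uint64_t period = 20000000ULL;	/* 20 ms in ns, purely illustrative */

	printf("slice  = %llu ns\n",
	       (unsigned long long)toy_sched_slice(period, &se));
	printf("vslice = %llu ns\n",
	       (unsigned long long)toy_sched_vslice_add(period, &se));
	return 0;
}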
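Likewise, the wake_affine() hunks move the sync-wakeup load discount ahead of the imbalance test and collapse that test into a single balanced flag. Below is a minimal sketch of just that check; the function name, the plain integer parameters and the example imbalance factor of 125 are assumptions made for illustration (the kernel derives its imbalance factor outside the hunks shown), not the kernel's interface.

/*
 * Sketch of the "balanced" check added to wake_affine() above.  Plain
 * integers stand in for the kernel's load values; names and the example
 * imbalance factor are illustrative, not taken from the kernel.
 */
#include <stdio.h>

static int toy_wake_affine_balanced(unsigned long this_load,	/* tl        */
				    unsigned long prev_load,	/* load      */
				    unsigned long curr_weight,	/* waker     */
				    unsigned long p_weight,	/* wakee     */
				    unsigned long imbalance,	/* e.g. 125  */
				    int sync)
{
	unsigned long tl = this_load;

	/*
	 * A sync wakeup means the waker is about to sleep, so discount
	 * (at most) its own weight from this CPU's load first, as the
	 * diff does before computing 'balanced'.
	 */
	if (sync)
		tl -= curr_weight;

	/* balanced = 100*(tl + p->se.load.weight) <= imbalance*load */
	return 100 * (tl + p_weight) <= imbalance * prev_load;
}

int main(void)
{
	/* this CPU: waker (1024) + one other task (1024); prev CPU: 2048 */
	printf("sync wakeup:     balanced = %d\n",
	       toy_wake_affine_balanced(2048, 2048, 1024, 1024, 125, 1));
	printf("non-sync wakeup: balanced = %d\n",
	       toy_wake_affine_balanced(2048, 2048, 1024, 1024, 125, 0));
	return 0;
}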