Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	248
1 file changed, 180 insertions(+), 68 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b..98345e45b05 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else	/* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	u64 vruntime = cfs_rq->min_vruntime;
+
+	if (cfs_rq->curr)
+		vruntime = cfs_rq->curr->vruntime;
+
+	if (cfs_rq->rb_leftmost) {
+		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+						   struct sched_entity,
+						   run_node);
+
+		if (vruntime == cfs_rq->min_vruntime)
+			vruntime = se->vruntime;
+		else
+			vruntime = min_vruntime(vruntime, se->vruntime);
+	}
+
+	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost) {
+	if (leftmost)
 		cfs_rq->rb_leftmost = &se->run_node;
-		/*
-		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-		 * value tracking the leftmost vruntime in the tree.
-		 */
-		cfs_rq->min_vruntime =
-			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node) {
 		struct rb_node *next_node;
-		struct sched_entity *next;
 
 		next_node = rb_next(&se->run_node);
 		cfs_rq->rb_leftmost = next_node;
-
-		if (next_node) {
-			next = rb_entry(next_node,
-					struct sched_entity, run_node);
-			cfs_rq->min_vruntime =
-				max_vruntime(cfs_rq->min_vruntime,
-					     next->vruntime);
-		}
 	}
 
-	if (cfs_rq->next == se)
-		cfs_rq->next = NULL;
-
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rb_leftmost;
-}
-
 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+	struct rb_node *left = cfs_rq->rb_leftmost;
+
+	if (!left)
+		return NULL;
+
+	return rb_entry(left, struct sched_entity, run_node);
 }
 
-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
+	update_min_vruntime(cfs_rq);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime;
-
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(cfs_rq->min_vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = cfs_rq->min_vruntime;
+	u64 vruntime = cfs_rq->min_vruntime;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 		__enqueue_entity(cfs_rq, se);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (cfs_rq->last == se)
+		cfs_rq->last = NULL;
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
 	}
 
+	clear_buddies(cfs_rq, se);
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
+	update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
-static struct sched_entity *
-pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	struct rq *rq = rq_of(cfs_rq);
-	u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
-	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
-		cfs_rq->pair_start = rq->clock;
-		return se;
-	}
-
-	return cfs_rq->next;
-}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *se = NULL;
+	struct sched_entity *se = __pick_next_entity(cfs_rq);
 
-	if (first_fair(cfs_rq)) {
-		se = __pick_next_entity(cfs_rq);
-		se = pick_next(cfs_rq, se);
-		set_next_entity(cfs_rq, se);
-	}
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
+		return cfs_rq->next;
+
+	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
+		return cfs_rq->last;
 
 	return se;
 }
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(cfs_rq->nr_running == 1))
 		return;
 
+	clear_buddies(cfs_rq, se);
+
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 		update_rq_clock(rq);
 		/*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
-	if (!sync && sched_feat(SYNC_WAKEUPS) &&
-	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
-	    p->se.avg_overlap < sysctl_sched_migration_cost)
-		sync = 1;
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+			p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * More easily preempt - nice tasks, while not making it harder for
	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 
 	return gran;
 }
 
 /*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+	s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+	if (vdiff <= 0)
+		return -1;
+
+	gran = wakeup_gran(curr);
+	if (vdiff > gran)
+		return 1;
+
+	return 0;
+}
+
+static void set_last_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
+}
+
+static void set_next_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
+}
+
+/*
  * Preempt the current task with a newly woken task if needed:
  */
 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
-	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
+		struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
 		update_rq_clock(rq);
 		update_curr(cfs_rq);
 		resched_task(curr);
 		return;
 	}
 
+	if (unlikely(p->sched_class != &fair_sched_class))
+		return;
+
 	if (unlikely(se == pse))
 		return;
 
-	cfs_rq_of(pse)->next = pse;
+	/*
+	 * Only set the backward buddy when the current task is still on the
+	 * rq. This can happen when a wakeup gets interleaved with schedule on
+	 * the ->pre_schedule() or idle_balance() point, either of which can
+	 * drop the rq lock.
+	 *
+	 * Also, during early boot the idle thread is in the fair class, for
+	 * obvious reasons its a bad idea to schedule back to the idle thread.
+	 */
+	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
+		set_last_buddy(se);
+	set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-	if (delta_exec > wakeup_gran(pse))
-		resched_task(curr);
+	find_matching_se(&se, &pse);
+
+	while (se) {
+		BUG_ON(!pse);
+
+		if (wakeup_preempt_entity(se, pse) == 1) {
+			resched_task(curr);
+			break;
+		}
+
+		se = parent_entity(se);
+		pse = parent_entity(pse);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 	do {
 		se = pick_next_entity(cfs_rq);
+		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
-#ifdef CONFIG_SMP
-	.select_task_rq		= select_task_rq_fair,
-#endif /* CONFIG_SMP */
 
 	.check_preempt_curr	= check_preempt_wakeup,
 
@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 #ifdef CONFIG_SMP
+	.select_task_rq		= select_task_rq_fair,
+
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
 #endif
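A few notes on the mechanics above, with illustrative user-space sketches; the structures and helper names in the sketches are invented for illustration and are not part of the patch.

The min_vruntime rework drops the enqueue/dequeue-time bumping in favour of a single update_min_vruntime(), which looks at the running entity and the leftmost queued entity and then clamps the result so min_vruntime never moves backwards. The clamping idea in isolation (a made-up toy_rq stands in for the cfs_rq):

#include <stdint.h>
#include <stdio.h>

/* Signed-difference compare, as the kernel does, so u64 wrap-around is safe. */
static uint64_t max_vruntime(uint64_t mn, uint64_t v)
{
	if ((int64_t)(v - mn) > 0)
		mn = v;
	return mn;
}

static uint64_t min_vruntime(uint64_t mn, uint64_t v)
{
	if ((int64_t)(v - mn) < 0)
		mn = v;
	return mn;
}

struct toy_rq {
	uint64_t min_vruntime;
	int has_curr, has_leftmost;
	uint64_t curr_vruntime, leftmost_vruntime;
};

/* Mirrors update_min_vruntime(): consider curr and the tree's leftmost
 * entity, then only ever move min_vruntime forward. */
static void toy_update_min_vruntime(struct toy_rq *rq)
{
	uint64_t v = rq->min_vruntime;

	if (rq->has_curr)
		v = rq->curr_vruntime;

	if (rq->has_leftmost) {
		if (v == rq->min_vruntime)
			v = rq->leftmost_vruntime;
		else
			v = min_vruntime(v, rq->leftmost_vruntime);
	}

	rq->min_vruntime = max_vruntime(rq->min_vruntime, v);
}

int main(void)
{
	struct toy_rq rq = { .min_vruntime = 100,
			     .has_curr = 1, .curr_vruntime = 90,
			     .has_leftmost = 1, .leftmost_vruntime = 95 };

	toy_update_min_vruntime(&rq);
	/* Prints 100: candidates behind min_vruntime cannot drag it back. */
	printf("%llu\n", (unsigned long long)rq.min_vruntime);
	return 0;
}

That final clamp is what preserves monotonicity, and it is what lets the simplified place_entity() above read cfs_rq->min_vruntime directly instead of re-deriving a floor from the leftmost tree entry.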
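wakeup_preempt_entity() replaces the old "ran longer than wakeup_gran" runtime test with a three-way vruntime comparison, matching the s1/s2/s3 diagram in the patch, and pick_next_entity() reuses it: the next or last buddy runs instead of the leftmost entity whenever the result is < 1, i.e. whenever the leftmost entity would not be entitled to preempt the buddy outright. The decision in standalone form (gran is a fixed constant here rather than the weight-scaled value wakeup_gran() computes):

#include <stdint.h>
#include <stdio.h>

/*
 * -1: 'se' is not ahead of 'curr' at all        (s1 in the diagram)
 *  0: 'se' is ahead, but within the gran window (s2)
 *  1: 'se' is far enough ahead to preempt       (s3)
 */
static int wakeup_preempt(uint64_t curr_vruntime, uint64_t se_vruntime,
			  uint64_t gran)
{
	int64_t vdiff = (int64_t)(curr_vruntime - se_vruntime);

	if (vdiff <= 0)
		return -1;
	if (vdiff > (int64_t)gran)
		return 1;
	return 0;
}

int main(void)
{
	uint64_t gran = 4;	/* stand-in for the weight-scaled granularity */

	printf("%d\n", wakeup_preempt(10, 12, gran));	/* -1: se behind curr */
	printf("%d\n", wakeup_preempt(10,  8, gran));	/*  0: within gran    */
	printf("%d\n", wakeup_preempt(10,  2, gran));	/*  1: preempt        */
	return 0;
}

Folding the granularity into the comparison this way gives one predicate that serves both wakeup preemption in check_preempt_wakeup() and buddy validation in pick_next_entity(), where the old code needed the separate pick_next()/pair_start machinery that this patch removes.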
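Finally, find_matching_se() exists because vruntimes are only comparable between entities on the same cfs_rq, so check_preempt_wakeup() first walks both entities up to ancestors that are siblings before running the preemption test. The same walk applies to any parent-linked hierarchy; a self-contained sketch, with is_same_group() approximated by comparing parents:

#include <stdio.h>

struct node {
	struct node *parent;
	const char *name;
};

static int depth_of(struct node *n)
{
	int d = 0;

	for (; n; n = n->parent)
		d++;
	return d;
}

/* Walk both nodes up until they are siblings under a common parent. */
static void find_matching(struct node **a, struct node **b)
{
	int da = depth_of(*a), db = depth_of(*b);

	while (da > db) { da--; *a = (*a)->parent; }
	while (db > da) { db--; *b = (*b)->parent; }

	while ((*a)->parent != (*b)->parent) {
		*a = (*a)->parent;
		*b = (*b)->parent;
	}
}

int main(void)
{
	/* root <- g1 <- t1 ; root <- t2 */
	struct node root = { NULL, "root" };
	struct node g1 = { &root, "g1" }, t1 = { &g1, "t1" };
	struct node t2 = { &root, "t2" };
	struct node *a = &t1, *b = &t2;

	find_matching(&a, &b);
	printf("%s vs %s\n", a->name, b->name);	/* g1 vs t2 */
	return 0;
}

Equalizing depths first is what guarantees the final loop terminates: once both nodes sit at the same level, stepping them up in lockstep must eventually reach children of a common parent, the root's children at the latest.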