Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched/core.c     1
-rw-r--r--   kernel/sched/debug.c    3
-rw-r--r--   kernel/sched/fair.c   128
-rw-r--r--   kernel/sched/sched.h    4
4 files changed, 121 insertions, 15 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd9d0859350..00898f1fb69 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1528,7 +1528,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg.runnable_avg_period = 0;
 	p->se.avg.runnable_avg_sum = 0;
 #endif
-
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c953a89f94a..2d2e2b3c1be 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -95,6 +95,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	P(se->avg.runnable_avg_sum);
 	P(se->avg.runnable_avg_period);
 	P(se->avg.load_avg_contrib);
+	P(se->avg.decay_count);
 #endif
 #undef PN
 #undef P
@@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			atomic_read(&cfs_rq->tg->load_weight));
 	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
 			cfs_rq->runnable_load_avg);
+	SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",
+			cfs_rq->blocked_load_avg);
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 77af759e567..83194175e84 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,6 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
+
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	if (!cfs_rq->on_list) {
@@ -278,6 +280,8 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 		}
 
 		cfs_rq->on_list = 1;
+		/* We should have no load, but we need to update last_decay. */
+		update_cfs_rq_blocked_load(cfs_rq);
 	}
 }
 
@@ -1081,6 +1085,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 	return decayed;
 }
 
+/* Synchronize an entity's decay with its parenting cfs_rq.*/
+static inline void __synchronize_entity_decay(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 decays = atomic64_read(&cfs_rq->decay_counter);
+
+	decays -= se->avg.decay_count;
+	if (!decays)
+		return;
+
+	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+	se->avg.decay_count = 0;
+}
+
 /* Compute the current contribution to load_avg by se, return any delta */
 static long __update_entity_load_avg_contrib(struct sched_entity *se)
 {
@@ -1096,8 +1114,18 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 	return se->avg.load_avg_contrib - old_contrib;
 }
 
+static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
+						 long load_contrib)
+{
+	if (likely(load_contrib < cfs_rq->blocked_load_avg))
+		cfs_rq->blocked_load_avg -= load_contrib;
+	else
+		cfs_rq->blocked_load_avg = 0;
+}
+
 /* Update a sched_entity's runnable average */
-static inline void update_entity_load_avg(struct sched_entity *se)
+static inline void update_entity_load_avg(struct sched_entity *se,
+					  int update_cfs_rq)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	long contrib_delta;
@@ -1107,8 +1135,34 @@ static inline void update_entity_load_avg(struct sched_entity *se)
 		return;
 
 	contrib_delta = __update_entity_load_avg_contrib(se);
+
+	if (!update_cfs_rq)
+		return;
+
 	if (se->on_rq)
 		cfs_rq->runnable_load_avg += contrib_delta;
+	else
+		subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+}
+
+/*
+ * Decay the load contributed by all blocked children and account this so that
+ * their contribution may appropriately discounted when they wake up.
+ */
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
+{
+	u64 now = rq_of(cfs_rq)->clock_task >> 20;
+	u64 decays;
+
+	decays = now - cfs_rq->last_decay;
+	if (!decays)
+		return;
+
+	cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
+					      decays);
+	atomic64_add(decays, &cfs_rq->decay_counter);
+
+	cfs_rq->last_decay = now;
 }
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
@@ -1118,26 +1172,53 @@
 
 /* Add the load generated by se into cfs_rq's child load-average */
 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se)
+						  struct sched_entity *se,
+						  int wakeup)
 {
-	update_entity_load_avg(se);
+	/* we track migrations using entity decay_count == 0 */
+	if (unlikely(!se->avg.decay_count)) {
+		se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
+		wakeup = 0;
+	} else {
+		__synchronize_entity_decay(se);
+	}
+
+	if (wakeup)
+		subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+
+	update_entity_load_avg(se, 0);
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+	update_cfs_rq_blocked_load(cfs_rq);
 }
 
-/* Remove se's load from this cfs_rq child load-average */
+/*
+ * Remove se's load from this cfs_rq child load-average, if the entity is
+ * transitioning to a blocked state we track its projected decay using
+ * blocked_load_avg.
+ */
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se)
+						  struct sched_entity *se,
+						  int sleep)
 {
-	update_entity_load_avg(se);
+	update_entity_load_avg(se, 1);
+
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+	if (sleep) {
+		cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
+		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
+	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
 }
 
 #else
-static inline void update_entity_load_avg(struct sched_entity *se) {}
+static inline void update_entity_load_avg(struct sched_entity *se,
+					  int update_cfs_rq) {}
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
 static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se) {}
+					   struct sched_entity *se,
+					   int wakeup) {}
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
-						  struct sched_entity *se) {}
+					   struct sched_entity *se,
+					   int sleep) {}
+static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1266,7 +1347,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	update_curr(cfs_rq);
 	update_cfs_load(cfs_rq, 0);
-	enqueue_entity_load_avg(cfs_rq, se);
+	enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
@@ -1341,7 +1422,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	dequeue_entity_load_avg(cfs_rq, se);
+	dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP);
 
 	update_stats_dequeue(cfs_rq, se);
 	if (flags & DEQUEUE_SLEEP) {
@@ -1512,7 +1593,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
-		update_entity_load_avg(prev);
+		update_entity_load_avg(prev, 1);
 	}
 	cfs_rq->curr = NULL;
 }
@@ -1528,7 +1609,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	/*
	 * Ensure that runnable average is periodically updated.
	 */
-	update_entity_load_avg(curr);
+	update_entity_load_avg(curr, 1);
+	update_cfs_rq_blocked_load(cfs_rq);
 
	/*
	 * Update share accounting for long-running entities.
@@ -2387,6 +2469,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
+		update_entity_load_avg(se, 1);
 	}
 
 	if (!se) {
@@ -2448,6 +2531,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
+		update_entity_load_avg(se, 1);
 	}
 
 	if (!se) {
@@ -3498,6 +3582,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
 
 	update_rq_clock(rq);
 	update_cfs_load(cfs_rq, 1);
+	update_cfs_rq_blocked_load(cfs_rq);
 
	/*
	 * We need to update shares after updating tg->load_weight in
@@ -5232,6 +5317,20 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 		place_entity(cfs_rq, se, 0);
 		se->vruntime -= cfs_rq->min_vruntime;
 	}
+
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+	/*
+	* Remove our load from contribution when we leave sched_fair
+	* and ensure we don't carry in an old decay_count if we
+	* switch back.
+	*/
+	if (p->se.avg.decay_count) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
+		__synchronize_entity_decay(&p->se);
+		subtract_blocked_load_contrib(cfs_rq,
+				p->se.avg.load_avg_contrib);
+	}
+#endif
 }
 
 /*
@@ -5278,6 +5377,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #ifndef CONFIG_64BIT
 	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
 #endif
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+	atomic64_set(&cfs_rq->decay_counter, 1);
+#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6539736af5..664ff39195f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -229,7 +229,9 @@ struct cfs_rq {
	 * This allows for the description of both thread and group usage (in
	 * the FAIR_GROUP_SCHED case).
	 */
-	u64 runnable_load_avg;
+	u64 runnable_load_avg, blocked_load_avg;
+	atomic64_t decay_counter;
+	u64 last_decay;
 #endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
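
The decay bookkeeping in the fair.c hunks above can be read on its own: each cfs_rq keeps a shared decay_counter that advances once per roughly millisecond period (clock_task >> 20), blocked load is decayed in bulk as periods elapse, and a waking entity catches up by decaying its own contribution by the periods recorded since its decay_count snapshot before that share is removed from blocked_load_avg. Below is a minimal standalone sketch of that idea, not kernel code: the toy_* names are invented for illustration, and halving once every 32 periods is only a rough stand-in for the kernel's y^32 = 1/2 geometric decay_load() series.

#include <stdint.h>
#include <stdio.h>

struct toy_rq {
	uint64_t decay_counter;	/* periods decayed so far (shared) */
	uint64_t blocked_load;	/* aggregate contribution of blocked entities */
};

struct toy_entity {
	uint64_t load_contrib;	/* this entity's tracked load contribution */
	uint64_t decay_count;	/* rq->decay_counter snapshot taken at dequeue */
};

/* Coarse stand-in for decay_load(): halve once every 32 periods. */
static uint64_t toy_decay(uint64_t load, uint64_t periods)
{
	uint64_t shift = periods / 32;

	return shift >= 64 ? 0 : load >> shift;
}

/* dequeue (sleep): fold the entity into the blocked aggregate. */
static void toy_block(struct toy_rq *rq, struct toy_entity *se)
{
	rq->blocked_load += se->load_contrib;
	se->decay_count = rq->decay_counter;
}

/* periodic update: bulk-decay blocked load, advance the shared counter. */
static void toy_decay_blocked(struct toy_rq *rq, uint64_t periods)
{
	rq->blocked_load = toy_decay(rq->blocked_load, periods);
	rq->decay_counter += periods;
}

/* enqueue (wakeup): catch the entity up by the periods it missed, then
 * remove its discounted share from the blocked aggregate. */
static void toy_wakeup(struct toy_rq *rq, struct toy_entity *se)
{
	uint64_t missed = rq->decay_counter - se->decay_count;

	se->load_contrib = toy_decay(se->load_contrib, missed);
	if (se->load_contrib < rq->blocked_load)
		rq->blocked_load -= se->load_contrib;
	else
		rq->blocked_load = 0;
}

int main(void)
{
	struct toy_rq rq = { 0, 0 };
	struct toy_entity se = { 1024, 0 };

	toy_block(&rq, &se);		/* sleeps with contribution 1024 */
	toy_decay_blocked(&rq, 32);	/* one half-life: blocked = 512 */
	toy_wakeup(&rq, &se);		/* discounted to 512, then removed */

	printf("entity contrib %llu, blocked load left %llu\n",
	       (unsigned long long)se.load_contrib,
	       (unsigned long long)rq.blocked_load);
	return 0;
}

Built with any C compiler, the sketch leaves the entity's contribution at 512 and the blocked aggregate at 0 after one half-life; keeping those two views in step is what the per-entity decay_count snapshot and __synchronize_entity_decay() in the patch are for.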