-rw-r--r--	kernel/sched/debug.c	 4
-rw-r--r--	kernel/sched/fair.c	56
-rw-r--r--	kernel/sched/sched.h	 2
3 files changed, 62 insertions, 0 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 290892361a0..71b0ea325e9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -234,6 +234,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			atomic64_read(&cfs_rq->tg->load_avg));
 	SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_contrib",
 			cfs_rq->tg_load_contrib);
+	SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
+			cfs_rq->tg_runnable_contrib);
+	SEQ_printf(m, "  .%-30s: %d\n", "tg->runnable_avg",
+			atomic_read(&cfs_rq->tg->runnable_avg));
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e20cb2693ef..9e49722da03 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1118,19 +1118,73 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
 	}
 }
 
+/*
+ * Aggregate cfs_rq runnable averages into an equivalent task_group
+ * representation for computing load contributions.
+ */
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+						  struct cfs_rq *cfs_rq)
+{
+	struct task_group *tg = cfs_rq->tg;
+	long contrib;
+
+	/* The fraction of a cpu used by this cfs_rq */
+	contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+			  sa->runnable_avg_period + 1);
+	contrib -= cfs_rq->tg_runnable_contrib;
+
+	if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
+		atomic_add(contrib, &tg->runnable_avg);
+		cfs_rq->tg_runnable_contrib += contrib;
+	}
+}
+
 static inline void __update_group_entity_contrib(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = group_cfs_rq(se);
 	struct task_group *tg = cfs_rq->tg;
+	int runnable_avg;
+
 	u64 contrib;
 
 	contrib = cfs_rq->tg_load_contrib * tg->shares;
 	se->avg.load_avg_contrib = div64_u64(contrib,
 					     atomic64_read(&tg->load_avg) + 1);
+
+	/*
+	 * For group entities we need to compute a correction term in the case
+	 * that they are consuming <1 cpu so that we would contribute the same
+	 * load as a task of equal weight.
+	 *
+	 * Explicitly co-ordinating this measurement would be expensive, but
+	 * fortunately the sum of each cpu's contribution forms a usable
+	 * lower-bound on the true value.
+	 *
+	 * Consider the aggregate of 2 contributions.  Either they are disjoint
+	 * (and the sum represents the true value) or they overlap and we are
+	 * understating by the aggregate of their overlap.
+	 *
+	 * Extending this to N cpus, for a given overlap, the maximum amount we
+	 * understate is then n_i(n_i+1)/2 * w_i where n_i is the number of
+	 * cpus that overlap for this interval and w_i is the interval width.
+	 *
+	 * On a small machine the first term is well-bounded, which bounds the
+	 * total error since w_i is a subset of the period.  Whereas on a
+	 * larger machine, while this first term can be larger, if w_i is of
+	 * consequential size we are guaranteed to see n_i*w_i quickly converge
+	 * to our upper bound of 1 cpu.
+	 */
+	runnable_avg = atomic_read(&tg->runnable_avg);
+	if (runnable_avg < NICE_0_LOAD) {
+		se->avg.load_avg_contrib *= runnable_avg;
+		se->avg.load_avg_contrib >>= NICE_0_SHIFT;
+	}
 }
 #else
 static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
 						 int force_update) {}
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+						  struct cfs_rq *cfs_rq) {}
 static inline void __update_group_entity_contrib(struct sched_entity *se) {}
 #endif
 
@@ -1152,6 +1206,7 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 	if (entity_is_task(se)) {
 		__update_task_entity_contrib(se);
 	} else {
+		__update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
 		__update_group_entity_contrib(se);
 	}
 
@@ -1220,6 +1275,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
 	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
+	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
 }
 
 /* Add the load generated by se into cfs_rq's child load-average */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 924a9909488..134928dc6f0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -113,6 +113,7 @@ struct task_group {
 
 	atomic_t load_weight;
 	atomic64_t load_avg;
+	atomic_t runnable_avg;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -234,6 +235,7 @@ struct cfs_rq {
 	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	u32 tg_runnable_contrib;
 	u64 tg_load_contrib;
 #endif
 #endif
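For reference, here is a minimal user-space sketch of the fixed-point arithmetic this patch relies on, assuming the kernel's NICE_0_SHIFT of 10 (so NICE_0_LOAD == 1024 stands for one full cpu). The sample figures (768/1024 units runnable, a previous tg_runnable_contrib of 700, a pre-correction contribution of 2048) are invented for illustration and not taken from the patch:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define NICE_0_SHIFT	10			/* scheduler fixed-point shift */
#define NICE_0_LOAD	(1UL << NICE_0_SHIFT)	/* 1024 == one full cpu */

int main(void)
{
	/* Suppose this cfs_rq was runnable for 768 of the last 1024 units. */
	uint64_t runnable_avg_sum = 768, runnable_avg_period = 1024;

	/*
	 * Fraction of a cpu consumed, scaled by NICE_0_LOAD; the "+ 1"
	 * guards against a zero period, as in __update_tg_runnable_avg().
	 */
	int64_t contrib = (runnable_avg_sum << NICE_0_SHIFT) /
			  (runnable_avg_period + 1);

	/*
	 * The 1/64 filter: the delta is only folded into the shared
	 * tg->runnable_avg atomic when it exceeds ~1.5% of the value
	 * this cfs_rq last published.
	 */
	int64_t tg_runnable_contrib = 700;	/* made-up prior value */
	int64_t delta = contrib - tg_runnable_contrib;
	if (llabs(delta) > tg_runnable_contrib / 64)
		printf("publish delta %lld to tg->runnable_avg\n",
		       (long long)delta);

	/*
	 * Correction term from __update_group_entity_contrib(): a group
	 * consuming less than one cpu has its load contribution scaled
	 * down by that fraction.
	 */
	uint64_t load_avg_contrib = 2048;	/* arbitrary pre-correction value */
	int64_t runnable_avg = contrib;		/* stand-in for tg->runnable_avg */
	if (runnable_avg < (int64_t)NICE_0_LOAD) {
		load_avg_contrib *= runnable_avg;
		load_avg_contrib >>= NICE_0_SHIFT;
	}

	printf("runnable fraction: %lld/%lu, corrected contrib: %llu\n",
	       (long long)contrib, NICE_0_LOAD,
	       (unsigned long long)load_avg_contrib);
	return 0;
}

With these inputs the fraction works out to (768 << 10) / 1025 = 767 of 1024, the delta of 67 clears the 700/64 = 10 threshold and would be published, and the contribution of 2048 is scaled down to (2048 * 767) >> 10 = 1534. Batching updates behind the 1/64 filter is what keeps every cfs_rq from hammering the shared tg->runnable_avg cacheline on each update.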