Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c | 126
1 files changed, 87 insertions, 39 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6fa833ab2cb..37f22626225 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 	}
 
 	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+#ifndef CONFIG_64BIT
+	smp_wmb();
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 /*
@@ -1340,6 +1344,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
+static void set_next_buddy(struct sched_entity *se);
+
 /*
  * The dequeue_task method is called before nr_running is
  * decreased. We remove the task from the rbtree and
@@ -1349,14 +1355,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int task_sleep = flags & DEQUEUE_SLEEP;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight)
+		if (cfs_rq->load.weight) {
+			/*
+			 * Bias pick_next to pick a task from this cfs_rq, as
+			 * p is sleeping when it is within its sched_slice.
+			 */
+			if (task_sleep && parent_entity(se))
+				set_next_buddy(parent_entity(se));
 			break;
+		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
@@ -1372,12 +1386,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+static void task_waking_fair(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 min_vruntime;
 
-	se->vruntime -= cfs_rq->min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+
+	do {
+		min_vruntime_copy = cfs_rq->min_vruntime_copy;
+		smp_rmb();
+		min_vruntime = cfs_rq->min_vruntime;
+	} while (min_vruntime != min_vruntime_copy);
+#else
+	min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+	se->vruntime -= min_vruntime;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1622,6 +1649,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
+	rcu_read_lock();
 	for_each_domain(target, sd) {
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
@@ -1641,6 +1669,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 			break;
 	}
+	rcu_read_unlock();
 
 	return target;
 }
@@ -1657,7 +1686,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -1673,6 +1702,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		new_cpu = prev_cpu;
 	}
 
+	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
 			continue;
@@ -1723,9 +1753,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
 	if (affine_sd) {
 		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-			return select_idle_sibling(p, cpu);
-		else
-			return select_idle_sibling(p, prev_cpu);
+			prev_cpu = cpu;
+
+		new_cpu = select_idle_sibling(p, prev_cpu);
+		goto unlock;
 	}
 
 	while (sd) {
@@ -1766,6 +1797,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		}
 		/* while loop will break here if sd == NULL */
 	}
+unlock:
+	rcu_read_unlock();
 
 	return new_cpu;
 }
@@ -1789,10 +1822,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 	 * This is especially important for buddies when the leftmost
 	 * task is higher priority than the buddy.
 	 */
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		gran = calc_delta_fair(gran, se);
-
-	return gran;
+	return calc_delta_fair(gran, se);
 }
 
 /*
@@ -1826,26 +1856,26 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->last = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->next = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->skip = se;
-	}
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->skip = se;
 }
 
 /*
@@ -1857,12 +1887,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
+	int next_buddy_marked = 0;
 
 	if (unlikely(se == pse))
 		return;
 
-	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
 		set_next_buddy(pse);
+		next_buddy_marked = 1;
+	}
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1890,8 +1923,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
 	BUG_ON(!pse);
-	if (wakeup_preempt_entity(se, pse) == 1)
+	if (wakeup_preempt_entity(se, pse) == 1) {
+		/*
+		 * Bias pick_next to pick the sched entity that is
+		 * triggering this preemption.
+		 */
+		if (!next_buddy_marked)
+			set_next_buddy(pse);
 		goto preempt;
+	}
 
 	return;
 
@@ -2102,7 +2142,7 @@ static unsigned long
 balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      unsigned long max_load_move, struct sched_domain *sd,
 	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
+	      struct cfs_rq *busiest_cfs_rq)
 {
 	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
@@ -2140,9 +2180,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 */
 		if (rem_load_move <= 0)
 			break;
-
-		if (p->prio < *this_best_prio)
-			*this_best_prio = p->prio;
 	}
 out:
 	/*
@@ -2202,7 +2239,7 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
 	int busiest_cpu = cpu_of(busiest);
@@ -2227,7 +2264,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		rem_load = div_u64(rem_load, busiest_h_load + 1);
 
 		moved_load = balance_tasks(this_rq, this_cpu, busiest,
-				rem_load, sd, idle, all_pinned, this_best_prio,
+				rem_load, sd, idle, all_pinned,
 				busiest_cfs_rq);
 
 		if (!moved_load)
@@ -2253,11 +2290,11 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	return balance_tasks(this_rq, this_cpu, busiest,
 			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
+			&busiest->cfs);
 }
 #endif
 
@@ -2274,12 +2311,11 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      int *all_pinned)
 {
 	unsigned long total_load_moved = 0, load_moved;
-	int this_best_prio = this_rq->curr->prio;
 
 	do {
 		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
-				sd, idle, all_pinned, &this_best_prio);
+				sd, idle, all_pinned);
 
 		total_load_moved += load_moved;
 
@@ -2648,7 +2684,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
 	 */
-	if (sd->level != SD_LV_SIBLING)
+	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
 
 	/*
@@ -3465,6 +3501,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	raw_spin_unlock(&this_rq->lock);
 
 	update_shares(this_cpu);
+	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int balance = 1;
@@ -3486,6 +3523,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	raw_spin_lock(&this_rq->lock);
 
@@ -3534,6 +3572,7 @@ static int active_load_balance_cpu_stop(void *data)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
+	rcu_read_lock();
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
@@ -3549,6 +3588,7 @@ static int active_load_balance_cpu_stop(void *data)
 		else
 			schedstat_inc(sd, alb_failed);
 	}
+	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
 	busiest_rq->active_balance = 0;
@@ -3675,6 +3715,7 @@ static int find_new_ilb(int cpu)
 {
 	struct sched_domain *sd;
 	struct sched_group *ilb_group;
+	int ilb = nr_cpu_ids;
 
 	/*
 	 * Have idle load balancer selection from semi-idle packages only
@@ -3690,20 +3731,25 @@ static int find_new_ilb(int cpu)
 	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
 		goto out_done;
 
+	rcu_read_lock();
 	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
 		ilb_group = sd->groups;
 
 		do {
-			if (is_semi_idle_group(ilb_group))
-				return cpumask_first(nohz.grp_idle_mask);
+			if (is_semi_idle_group(ilb_group)) {
+				ilb = cpumask_first(nohz.grp_idle_mask);
+				goto unlock;
+			}
 
 			ilb_group = ilb_group->next;
 
 		} while (ilb_group != sd->groups);
 	}
+unlock:
+	rcu_read_unlock();
 
 out_done:
-	return nr_cpu_ids;
+	return ilb;
 }
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
@@ -3848,6 +3894,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	update_shares(cpu);
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
@@ -3893,6 +3940,7 @@ out:
 		if (!balance)
 			break;
 	}
+	rcu_read_unlock();
 
 	/*
	 * next_balance will be updated only when there is a need.
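
Note on the min_vruntime hunks above: on 32-bit builds a u64 load can tear, so the patch pairs the write side in update_min_vruntime() (store value, smp_wmb(), store copy) with a retry loop on the read side in task_waking_fair() (load copy, smp_rmb(), load value, compare). The snippet below is only a rough userspace sketch of that ordering using C11 fences; publish_min_vruntime() and read_min_vruntime() are made-up names for illustration and are not part of the kernel.

/*
 * Sketch of the write/read pairing used by the 32-bit min_vruntime code
 * above. Writer: value, release fence, copy. Reader: copy, acquire fence,
 * value, retry until both agree. Compile with e.g. gcc -std=c11 -c.
 */
#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t min_vruntime;       /* stands in for cfs_rq->min_vruntime */
static _Atomic uint64_t min_vruntime_copy;  /* stands in for cfs_rq->min_vruntime_copy */

/* Writer side: assumed to run under the (notional) runqueue lock. */
static void publish_min_vruntime(uint64_t v)
{
	atomic_store_explicit(&min_vruntime, v, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);      /* plays the role of smp_wmb() */
	atomic_store_explicit(&min_vruntime_copy, v, memory_order_relaxed);
}

/* Lockless reader side: mirrors the #ifndef CONFIG_64BIT loop in task_waking_fair(). */
static uint64_t read_min_vruntime(void)
{
	uint64_t v, copy;

	do {
		copy = atomic_load_explicit(&min_vruntime_copy, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire); /* plays the role of smp_rmb() */
		v = atomic_load_explicit(&min_vruntime, memory_order_relaxed);
	} while (v != copy);

	return v;
}

In the kernel the comparison against the trailing copy is what catches a torn or stale 64-bit read; on CONFIG_64BIT the plain load is already atomic, which is why the patch keeps the simple path there.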
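The other recurring change above is wrapping every sched-domain walk in rcu_read_lock()/rcu_read_unlock(), which in turn forces the early returns in select_task_rq_fair() and find_new_ilb() to be rewritten as goto unlock so no path leaves the read-side critical section held. A minimal, self-contained sketch of that single-exit shape follows; the pthread mutex is only a stand-in for the RCU read lock and all names (find_slot, table_lock) are hypothetical.

#include <pthread.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Single-exit shape used in the hunks above: initialise the result to the
 * "not found" value up front (as find_new_ilb() now starts with
 * ilb = nr_cpu_ids), turn early returns into goto unlock, and release the
 * lock on exactly one path.
 */
static int find_slot(const int *table, int n, int wanted)
{
	int result = -1;	/* default, returned when nothing matches */
	int i;

	pthread_mutex_lock(&table_lock);	/* stand-in for rcu_read_lock() */
	for (i = 0; i < n; i++) {
		if (table[i] == wanted) {
			result = i;		/* was: return i; */
			goto unlock;
		}
	}
unlock:
	pthread_mutex_unlock(&table_lock);	/* stand-in for rcu_read_unlock() */
	return result;
}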