Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	316
1 file changed, 27 insertions(+), 289 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 243a20c5cf9..c70a8814a76 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1283,8 +1283,8 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 static void
 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
-	trace_sched_wakeup(p, true);
 	check_preempt_curr(rq, p, wake_flags);
+	trace_sched_wakeup(p, true);
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -1493,8 +1493,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
 
-	BUG_ON(rq != this_rq());
-	BUG_ON(p == current);
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
@@ -2992,51 +2994,6 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
-
-	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
-			break;
-
-		arch_mutex_cpu_relax();
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
-}
-#endif
-
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -3077,11 +3034,13 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
+	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
-	user_exit();
+	prev_state = exception_enter();
+
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
@@ -3095,6 +3054,8 @@ asmlinkage void __sched preempt_schedule_irq(void)
 		 */
 		barrier();
 	} while (need_resched());
+
+	exception_exit(prev_state);
 }
 #endif /* CONFIG_PREEMPT */
@@ -4121,6 +4082,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	get_task_struct(p);
 	rcu_read_unlock();
 
+	if (p->flags & PF_NO_SETAFFINITY) {
+		retval = -EINVAL;
+		goto out_put_task;
+	}
 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto out_put_task;
@@ -4768,11 +4733,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 		goto out;
 	}
 
-	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
@@ -4994,7 +4954,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
@@ -6243,7 +6203,7 @@ static void sched_init_numa(void)
 	 * 'level' contains the number of unique distances, excluding the
 	 * identity distance node_distance(i,i).
	 *
-	 * The sched_domains_nume_distance[] array includes the actual distance
+	 * The sched_domains_numa_distance[] array includes the actual distance
 	 * numbers.
 	 */
 
@@ -6856,11 +6816,15 @@ int in_sched_functions(unsigned long addr)
 }
 
 #ifdef CONFIG_CGROUP_SCHED
+/*
+ * Default task group.
+ * Every task in system belongs to this group at bootup.
+ */
 struct task_group root_task_group;
 LIST_HEAD(task_groups);
 #endif
 
-DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 
 void __init sched_init(void)
 {
@@ -6897,7 +6861,7 @@ void __init sched_init(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 		for_each_possible_cpu(i) {
-			per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+			per_cpu(load_balance_mask, i) = (void *)ptr;
 			ptr += cpumask_size();
 		}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
@@ -6923,12 +6887,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-	root_cpuacct.cpustat = &kernel_cpustat;
-	root_cpuacct.cpuusage = alloc_percpu(u64);
-	/* Too early, not expected to fail */
-	BUG_ON(!root_cpuacct.cpuusage);
-#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -7450,7 +7408,7 @@ unlock:
 	return err;
 }
 
-int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
+static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
 	u64 rt_runtime, rt_period;
 
@@ -7462,7 +7420,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
-long sched_group_rt_runtime(struct task_group *tg)
+static long sched_group_rt_runtime(struct task_group *tg)
 {
 	u64 rt_runtime_us;
 
@@ -7474,7 +7432,7 @@ long sched_group_rt_runtime(struct task_group *tg)
 	return rt_runtime_us;
 }
 
-int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
+static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 {
 	u64 rt_runtime, rt_period;
 
@@ -7487,7 +7445,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
-long sched_group_rt_period(struct task_group *tg)
+static long sched_group_rt_period(struct task_group *tg)
 {
 	u64 rt_period_us;
 
@@ -7522,7 +7480,7 @@ static int sched_rt_global_constraints(void)
 	return ret;
 }
 
-int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
 {
 	/* Don't accept realtime tasks when there is no way for them to run */
 	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
@@ -8030,226 +7988,6 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 
 #endif	/* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-
-/*
- * CPU accounting code for task groups.
- *
- * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
- * (balbir@in.ibm.com).
- */
-
-struct cpuacct root_cpuacct;
-
-/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
-{
-	struct cpuacct *ca;
-
-	if (!cgrp->parent)
-		return &root_cpuacct.css;
-
-	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-	if (!ca)
-		goto out;
-
-	ca->cpuusage = alloc_percpu(u64);
-	if (!ca->cpuusage)
-		goto out_free_ca;
-
-	ca->cpustat = alloc_percpu(struct kernel_cpustat);
-	if (!ca->cpustat)
-		goto out_free_cpuusage;
-
-	return &ca->css;
-
-out_free_cpuusage:
-	free_percpu(ca->cpuusage);
-out_free_ca:
-	kfree(ca);
-out:
-	return ERR_PTR(-ENOMEM);
-}
-
-/* destroy an existing cpu accounting group */
-static void cpuacct_css_free(struct cgroup *cgrp)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-
-	free_percpu(ca->cpustat);
-	free_percpu(ca->cpuusage);
-	kfree(ca);
-}
-
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
-{
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-	u64 data;
-
-#ifndef CONFIG_64BIT
-	/*
-	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
-	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	data = *cpuusage;
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	data = *cpuusage;
-#endif
-
-	return data;
-}
-
-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
-{
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-
-#ifndef CONFIG_64BIT
-	/*
-	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
-	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	*cpuusage = val;
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	*cpuusage = val;
-#endif
-}
-
-/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	u64 totalcpuusage = 0;
-	int i;
-
-	for_each_present_cpu(i)
-		totalcpuusage += cpuacct_cpuusage_read(ca, i);
-
-	return totalcpuusage;
-}
-
-static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
-								u64 reset)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	int err = 0;
-	int i;
-
-	if (reset) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	for_each_present_cpu(i)
-		cpuacct_cpuusage_write(ca, i, 0);
-
-out:
-	return err;
-}
-
-static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
-				   struct seq_file *m)
-{
-	struct cpuacct *ca = cgroup_ca(cgroup);
-	u64 percpu;
-	int i;
-
-	for_each_present_cpu(i) {
-		percpu = cpuacct_cpuusage_read(ca, i);
-		seq_printf(m, "%llu ", (unsigned long long) percpu);
-	}
-	seq_printf(m, "\n");
-	return 0;
-}
-
-static const char *cpuacct_stat_desc[] = {
-	[CPUACCT_STAT_USER] = "user",
-	[CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
-			      struct cgroup_map_cb *cb)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	int cpu;
-	s64 val = 0;
-
-	for_each_online_cpu(cpu) {
-		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-		val += kcpustat->cpustat[CPUTIME_USER];
-		val += kcpustat->cpustat[CPUTIME_NICE];
-	}
-	val = cputime64_to_clock_t(val);
-	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
-
-	val = 0;
-	for_each_online_cpu(cpu) {
-		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-		val += kcpustat->cpustat[CPUTIME_SYSTEM];
-		val += kcpustat->cpustat[CPUTIME_IRQ];
-		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
-	}
-
-	val = cputime64_to_clock_t(val);
-	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-
-	return 0;
-}
-
-static struct cftype files[] = {
-	{
-		.name = "usage",
-		.read_u64 = cpuusage_read,
-		.write_u64 = cpuusage_write,
-	},
-	{
-		.name = "usage_percpu",
-		.read_seq_string = cpuacct_percpu_seq_read,
-	},
-	{
-		.name = "stat",
-		.read_map = cpuacct_stats_show,
-	},
-	{ }	/* terminate */
-};
-
-/*
- * charge this task's execution time to its accounting group.
- *
- * called with rq->lock held.
- */
-void cpuacct_charge(struct task_struct *tsk, u64 cputime)
-{
-	struct cpuacct *ca;
-	int cpu;
-
-	if (unlikely(!cpuacct_subsys.active))
-		return;
-
-	cpu = task_cpu(tsk);
-
-	rcu_read_lock();
-
-	ca = task_ca(tsk);
-
-	for (; ca; ca = parent_ca(ca)) {
-		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-		*cpuusage += cputime;
-	}
-
-	rcu_read_unlock();
-}
-
-struct cgroup_subsys cpuacct_subsys = {
-	.name = "cpuacct",
-	.css_alloc = cpuacct_css_alloc,
-	.css_free = cpuacct_css_free,
-	.subsys_id = cpuacct_subsys_id,
-	.base_cftypes = files,
-};
-#endif	/* CONFIG_CGROUP_CPUACCT */
-
 void dump_cpu_task(int cpu)
 {
 	pr_info("Task dump for CPU %d:\n", cpu);