From 19a37d1cd5465c10d669a296a2ea24b4c985363b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:05:28 +0800 Subject: sched: Remove some dummy functions No one will call those functions if CONFIG_SCHED_DEBUG=n. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A748.3050206@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbf..2715fbb9ea8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -127,18 +127,6 @@ extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); -#else -static inline void -proc_sched_show_task(struct task_struct *p, struct seq_file *m) -{ -} -static inline void proc_sched_set_task(struct task_struct *p) -{ -} -static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ -} #endif /* -- cgit v1.2.3-70-g09d2 From 090b582f27ac7b6714661020033160130e5297bd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:05:51 +0800 Subject: sched: Remove test_sd_parent() It's unused. 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A75F.4070202@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2715fbb9ea8..e880d7d115e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -959,15 +959,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); -- cgit v1.2.3-70-g09d2 From cc1f4b1f3faed9f2040eff2a75f510b424b3cf18 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:09 +0800 Subject: sched: Move SCHED_LOAD_SHIFT macros to kernel/sched/sched.h They are used internally only. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A771.4070104@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 25 ------------------------- kernel/sched/sched.h | 26 +++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index e880d7d115e..f8826d04fb1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -755,31 +755,6 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; -/* - * Increase resolution of nice-level calculations for 64-bit architectures. - * The extra resolution improves shares distribution and load balancing of - * low-weight task groups (eg. 
nice +19 on an autogroup), deeper taskgroup - * hierarchies, especially on larger systems. This is not a user-visible change - * and does not change the user-interface for setting shares/weights. - * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution - * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the - * increased costs. - */ -#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ -# define SCHED_LOAD_RESOLUTION 10 -# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) -# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) -#else -# define SCHED_LOAD_RESOLUTION 0 -# define scale_load(w) (w) -# define scale_load_down(w) (w) -#endif - -#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) -#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) - /* * Increase resolution of cpu_power calculations */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index cc03cfdf469..709a30cdfd8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -33,6 +33,31 @@ extern __read_mostly int scheduler_running; */ #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) +/* + * Increase resolution of nice-level calculations for 64-bit architectures. + * The extra resolution improves shares distribution and load balancing of + * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup + * hierarchies, especially on larger systems. This is not a user-visible change + * and does not change the user-interface for setting shares/weights. + * + * We increase resolution only if we have enough bits to allow this increased + * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution + * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the + * increased costs. 
+ */ +#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ +# define SCHED_LOAD_RESOLUTION 10 +# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) +# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) +#else +# define SCHED_LOAD_RESOLUTION 0 +# define scale_load(w) (w) +# define scale_load_down(w) (w) +#endif + +#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) +#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) + #define NICE_0_LOAD SCHED_LOAD_SCALE #define NICE_0_SHIFT SCHED_LOAD_SHIFT @@ -784,7 +809,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ - static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; -- cgit v1.2.3-70-g09d2 From 5e6521eaa1ee581a13b904f35b80c5efeb2baccb Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:23 +0800 Subject: sched: Move struct sched_group to kernel/sched/sched.h Move struct sched_group_power and sched_group and related inline functions to kernel/sched/sched.h, as they are used internally only. Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A77F.2010705@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 58 ++------------------------------------------------- kernel/sched/sched.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 56 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8826d04fb1..0d641304c0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -780,62 +780,6 @@ enum cpu_idle_type { extern int __weak arch_sd_sibiling_asym_packing(void); -struct sched_group_power { - atomic_t ref; - /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. 
- */ - unsigned int power, power_orig; - unsigned long next_update; - /* - * Number of busy cpus in this group. - */ - atomic_t nr_busy_cpus; - - unsigned long cpumask[0]; /* iteration mask */ -}; - -struct sched_group { - struct sched_group *next; /* Must be a circular list */ - atomic_t ref; - - unsigned int group_weight; - struct sched_group_power *sgp; - - /* - * The CPUs this group covers. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long cpumask[0]; -}; - -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - -/* - * cpumask masking which cpus in the group are allowed to iterate up the domain - * tree. - */ -static inline struct cpumask *sched_group_mask(struct sched_group *sg) -{ - return to_cpumask(sg->sgp->cpumask); -} - -/** - * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. - * @group: The group whose first cpu is to be returned. - */ -static inline unsigned int group_first_cpu(struct sched_group *group) -{ - return cpumask_first(sched_group_cpus(group)); -} - struct sched_domain_attr { int relax_domain_level; }; @@ -846,6 +790,8 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct sched_group; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 709a30cdfd8..1a4a2b19c2f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -572,6 +572,62 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(int, sd_llc_id); +struct sched_group_power { + atomic_t ref; + /* + * CPU power of this group, SCHED_LOAD_SCALE being max power for a + * single CPU. 
+ */ + unsigned int power, power_orig; + unsigned long next_update; + /* + * Number of busy cpus in this group. + */ + atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ +}; + +struct sched_group { + struct sched_group *next; /* Must be a circular list */ + atomic_t ref; + + unsigned int group_weight; + struct sched_group_power *sgp; + + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long cpumask[0]; +}; + +static inline struct cpumask *sched_group_cpus(struct sched_group *sg) +{ + return to_cpumask(sg->cpumask); +} + +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + +/** + * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. + * @group: The group whose first cpu is to be returned. + */ +static inline unsigned int group_first_cpu(struct sched_group *group) +{ + return cpumask_first(sched_group_cpus(group)); +} + extern int group_balance_cpu(struct sched_group *sg); #endif /* CONFIG_SMP */ -- cgit v1.2.3-70-g09d2 From b13095f07f25464de65f5ce5ea94e16813d67488 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:38 +0800 Subject: sched: Move wake flags to kernel/sched/sched.h They are used internally only. 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A78E.7040609@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- kernel/sched/sched.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0d641304c0f..863b505ac48 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -920,13 +920,6 @@ struct uts_namespace; struct rq; struct sched_domain; -/* - * wake flags - */ -#define WF_SYNC 0x01 /* waker goes to sleep after wakup */ -#define WF_FORK 0x02 /* child wakeup after fork */ -#define WF_MIGRATED 0x04 /* internal use, task got migrated */ - #define ENQUEUE_WAKEUP 1 #define ENQUEUE_HEAD 2 #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1a4a2b19c2f..4e5c2afdac9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -865,6 +865,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ +/* + * wake flags + */ +#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x4 /* internal use, task got migrated */ + static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; -- cgit v1.2.3-70-g09d2 From c82ba9fa7588dfd02d4dc99ad1af486304bc424c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:06:55 +0800 Subject: sched: Move struct sched_class to kernel/sched/sched.h It's used internally only. 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A79F.8090502@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 59 --------------------------------------------------- kernel/sched/sched.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 59 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 863b505ac48..04b834fa14b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -917,65 +917,6 @@ struct mempolicy; struct pipe_inode_info; struct uts_namespace; -struct rq; -struct sched_domain; - -#define ENQUEUE_WAKEUP 1 -#define ENQUEUE_HEAD 2 -#ifdef CONFIG_SMP -#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ -#else -#define ENQUEUE_WAKING 0 -#endif - -#define DEQUEUE_SLEEP 1 - -struct sched_class { - const struct sched_class *next; - - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); - void (*yield_task) (struct rq *rq); - bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); - - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); - - struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p); - -#ifdef CONFIG_SMP - int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); - void (*migrate_task_rq)(struct task_struct *p, int next_cpu); - - void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct task_struct *task); - void (*task_woken) (struct rq *this_rq, struct task_struct *task); - - void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); - - void (*rq_online)(struct rq *rq); - void (*rq_offline)(struct rq *rq); -#endif - - void (*set_curr_task) (struct rq *rq); - void 
(*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_fork) (struct task_struct *p); - - void (*switched_from) (struct rq *this_rq, struct task_struct *task); - void (*switched_to) (struct rq *this_rq, struct task_struct *task); - void (*prio_changed) (struct rq *this_rq, struct task_struct *task, - int oldprio); - - unsigned int (*get_rr_interval) (struct rq *rq, - struct task_struct *task); - -#ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_move_group) (struct task_struct *p, int on_rq); -#endif -}; - struct load_weight { unsigned long weight, inv_weight; }; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4e5c2afdac9..eca526d7afb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -951,6 +951,61 @@ enum cpuacct_stat_index { CPUACCT_STAT_NSTATS, }; +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif + +#define DEQUEUE_SLEEP 1 + +struct sched_class { + const struct sched_class *next; + + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); + void (*yield_task) (struct rq *rq); + bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); + + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); + + struct task_struct * (*pick_next_task) (struct rq *rq); + void (*put_prev_task) (struct rq *rq, struct task_struct *p); + +#ifdef CONFIG_SMP + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); + void (*migrate_task_rq)(struct task_struct *p, int next_cpu); + + void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); + void (*post_schedule) (struct rq *this_rq); + void (*task_waking) (struct task_struct *task); + void (*task_woken) (struct rq *this_rq, struct task_struct *task); + + void (*set_cpus_allowed)(struct task_struct *p, + const 
struct cpumask *newmask); + + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); +#endif + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); + void (*task_fork) (struct task_struct *p); + + void (*switched_from) (struct rq *this_rq, struct task_struct *task); + void (*switched_to) (struct rq *this_rq, struct task_struct *task); + void (*prio_changed) (struct rq *this_rq, struct task_struct *task, + int oldprio); + + unsigned int (*get_rr_interval) (struct rq *rq, + struct task_struct *task); + +#ifdef CONFIG_FAIR_GROUP_SCHED + void (*task_move_group) (struct task_struct *p, int on_rq); +#endif +}; #define sched_class_highest (&stop_sched_class) #define for_each_class(class) \ -- cgit v1.2.3-70-g09d2 From 15f803c94bd92b17708aad9e74226fd0b2c9130c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:07:11 +0800 Subject: sched: Make default_scale_freq_power() static As default_scale_{freq,smt}_power() and update_rt_power() are used in kernel/sched/fair.c only, annotate them as static functions. 
Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A7AF.8010900@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- kernel/sched/fair.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 04b834fa14b..eadd113e1eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -880,9 +880,6 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); - bool cpus_share_cache(int this_cpu, int that_cpu); #else /* CONFIG_SMP */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a33e5986fc..9f2311256ae 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4245,7 +4245,7 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) { return SCHED_POWER_SCALE; } @@ -4255,7 +4255,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) return default_scale_freq_power(sd, cpu); } -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; unsigned long smt_gain = sd->smt_gain; @@ -4270,7 +4270,7 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) return default_scale_smt_power(sd, cpu); } -unsigned long scale_rt_power(int cpu) +static unsigned long scale_rt_power(int cpu) { struct rq *rq = cpu_rq(cpu); u64 total, available, age_stamp, avg; -- cgit 
v1.2.3-70-g09d2 From 25cc7da7e6336d3bb6a5bad3d3fa96fce9a81d5b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 5 Mar 2013 16:07:33 +0800 Subject: sched: Move group scheduling functions out of include/linux/sched.h - Make sched_group_{set_,}runtime(), sched_group_{set_,}period() and sched_rt_can_attach() static. - Move sched_{create,destroy,online,offline}_group() to kernel/sched/sched.h. - Remove declaration of sched_group_shares(). Signed-off-by: Li Zefan Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5135A7C5.3000708@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 --------------------- kernel/sched/core.c | 10 +++++----- kernel/sched/sched.h | 12 ++++++++++++ 3 files changed, 17 insertions(+), 26 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index eadd113e1eb..fc039ceccbe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2512,28 +2512,7 @@ extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifdef CONFIG_CGROUP_SCHED - extern struct task_group root_task_group; - -extern struct task_group *sched_create_group(struct task_group *parent); -extern void sched_online_group(struct task_group *tg, - struct task_group *parent); -extern void sched_destroy_group(struct task_group *tg); -extern void sched_offline_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); -#ifdef CONFIG_FAIR_GROUP_SCHED -extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -extern unsigned long sched_group_shares(struct task_group *tg); -#endif -#ifdef CONFIG_RT_GROUP_SCHED -extern int sched_group_set_rt_runtime(struct task_group *tg, - long rt_runtime_us); -extern long sched_group_rt_runtime(struct task_group *tg); -extern int sched_group_set_rt_period(struct task_group *tg, - long rt_period_us); -extern long sched_group_rt_period(struct task_group 
*tg); -extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); -#endif #endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393..9ad26c98644 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7455,7 +7455,7 @@ unlock: return err; } -int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) +static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) { u64 rt_runtime, rt_period; @@ -7467,7 +7467,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_runtime(struct task_group *tg) +static long sched_group_rt_runtime(struct task_group *tg) { u64 rt_runtime_us; @@ -7479,7 +7479,7 @@ long sched_group_rt_runtime(struct task_group *tg) return rt_runtime_us; } -int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) +static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) { u64 rt_runtime, rt_period; @@ -7492,7 +7492,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -long sched_group_rt_period(struct task_group *tg) +static long sched_group_rt_period(struct task_group *tg) { u64 rt_period_us; @@ -7527,7 +7527,7 @@ static int sched_rt_global_constraints(void) return ret; } -int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) { /* Don't accept realtime tasks when there is no way for them to run */ if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eca526d7afb..304fc1c7714 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -221,6 +221,18 @@ extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq 
*rt_rq, struct sched_rt_entity *rt_se, int cpu, struct sched_rt_entity *parent); +extern struct task_group *sched_create_group(struct task_group *parent); +extern void sched_online_group(struct task_group *tg, + struct task_group *parent); +extern void sched_destroy_group(struct task_group *tg); +extern void sched_offline_group(struct task_group *tg); + +extern void sched_move_task(struct task_struct *tsk); + +#ifdef CONFIG_FAIR_GROUP_SCHED +extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); +#endif + #else /* CONFIG_CGROUP_SCHED */ struct cfs_bandwidth { }; -- cgit v1.2.3-70-g09d2 From 9fbc42eac1f6917081dc3b39922b2f1c57fdff28 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 25 Feb 2013 17:25:39 +0100 Subject: cputime: Dynamically scale cputime for full dynticks accounting The full dynticks cputime accounting is able to account either using the tick or the context tracking subsystem. This way the housekeeping CPU can keep the low overhead tick based solution. This latter mode has a low jiffies resolution granularity and needs to be scaled against CFS precise runtime accounting to improve its result. We are doing this for CONFIG_TICK_CPU_ACCOUNTING; now we also need to expand it to full dynticks accounting dynamic off-case as well. Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Kevin Hilman Cc: Mats Liljegren Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Namhyung Kim Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E.
McKenney --- include/linux/sched.h | 4 +- kernel/fork.c | 2 +- kernel/sched/cputime.c | 154 +++++++++++++++++++++++++------------------------ 3 files changed, 83 insertions(+), 77 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbf..8d1b6034d80 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -570,7 +570,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -1327,7 +1327,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/fork.c b/kernel/fork.c index 8d932b1c905..f3146ed4907 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1230,7 +1230,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ed12cbb135f..024fe1998ad 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -388,82 +388,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -/* - * Account a single tick of cpu time. 
- * @p: the process that the cpu time gets accounted to - * @user_tick: indicates if the tick is a user or a system tick - */ -void account_process_tick(struct task_struct *p, int user_tick) -{ - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); - struct rq *rq = this_rq(); - - if (vtime_accounting_enabled()) - return; - - if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); - return; - } - - if (steal_account_process_tick()) - return; - - if (user_tick) - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); - else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, - one_jiffy_scaled); - else - account_idle_time(cputime_one_jiffy); -} - -/* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* - * Account multiple ticks of idle time. 
- * @ticks: number of stolen ticks - */ -void account_idle_ticks(unsigned long ticks) -{ - - if (sched_clock_irqtime) { - irqtime_account_idle_ticks(ticks); - return; - } - - account_idle_time(jiffies_to_cputime(ticks)); -} -#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - /* * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - *ut = p->utime; - *st = p->stime; -} - -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - struct task_cputime cputime; - - thread_group_cputime(p, &cputime); - - *ut = cputime.utime; - *st = cputime.stime; -} #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) @@ -518,8 +446,80 @@ void vtime_account_irq_enter(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + *ut = p->utime; + *st = p->stime; +} -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; +} +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +/* + * Account a single tick of cpu time. 
+ * @p: the process that the cpu time gets accounted to + * @user_tick: indicates if the tick is a user or a system tick + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + struct rq *rq = this_rq(); + + if (vtime_accounting_enabled()) + return; + + if (sched_clock_irqtime) { + irqtime_account_process_tick(p, user_tick, rq); + return; + } + + if (steal_account_process_tick()) + return; + + if (user_tick) + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, + one_jiffy_scaled); + else + account_idle_time(cputime_one_jiffy); +} + +/* + * Account multiple ticks of steal time. + * @p: the process from which the cpu time has been stolen + * @ticks: number of stolen ticks + */ +void account_steal_ticks(unsigned long ticks) +{ + account_steal_time(jiffies_to_cputime(ticks)); +} + +/* + * Account multiple ticks of idle time. 
+ * @ticks: number of stolen ticks + */ +void account_idle_ticks(unsigned long ticks) +{ + + if (sched_clock_irqtime) { + irqtime_account_idle_ticks(ticks); + return; + } + + account_idle_time(jiffies_to_cputime(ticks)); +} static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) { @@ -545,6 +545,12 @@ static void cputime_adjust(struct task_cputime *curr, { cputime_t rtime, stime, total; + if (vtime_accounting_enabled()) { + *ut = curr->utime; + *st = curr->stime; + return; + } + stime = curr->stime; total = stime + curr->utime; @@ -597,7 +603,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static unsigned long long vtime_delta(struct task_struct *tsk) -- cgit v1.2.3-70-g09d2 From 9b89f6ba2ab56e4d9c00e7e591d6bc333137895e Mon Sep 17 00:00:00 2001 From: Andrei Epure Date: Thu, 11 Apr 2013 20:30:29 +0300 Subject: sched: Document task_struct::personality field Signed-off-by: Andrei Epure Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1365701429-4721-1-git-send-email-epure.andrei@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9004f6e19ea..6bdaa73ede1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1105,8 +1105,10 @@ struct task_struct { int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ unsigned int jobctl; /* JOBCTL_*, siglock protected */ - /* ??? 
*/ + + /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; + unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ -- cgit v1.2.3-70-g09d2 From 25f55d9d01ad7a7ad248fd5af1d22675ffd202c5 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 23 Apr 2013 16:59:02 +0200 Subject: sched: Fix init NOHZ_IDLE flag On my SMP platform which is made of 5 cores in 2 clusters, I have the nr_busy_cpus field of sched_group_power struct that is not null when the platform is fully idle - which makes the scheduler unhappy. The root cause is: During the boot sequence, some CPUs reach the idle loop and set their NOHZ_IDLE flag while waiting for other CPUs to boot. But the nr_busy_cpus field is initialized later with the assumption that all CPUs are in the busy state whereas some CPUs have already set their NOHZ_IDLE flag. More generally, the NOHZ_IDLE flag must be initialized when new sched_domains are created in order to ensure that NOHZ_IDLE and nr_busy_cpus are aligned. This condition can be ensured by adding a synchronize_rcu() between the destruction of old sched_domains and the creation of new ones so the NOHZ_IDLE flag will not be updated with old sched_domain once it has been initialized. But this solution introduces an additional latency in the rebuild sequence that is called during cpu hotplug. As suggested by Frederic Weisbecker, another solution is to have the same rcu lifecycle for both NOHZ_IDLE and sched_domain struct. A new nohz_idle field is added to sched_domain so both status and sched_domain will share the same RCU lifecycle and will always be synchronized. In addition, there is no more need to protect nohz_idle against concurrent access as it is only modified by 2 exclusive functions called by local cpu. This solution has been preferred to the creation of a new struct with an extra pointer indirection for sched_domain.
The synchronization is done at the cost of : - An additional indirection and a rcu_dereference for accessing nohz_idle. - We use only the nohz_idle field of the top sched_domain. Signed-off-by: Vincent Guittot Acked-by: Peter Zijlstra Cc: linaro-kernel@lists.linaro.org Cc: peterz@infradead.org Cc: fweisbec@gmail.com Cc: pjt@google.com Cc: rostedt@goodmis.org Cc: efault@gmx.de Link: http://lkml.kernel.org/r/1366729142-14662-1-git-send-email-vincent.guittot@linaro.org [ Fixed !NO_HZ build bug. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/sched/fair.c | 26 ++++++++++++++++---------- kernel/sched/sched.h | 1 - 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6bdaa73ede1..a25168f4ab8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -808,6 +808,8 @@ struct sched_domain { unsigned int wake_idx; unsigned int forkexec_idx; unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ int level; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index acaf567a03d..8bf7081b1ec 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5420,13 +5420,16 @@ static inline void set_cpu_sd_state_busy(void) struct sched_domain *sd; int cpu = smp_processor_id(); - if (!test_bit(NOHZ_IDLE, nohz_flags(cpu))) - return; - clear_bit(NOHZ_IDLE, nohz_flags(cpu)); - rcu_read_lock(); - for_each_domain(cpu, sd) + sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); + + if (!sd || !sd->nohz_idle) + goto unlock; + sd->nohz_idle = 0; + + for (; sd; sd = sd->parent) atomic_inc(&sd->groups->sgp->nr_busy_cpus); +unlock: rcu_read_unlock(); } @@ -5435,13 +5438,16 @@ void set_cpu_sd_state_idle(void) struct sched_domain *sd; int cpu = smp_processor_id(); - if (test_bit(NOHZ_IDLE, nohz_flags(cpu))) - return; - set_bit(NOHZ_IDLE, nohz_flags(cpu)); - rcu_read_lock(); - for_each_domain(cpu, sd) + sd = 
rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); + + if (!sd || sd->nohz_idle) + goto unlock; + sd->nohz_idle = 1; + + for (; sd; sd = sd->parent) atomic_dec(&sd->groups->sgp->nr_busy_cpus); +unlock: rcu_read_unlock(); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 605426a6358..4c225c4c711 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1303,7 +1303,6 @@ extern void account_cfs_bandwidth_used(int enabled, int was_enabled); enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, - NOHZ_IDLE, }; #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -- cgit v1.2.3-70-g09d2