Diffstat (limited to 'kernel')
-rw-r--r--  kernel/irq/manage.c                                              |   3
-rw-r--r--  kernel/power/Kconfig                                             |   9
-rw-r--r--  kernel/power/Makefile                                            |   2
-rw-r--r--  kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c)   |  24
-rw-r--r--  kernel/power/suspend.c                                           |   6
-rw-r--r--  kernel/sched.c                                                   | 133
-rw-r--r--  kernel/sched_fair.c                                              |   2
-rw-r--r--  kernel/time/tick-sched.c                                         |   5
-rw-r--r--  kernel/trace/trace_event_perf.c                                  |   4
9 files changed, 107 insertions, 81 deletions
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3164ba7ce15..e1497481fe8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -456,6 +456,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		/* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
 		desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
 		desc->status |= flags;
+
+		if (chip != desc->chip)
+			irq_chip_set_defaults(desc->chip);
 	}
 
 	return ret;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d..ca6066a6952 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
 	depends on PM_ADVANCED_DEBUG
 	default n
 
+config SUSPEND_NVS
+       bool
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
+	select SUSPEND_NVS if HAS_IOMEM
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
-config HIBERNATION_NVS
-	bool
-
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
-	select HIBERNATION_NVS if HAS_IOMEM
+	select SUSPEND_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces.  STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf0..f9063c6b185 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
-obj-$(CONFIG_HIBERNATION_NVS)	+= hibernate_nvs.o
+obj-$(CONFIG_SUSPEND_NVS)	+= nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7..1836db60bbb 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@
 
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
+ * suspend and to restore the contents of this memory during the subsequent
  * resume.  The code below implements a mechanism allowing us to do that.
  */
 
@@ -30,7 +30,7 @@ struct nvs_page {
 static LIST_HEAD(nvs_list);
 
 /**
- *	hibernate_nvs_register - register platform NVS memory region to save
+ *	suspend_nvs_register - register platform NVS memory region to save
  *	@start - physical address of the region
  *	@size - size of the region
  *
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
  *	things so that the data from page-aligned addresses in this region will
 *	be copied into separate RAM pages.
  */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
+int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
 }
 
 /**
- *	hibernate_nvs_free - free data pages allocated for saving NVS regions
+ *	suspend_nvs_free - free data pages allocated for saving NVS regions
  */
-void hibernate_nvs_free(void)
+void suspend_nvs_free(void)
 {
 	struct nvs_page *entry;
 
@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
 }
 
 /**
- *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ *	suspend_nvs_alloc - allocate memory necessary for saving NVS regions
  */
-int hibernate_nvs_alloc(void)
+int suspend_nvs_alloc(void)
 {
 	struct nvs_page *entry;
 
 	list_for_each_entry(entry, &nvs_list, node) {
 		entry->data = (void *)__get_free_page(GFP_KERNEL);
 		if (!entry->data) {
-			hibernate_nvs_free();
+			suspend_nvs_free();
 			return -ENOMEM;
 		}
 	}
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
 }
 
 /**
- *	hibernate_nvs_save - save NVS memory regions
+ *	suspend_nvs_save - save NVS memory regions
  */
-void hibernate_nvs_save(void)
+void suspend_nvs_save(void)
 {
 	struct nvs_page *entry;
 
@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
 }
 
 /**
- *	hibernate_nvs_restore - restore NVS memory regions
+ *	suspend_nvs_restore - restore NVS memory regions
  *
  *	This function is going to be called with interrupts disabled, so it
  *	cannot iounmap the virtual addresses used to access the NVS region.
  */
-void hibernate_nvs_restore(void)
+void suspend_nvs_restore(void)
 {
 	struct nvs_page *entry;
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b99..f37cb7dd440 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
 
 #include "power.h"
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c5d34a4e93..8c473adbf22 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-				struct task_group, css);
-#else
-	tg = &init_task_group;
-#endif
-	return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-	/*
-	 * Strictly speaking this rcu_read_lock() is not needed since the
-	 * task_group is tied to the cgroup, which in turn can never go away
-	 * as long as there are tasks attached to it.
-	 *
-	 * However since task_group() uses task_subsys_state() which is an
-	 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-	 */
-	rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-	p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
-	p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-	rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	return NULL;
-}
-
 #endif	/* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		(&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&task_rq(p)->lock));
+	return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+	p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
+	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
 	if (!rq->skip_clock_update)
@@ -1257,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
 	s64 period = sched_avg_period();
 
 	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		/*
+		 * Inline assembly required to prevent the compiler
+		 * optimising this loop into a divmod call.
+		 * See __iter_div_u64_rem() for another example of this.
+		 */
+		asm("" : "+rm" (rq->age_stamp));
 		rq->age_stamp += period;
 		rq->rt_avg /= 2;
 	}
@@ -1660,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-	if (root_task_group_empty())
-		return;
-
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -2494,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 
+	/*
+	 * The child is not yet in the pid-hash so no cgroup attach races,
+	 * and the cgroup is pinned to this child due to cgroup_fork()
+	 * is ran before sched_fork().
+	 *
+	 * Silence PROVE_RCU.
+	 */
+	rcu_read_lock();
 	set_task_cpu(p, cpu);
+	rcu_read_unlock();
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -4465,16 +4474,6 @@ recheck:
 	}
 
 	if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-		/*
-		 * Do not allow realtime tasks into groups that have no runtime
-		 * assigned.
-		 */
-		if (rt_bandwidth_enabled() && rt_policy(policy) &&
-				task_group(p)->rt_bandwidth.rt_runtime == 0)
-			return -EPERM;
-#endif
-
 		retval = security_task_setscheduler(p, policy, param);
 		if (retval)
 			return retval;
@@ -4490,6 +4489,22 @@ recheck:
 	 * runqueue lock must be held.
 	 */
 	rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (user) {
+		/*
+		 * Do not allow realtime tasks into groups that have no runtime
+		 * assigned.
+		 */
+		if (rt_bandwidth_enabled() && rt_policy(policy) &&
+				task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			__task_rq_unlock(rq);
+			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			return -EPERM;
+		}
+	}
+#endif
+
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eed35eded60..a878b5332da 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1240,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * effect of the currently running task from the load
 	 * of the current CPU:
 	 */
+	rcu_read_lock();
 	if (sync) {
 		tg = task_group(current);
 		weight = current->se.load.weight;
@@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 		balanced = this_eff_load <= prev_eff_load;
 	} else
 		balanced = true;
+	rcu_read_unlock();
 
 	/*
 	 * If the currently running task will sleep within
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c03..783fbadf220 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
 		goto end;
 	}
 
-	if (nohz_ratelimit(cpu))
-		goto end;
-
 	ts->idle_calls++;
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu)) {
+	    arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 4799d7047eb..6053982dc30 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -94,7 +94,9 @@ int perf_trace_init(struct perf_event *p_event)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(tp_event, &ftrace_events, list) {
 		if (tp_event->event.type == event_id &&
-		    tp_event->class && tp_event->class->perf_probe &&
+		    tp_event->class &&
+		    (tp_event->class->perf_probe ||
+		     tp_event->class->reg) &&
 		    try_module_get(tp_event->mod)) {
 			ret = perf_trace_event_init(tp_event, p_event);
 			break;
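
A note on the sched_avg_update() hunk in kernel/sched.c above: the empty asm() with a "+rm" operand is a compiler barrier whose only purpose, per the comment added by the patch, is to keep the catch-up loop from being optimised into a single division/modulo. The standalone user-space C sketch below is not part of the patch; the names fake_rq and demo_avg_update are made up for illustration, and only the loop body mirrors the kernel code.

/*
 * Standalone illustration of the empty-asm trick used in the
 * sched_avg_update() hunk above.  The struct and function names
 * (fake_rq, demo_avg_update) are hypothetical; only the loop body
 * mirrors the kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_rq {
	uint64_t clock;
	uint64_t age_stamp;
	uint64_t rt_avg;
};

static void demo_avg_update(struct fake_rq *rq, int64_t period)
{
	while ((int64_t)(rq->clock - rq->age_stamp) > period) {
		/*
		 * Empty GNU C asm with a "+rm" operand: the compiler must
		 * assume age_stamp may be read and modified here, so the
		 * loop is kept as a loop (rt_avg halved once per elapsed
		 * period) rather than folded into a division/modulo.
		 */
		asm("" : "+rm" (rq->age_stamp));
		rq->age_stamp += period;
		rq->rt_avg /= 2;
	}
}

int main(void)
{
	struct fake_rq rq = { .clock = 10000, .age_stamp = 0, .rt_avg = 1024 };

	demo_avg_update(&rq, 1000);
	printf("age_stamp=%llu rt_avg=%llu\n",
	       (unsigned long long)rq.age_stamp,
	       (unsigned long long)rq.rt_avg);
	return 0;
}

Built with a GNU-compatible compiler (for example gcc -O2), the "+rm" operand forces the compiler to treat age_stamp as possibly changing on every pass, so rt_avg is halved once per elapsed period exactly as the kernel loop intends; whether a particular compiler would otherwise strength-reduce the loop depends on its optimiser.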