Diffstat (limited to 'kernel')
 36 files changed, 585 insertions(+), 436 deletions(-)
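Much of this diff reworks the freezer: the old TIF_FREEZE-based refrigerator()/freeze_task(p, sig_only)/thaw_process() API is replaced by freeze_task(), __thaw_task(), __refrigerator(), and a new kthread_freezable_should_stop() helper that freezable kthreads are meant to use instead of calling try_to_freeze() directly. As a rough, illustrative sketch (not part of the diff) of how a freezable kernel thread might use the helpers added below, assume a hypothetical worker with a placeholder one-second wait:

	#include <linux/kthread.h>
	#include <linux/freezer.h>
	#include <linux/sched.h>
	#include <linux/printk.h>

	/* Hypothetical freezable worker built on the helpers added in this diff. */
	static int example_thread_fn(void *data)
	{
		bool was_frozen;

		set_freezable();	/* drop PF_NOFREEZE so the freezer sees us */

		while (!kthread_freezable_should_stop(&was_frozen)) {
			if (was_frozen)
				pr_debug("example: back from the refrigerator\n");

			/* ... do one unit of work ... */

			schedule_timeout_interruptible(HZ);	/* placeholder wait */
		}
		return 0;
	}

Such a thread would be started with kthread_run(example_thread_fn, NULL, "example") and stopped with kthread_stop(); kthread_freezable_should_stop() both answers the stop question and parks the thread in __refrigerator() while a freeze is in progress, so the loop never calls try_to_freeze() itself.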
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9d5648f3cd..a184470cf9b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2098,11 +2098,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)  			continue;  		/* get old css_set pointer */  		task_lock(tsk); -		if (tsk->flags & PF_EXITING) { -			/* ignore this task if it's going away */ -			task_unlock(tsk); -			continue; -		}  		oldcg = tsk->cgroups;  		get_css_set(oldcg);  		task_unlock(tsk); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 213c0351dad..fcb93fca782 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)  			    struct freezer, css);  } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task)  { -	enum freezer_state state = task_freezer(task)->state; -	return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); -} +	enum freezer_state state; +	bool ret; -int cgroup_freezing_or_frozen(struct task_struct *task) -{ -	int result; -	task_lock(task); -	result = __cgroup_freezing_or_frozen(task); -	task_unlock(task); -	return result; +	rcu_read_lock(); +	state = task_freezer(task)->state; +	ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; +	rcu_read_unlock(); + +	return ret;  }  /* @@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys;   * freezer_can_attach():   * cgroup_mutex (held by caller of can_attach)   * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - *   * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):   * freezer->lock   *  sighand->siglock (if the cgroup is freezing) @@ -130,7 +125,7 @@ struct cgroup_subsys freezer_subsys;   *   write_lock css_set_lock (cgroup iterator start)   *    task->alloc_lock   *   read_lock css_set_lock (cgroup iterator start) - *    task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) + *    task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())   *     sighand->siglock   */  static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, @@ -150,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,  static void freezer_destroy(struct cgroup_subsys *ss,  			    struct cgroup *cgroup)  { -	kfree(cgroup_freezer(cgroup)); +	struct freezer *freezer = cgroup_freezer(cgroup); + +	if (freezer->state != CGROUP_THAWED) +		atomic_dec(&system_freezing_cnt); +	kfree(freezer);  }  /* task is frozen or will freeze immediately when next it gets woken */ @@ -184,13 +183,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,  static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)  { -	rcu_read_lock(); -	if (__cgroup_freezing_or_frozen(tsk)) { -		rcu_read_unlock(); -		return -EBUSY; -	} -	rcu_read_unlock(); -	return 0; +	return cgroup_freezing(tsk) ? -EBUSY : 0;  }  static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) @@ -220,7 +213,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)  	/* Locking avoids race with FREEZING -> THAWED transitions. 
*/  	if (freezer->state == CGROUP_FREEZING) -		freeze_task(task, true); +		freeze_task(task);  	spin_unlock_irq(&freezer->lock);  } @@ -238,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup,  	cgroup_iter_start(cgroup, &it);  	while ((task = cgroup_iter_next(cgroup, &it))) {  		ntotal++; -		if (is_task_frozen_enough(task)) +		if (freezing(task) && is_task_frozen_enough(task))  			nfrozen++;  	} @@ -286,10 +279,9 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)  	struct task_struct *task;  	unsigned int num_cant_freeze_now = 0; -	freezer->state = CGROUP_FREEZING;  	cgroup_iter_start(cgroup, &it);  	while ((task = cgroup_iter_next(cgroup, &it))) { -		if (!freeze_task(task, true)) +		if (!freeze_task(task))  			continue;  		if (is_task_frozen_enough(task))  			continue; @@ -307,12 +299,9 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)  	struct task_struct *task;  	cgroup_iter_start(cgroup, &it); -	while ((task = cgroup_iter_next(cgroup, &it))) { -		thaw_process(task); -	} +	while ((task = cgroup_iter_next(cgroup, &it))) +		__thaw_task(task);  	cgroup_iter_end(cgroup, &it); - -	freezer->state = CGROUP_THAWED;  }  static int freezer_change_state(struct cgroup *cgroup, @@ -326,20 +315,24 @@ static int freezer_change_state(struct cgroup *cgroup,  	spin_lock_irq(&freezer->lock);  	update_if_frozen(cgroup, freezer); -	if (goal_state == freezer->state) -		goto out;  	switch (goal_state) {  	case CGROUP_THAWED: +		if (freezer->state != CGROUP_THAWED) +			atomic_dec(&system_freezing_cnt); +		freezer->state = CGROUP_THAWED;  		unfreeze_cgroup(cgroup, freezer);  		break;  	case CGROUP_FROZEN: +		if (freezer->state == CGROUP_THAWED) +			atomic_inc(&system_freezing_cnt); +		freezer->state = CGROUP_FREEZING;  		retval = try_to_freeze_cgroup(cgroup, freezer);  		break;  	default:  		BUG();  	} -out: +  	spin_unlock_irq(&freezer->lock);  	return retval; diff --git a/kernel/cpu.c b/kernel/cpu.c index 563f1360947..cf915b86a5f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -470,7 +470,7 @@ out:  	cpu_maps_update_done();  } -static int alloc_frozen_cpus(void) +static int __init alloc_frozen_cpus(void)  {  	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))  		return -ENOMEM; @@ -543,7 +543,7 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,  } -int cpu_hotplug_pm_sync_init(void) +static int __init cpu_hotplug_pm_sync_init(void)  {  	pm_notifier(cpu_hotplug_pm_callback, 0);  	return 0; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9fe58c46a42..0b1712dba58 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task)  			    struct cpuset, css);  } +#ifdef CONFIG_NUMA +static inline bool task_has_mempolicy(struct task_struct *task) +{ +	return task->mempolicy; +} +#else +static inline bool task_has_mempolicy(struct task_struct *task) +{ +	return false; +} +#endif + +  /* bits in struct cpuset flags field */  typedef enum {  	CS_CPU_EXCLUSIVE, @@ -949,7 +962,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,  static void cpuset_change_task_nodemask(struct task_struct *tsk,  					nodemask_t *newmems)  { -	bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed); +	bool need_loop;  repeat:  	/* @@ -962,6 +975,14 @@ repeat:  		return;  	task_lock(tsk); +	/* +	 * Determine if a loop is necessary if another thread is doing +	 * get_mems_allowed().  
If at least one node remains unchanged and +	 * tsk does not have a mempolicy, then an empty nodemask will not be +	 * possible when mems_allowed is larger than a word. +	 */ +	need_loop = task_has_mempolicy(tsk) || +			!nodes_intersects(*newmems, tsk->mems_allowed);  	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);  	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); @@ -981,11 +1002,9 @@ repeat:  	/*  	 * Allocation of memory is very fast, we needn't sleep when waiting -	 * for the read-side.  No wait is necessary, however, if at least one -	 * node remains unchanged. +	 * for the read-side.  	 */ -	while (masks_disjoint && -			ACCESS_ONCE(tsk->mems_allowed_change_disable)) { +	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {  		task_unlock(tsk);  		if (!task_curr(tsk))  			yield(); diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e8457da6f9..58690af323e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,  static void update_context_time(struct perf_event_context *ctx);  static u64 perf_event_time(struct perf_event *event); +static void ring_buffer_attach(struct perf_event *event, +			       struct ring_buffer *rb); +  void __weak perf_event_print_debug(void)	{ }  extern __weak const char *perf_pmu_name(void) @@ -2171,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,  	 */  	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); -	perf_event_sched_in(cpuctx, ctx, task); +	if (ctx->nr_events) +		cpuctx->task_ctx = ctx; -	cpuctx->task_ctx = ctx; +	perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);  	perf_pmu_enable(ctx->pmu);  	perf_ctx_unlock(cpuctx, ctx); @@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)  	struct ring_buffer *rb;  	unsigned int events = POLL_HUP; +	/* +	 * Race between perf_event_set_output() and perf_poll(): perf_poll() +	 * grabs the rb reference but perf_event_set_output() overrides it. 
+	 * Here is the timeline for two threads T1, T2: +	 * t0: T1, rb = rcu_dereference(event->rb) +	 * t1: T2, old_rb = event->rb +	 * t2: T2, event->rb = new rb +	 * t3: T2, ring_buffer_detach(old_rb) +	 * t4: T1, ring_buffer_attach(rb1) +	 * t5: T1, poll_wait(event->waitq) +	 * +	 * To avoid this problem, we grab mmap_mutex in perf_poll() +	 * thereby ensuring that the assignment of the new ring buffer +	 * and the detachment of the old buffer appear atomic to perf_poll() +	 */ +	mutex_lock(&event->mmap_mutex); +  	rcu_read_lock();  	rb = rcu_dereference(event->rb); -	if (rb) +	if (rb) { +		ring_buffer_attach(event, rb);  		events = atomic_xchg(&rb->poll, 0); +	}  	rcu_read_unlock(); +	mutex_unlock(&event->mmap_mutex); +  	poll_wait(file, &event->waitq, wait);  	return events; @@ -3496,6 +3521,53 @@ unlock:  	return ret;  } +static void ring_buffer_attach(struct perf_event *event, +			       struct ring_buffer *rb) +{ +	unsigned long flags; + +	if (!list_empty(&event->rb_entry)) +		return; + +	spin_lock_irqsave(&rb->event_lock, flags); +	if (!list_empty(&event->rb_entry)) +		goto unlock; + +	list_add(&event->rb_entry, &rb->event_list); +unlock: +	spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_detach(struct perf_event *event, +			       struct ring_buffer *rb) +{ +	unsigned long flags; + +	if (list_empty(&event->rb_entry)) +		return; + +	spin_lock_irqsave(&rb->event_lock, flags); +	list_del_init(&event->rb_entry); +	wake_up_all(&event->waitq); +	spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_wakeup(struct perf_event *event) +{ +	struct ring_buffer *rb; + +	rcu_read_lock(); +	rb = rcu_dereference(event->rb); +	if (!rb) +		goto unlock; + +	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) +		wake_up_all(&event->waitq); + +unlock: +	rcu_read_unlock(); +} +  static void rb_free_rcu(struct rcu_head *rcu_head)  {  	struct ring_buffer *rb; @@ -3521,9 +3593,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)  static void ring_buffer_put(struct ring_buffer *rb)  { +	struct perf_event *event, *n; +	unsigned long flags; +  	if (!atomic_dec_and_test(&rb->refcount))  		return; +	spin_lock_irqsave(&rb->event_lock, flags); +	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { +		list_del_init(&event->rb_entry); +		wake_up_all(&event->waitq); +	} +	spin_unlock_irqrestore(&rb->event_lock, flags); +  	call_rcu(&rb->rcu_head, rb_free_rcu);  } @@ -3546,6 +3628,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)  		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);  		vma->vm_mm->pinned_vm -= event->mmap_locked;  		rcu_assign_pointer(event->rb, NULL); +		ring_buffer_detach(event, rb);  		mutex_unlock(&event->mmap_mutex);  		ring_buffer_put(rb); @@ -3700,7 +3783,7 @@ static const struct file_operations perf_fops = {  void perf_event_wakeup(struct perf_event *event)  { -	wake_up_all(&event->waitq); +	ring_buffer_wakeup(event);  	if (event->pending_kill) {  		kill_fasync(&event->fasync, SIGIO, event->pending_kill); @@ -5822,6 +5905,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,  	INIT_LIST_HEAD(&event->group_entry);  	INIT_LIST_HEAD(&event->event_entry);  	INIT_LIST_HEAD(&event->sibling_list); +	INIT_LIST_HEAD(&event->rb_entry); +  	init_waitqueue_head(&event->waitq);  	init_irq_work(&event->pending, perf_pending_event); @@ -6028,6 +6113,8 @@ set:  	old_rb = event->rb;  	rcu_assign_pointer(event->rb, rb); +	if (old_rb) +		ring_buffer_detach(event, old_rb);  	ret = 0;  unlock:  	
mutex_unlock(&event->mmap_mutex); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09097dd8116..64568a69937 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -22,6 +22,9 @@ struct ring_buffer {  	local_t				lost;		/* nr records lost   */  	long				watermark;	/* wakeup watermark  */ +	/* poll crap */ +	spinlock_t			event_lock; +	struct list_head		event_list;  	struct perf_event_mmap_page	*user_page;  	void				*data_pages[0]; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a2a29205cc0..7f3011c6b57 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)  		rb->writable = 1;  	atomic_set(&rb->refcount, 1); + +	INIT_LIST_HEAD(&rb->event_list); +	spin_lock_init(&rb->event_lock);  }  #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/exit.c b/kernel/exit.c index d0b7d988f87..95a4141d07e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk)  	tsk->mm = NULL;  	up_read(&mm->mmap_sem);  	enter_lazy_tlb(mm, current); -	/* We don't want this task to be frozen prematurely */ -	clear_freeze_flag(tsk);  	task_unlock(tsk);  	mm_update_next_owner(mm);  	mmput(mm); @@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code)  	exit_rcu();  	/* causes final put_task_struct in finish_task_switch(). */  	tsk->state = TASK_DEAD; +	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */  	schedule();  	BUG();  	/* Avoid "noreturn function does return".  */ diff --git a/kernel/fork.c b/kernel/fork.c index da4a6a10d08..82780861384 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -992,7 +992,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)  	new_flags |= PF_FORKNOEXEC;  	new_flags |= PF_STARTING;  	p->flags = new_flags; -	clear_freeze_flag(p);  }  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) diff --git a/kernel/freezer.c b/kernel/freezer.c index 7be56c53439..9815b8d1eed 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -9,101 +9,114 @@  #include <linux/export.h>  #include <linux/syscalls.h>  #include <linux/freezer.h> +#include <linux/kthread.h> -/* - * freezing is complete, mark current process as frozen +/* total number of freezing conditions in effect */ +atomic_t system_freezing_cnt = ATOMIC_INIT(0); +EXPORT_SYMBOL(system_freezing_cnt); + +/* indicate whether PM freezing is in effect, protected by pm_mutex */ +bool pm_freezing; +bool pm_nosig_freezing; + +/* protects freezing and frozen transitions */ +static DEFINE_SPINLOCK(freezer_lock); + +/** + * freezing_slow_path - slow path for testing whether a task needs to be frozen + * @p: task to be tested + * + * This function is called by freezing() if system_freezing_cnt isn't zero + * and tests whether @p needs to enter and stay in frozen state.  Can be + * called under any context.  The freezers are responsible for ensuring the + * target tasks see the updated state.   */ -static inline void frozen_process(void) +bool freezing_slow_path(struct task_struct *p)  { -	if (!unlikely(current->flags & PF_NOFREEZE)) { -		current->flags |= PF_FROZEN; -		smp_wmb(); -	} -	clear_freeze_flag(current); +	if (p->flags & PF_NOFREEZE) +		return false; + +	if (pm_nosig_freezing || cgroup_freezing(p)) +		return true; + +	if (pm_freezing && !(p->flags & PF_KTHREAD)) +		return true; + +	return false;  } +EXPORT_SYMBOL(freezing_slow_path);  /* Refrigerator is place where frozen processes are stored :-). 
*/ -void refrigerator(void) +bool __refrigerator(bool check_kthr_stop)  {  	/* Hmm, should we be allowed to suspend when there are realtime  	   processes around? */ -	long save; +	bool was_frozen = false; +	long save = current->state; -	task_lock(current); -	if (freezing(current)) { -		frozen_process(); -		task_unlock(current); -	} else { -		task_unlock(current); -		return; -	} -	save = current->state;  	pr_debug("%s entered refrigerator\n", current->comm); -	spin_lock_irq(¤t->sighand->siglock); -	recalc_sigpending(); /* We sent fake signal, clean it up */ -	spin_unlock_irq(¤t->sighand->siglock); - -	/* prevent accounting of that task to load */ -	current->flags |= PF_FREEZING; -  	for (;;) {  		set_current_state(TASK_UNINTERRUPTIBLE); -		if (!frozen(current)) + +		spin_lock_irq(&freezer_lock); +		current->flags |= PF_FROZEN; +		if (!freezing(current) || +		    (check_kthr_stop && kthread_should_stop())) +			current->flags &= ~PF_FROZEN; +		spin_unlock_irq(&freezer_lock); + +		if (!(current->flags & PF_FROZEN))  			break; +		was_frozen = true;  		schedule();  	} -	/* Remove the accounting blocker */ -	current->flags &= ~PF_FREEZING; -  	pr_debug("%s left refrigerator\n", current->comm); -	__set_current_state(save); + +	/* +	 * Restore saved task state before returning.  The mb'd version +	 * needs to be used; otherwise, it might silently break +	 * synchronization which depends on ordered task state change. +	 */ +	set_current_state(save); + +	return was_frozen;  } -EXPORT_SYMBOL(refrigerator); +EXPORT_SYMBOL(__refrigerator);  static void fake_signal_wake_up(struct task_struct *p)  {  	unsigned long flags; -	spin_lock_irqsave(&p->sighand->siglock, flags); -	signal_wake_up(p, 0); -	spin_unlock_irqrestore(&p->sighand->siglock, flags); +	if (lock_task_sighand(p, &flags)) { +		signal_wake_up(p, 0); +		unlock_task_sighand(p, &flags); +	}  }  /** - *	freeze_task - send a freeze request to given task - *	@p: task to send the request to - *	@sig_only: if set, the request will only be sent if the task has the - *		PF_FREEZER_NOSIG flag unset - *	Return value: 'false', if @sig_only is set and the task has - *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise + * freeze_task - send a freeze request to given task + * @p: task to send the request to + * + * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE + * flag and either sending a fake signal to it or waking it up, depending + * on whether it has %PF_FREEZER_NOSIG set.   * - *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and - *	either sending a fake signal to it or waking it up, depending on whether - *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task - *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its - *	TIF_FREEZE flag will not be set. + * RETURNS: + * %false, if @p is not freezing or already frozen; %true, otherwise   */ -bool freeze_task(struct task_struct *p, bool sig_only) +bool freeze_task(struct task_struct *p)  { -	/* -	 * We first check if the task is freezing and next if it has already -	 * been frozen to avoid the race with frozen_process() which first marks -	 * the task as frozen and next clears its TIF_FREEZE. 
-	 */ -	if (!freezing(p)) { -		smp_rmb(); -		if (frozen(p)) -			return false; +	unsigned long flags; -		if (!sig_only || should_send_signal(p)) -			set_freeze_flag(p); -		else -			return false; +	spin_lock_irqsave(&freezer_lock, flags); +	if (!freezing(p) || frozen(p)) { +		spin_unlock_irqrestore(&freezer_lock, flags); +		return false;  	} -	if (should_send_signal(p)) { +	if (!(p->flags & PF_KTHREAD)) {  		fake_signal_wake_up(p);  		/*  		 * fake_signal_wake_up() goes through p's scheduler @@ -111,56 +124,48 @@ bool freeze_task(struct task_struct *p, bool sig_only)  		 * TASK_RUNNING transition can't race with task state  		 * testing in try_to_freeze_tasks().  		 */ -	} else if (sig_only) { -		return false;  	} else {  		wake_up_state(p, TASK_INTERRUPTIBLE);  	} +	spin_unlock_irqrestore(&freezer_lock, flags);  	return true;  } -void cancel_freezing(struct task_struct *p) +void __thaw_task(struct task_struct *p)  {  	unsigned long flags; -	if (freezing(p)) { -		pr_debug("  clean up: %s\n", p->comm); -		clear_freeze_flag(p); -		spin_lock_irqsave(&p->sighand->siglock, flags); -		recalc_sigpending_and_wake(p); -		spin_unlock_irqrestore(&p->sighand->siglock, flags); -	} -} - -static int __thaw_process(struct task_struct *p) -{ -	if (frozen(p)) { -		p->flags &= ~PF_FROZEN; -		return 1; -	} -	clear_freeze_flag(p); -	return 0; +	/* +	 * Clear freezing and kick @p if FROZEN.  Clearing is guaranteed to +	 * be visible to @p as waking up implies wmb.  Waking up inside +	 * freezer_lock also prevents wakeups from leaking outside +	 * refrigerator. +	 */ +	spin_lock_irqsave(&freezer_lock, flags); +	if (frozen(p)) +		wake_up_process(p); +	spin_unlock_irqrestore(&freezer_lock, flags);  } -/* - * Wake up a frozen process +/** + * set_freezable - make %current freezable   * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails.  Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. + * Mark %current freezable and enter refrigerator if necessary.   */ -int thaw_process(struct task_struct *p) +bool set_freezable(void)  { -	task_lock(p); -	if (__thaw_process(p) == 1) { -		task_unlock(p); -		wake_up_process(p); -		return 1; -	} -	task_unlock(p); -	return 0; +	might_sleep(); + +	/* +	 * Modify flags while holding freezer_lock.  This ensures the +	 * freezer notices that we aren't frozen yet or the freezing +	 * condition is visible to try_to_freeze() below. 
+	 */ +	spin_lock_irq(&freezer_lock); +	current->flags &= ~PF_NOFREEZE; +	spin_unlock_irq(&freezer_lock); + +	return try_to_freeze();  } -EXPORT_SYMBOL(thaw_process); +EXPORT_SYMBOL(set_freezable); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0e2b179bc7b..1da999f5e74 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)  static int irq_wait_for_interrupt(struct irqaction *action)  { +	set_current_state(TASK_INTERRUPTIBLE); +  	while (!kthread_should_stop()) { -		set_current_state(TASK_INTERRUPTIBLE);  		if (test_and_clear_bit(IRQTF_RUNTHREAD,  				       &action->thread_flags)) { @@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action)  			return 0;  		}  		schedule(); +		set_current_state(TASK_INTERRUPTIBLE);  	} +	__set_current_state(TASK_RUNNING);  	return -1;  } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index bbdfe2a462a..66ff7109f69 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -66,8 +66,9 @@ void jump_label_inc(struct jump_label_key *key)  		return;  	jump_label_lock(); -	if (atomic_add_return(1, &key->enabled) == 1) +	if (atomic_read(&key->enabled) == 0)  		jump_label_update(key, JUMP_LABEL_ENABLE); +	atomic_inc(&key->enabled);  	jump_label_unlock();  } diff --git a/kernel/kexec.c b/kernel/kexec.c index dc7bc082928..090ee10d960 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1523,7 +1523,7 @@ int kernel_kexec(void)  #ifdef CONFIG_KEXEC_JUMP  	if (kexec_image->preserve_context) { -		mutex_lock(&pm_mutex); +		lock_system_sleep();  		pm_prepare_console();  		error = freeze_processes();  		if (error) { @@ -1576,7 +1576,7 @@ int kernel_kexec(void)  		thaw_processes();   Restore_console:  		pm_restore_console(); -		mutex_unlock(&pm_mutex); +		unlock_system_sleep();  	}  #endif diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97c75b..a0a88543934 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,6 +36,7 @@  #include <linux/resource.h>  #include <linux/notifier.h>  #include <linux/suspend.h> +#include <linux/rwsem.h>  #include <asm/uaccess.h>  #include <trace/events/module.h> @@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq;  static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;  static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;  static DEFINE_SPINLOCK(umh_sysctl_lock); +static DECLARE_RWSEM(umhelper_sem);  #ifdef CONFIG_MODULES @@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work)   * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY   * (used for preventing user land processes from being created after the user   * land has been frozen during a system-wide hibernation or suspend operation). + * Should always be manipulated under umhelper_sem acquired for write.   */  static int usermodehelper_disabled = 1; @@ -282,17 +285,29 @@ static int usermodehelper_disabled = 1;  static atomic_t running_helpers = ATOMIC_INIT(0);  /* - * Wait queue head used by usermodehelper_pm_callback() to wait for all running + * Wait queue head used by usermodehelper_disable() to wait for all running   * helpers to finish.   
*/  static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);  /*   * Time to wait for running_helpers to become zero before the setting of - * usermodehelper_disabled in usermodehelper_pm_callback() fails + * usermodehelper_disabled in usermodehelper_disable() fails   */  #define RUNNING_HELPERS_TIMEOUT	(5 * HZ) +void read_lock_usermodehelper(void) +{ +	down_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_lock_usermodehelper); + +void read_unlock_usermodehelper(void) +{ +	up_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); +  /**   * usermodehelper_disable - prevent new helpers from being started   */ @@ -300,8 +315,10 @@ int usermodehelper_disable(void)  {  	long retval; +	down_write(&umhelper_sem);  	usermodehelper_disabled = 1; -	smp_mb(); +	up_write(&umhelper_sem); +  	/*  	 * From now on call_usermodehelper_exec() won't start any new  	 * helpers, so it is sufficient if running_helpers turns out to @@ -314,7 +331,9 @@ int usermodehelper_disable(void)  	if (retval)  		return 0; +	down_write(&umhelper_sem);  	usermodehelper_disabled = 0; +	up_write(&umhelper_sem);  	return -EAGAIN;  } @@ -323,7 +342,9 @@ int usermodehelper_disable(void)   */  void usermodehelper_enable(void)  { +	down_write(&umhelper_sem);  	usermodehelper_disabled = 0; +	up_write(&umhelper_sem);  }  /** diff --git a/kernel/kthread.c b/kernel/kthread.c index b6d216a9263..3d3de633702 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -59,6 +59,31 @@ int kthread_should_stop(void)  EXPORT_SYMBOL(kthread_should_stop);  /** + * kthread_freezable_should_stop - should this freezable kthread return now? + * @was_frozen: optional out parameter, indicates whether %current was frozen + * + * kthread_should_stop() for freezable kthreads, which will enter + * refrigerator if necessary.  This function is safe from kthread_stop() / + * freezer deadlock and freezable kthreads should use this function instead + * of calling try_to_freeze() directly. 
+ */ +bool kthread_freezable_should_stop(bool *was_frozen) +{ +	bool frozen = false; + +	might_sleep(); + +	if (unlikely(freezing(current))) +		frozen = __refrigerator(true); + +	if (was_frozen) +		*was_frozen = frozen; + +	return kthread_should_stop(); +} +EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); + +/**   * kthread_data - return data value specified on kthread creation   * @task: kthread task in question   * @@ -257,7 +282,7 @@ int kthreadd(void *unused)  	set_cpus_allowed_ptr(tsk, cpu_all_mask);  	set_mems_allowed(node_states[N_HIGH_MEMORY]); -	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; +	current->flags |= PF_NOFREEZE;  	for (;;) {  		set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e69434b070d..b2e08c932d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -44,6 +44,7 @@  #include <linux/stringify.h>  #include <linux/bitops.h>  #include <linux/gfp.h> +#include <linux/kmemcheck.h>  #include <asm/sections.h> @@ -2948,7 +2949,12 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,  void lockdep_init_map(struct lockdep_map *lock, const char *name,  		      struct lock_class_key *key, int subclass)  { -	memset(lock, 0, sizeof(*lock)); +	int i; + +	kmemcheck_mark_initialized(lock, sizeof(*lock)); + +	for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) +		lock->class_cache[i] = NULL;  #ifdef CONFIG_LOCK_STAT  	lock->cpu = raw_smp_processor_id(); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a6b0503574e..6d6d2887033 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -43,8 +43,6 @@ int in_suspend __nosavedata;  enum {  	HIBERNATION_INVALID,  	HIBERNATION_PLATFORM, -	HIBERNATION_TEST, -	HIBERNATION_TESTPROC,  	HIBERNATION_SHUTDOWN,  	HIBERNATION_REBOOT,  	/* keep last */ @@ -55,7 +53,7 @@ enum {  static int hibernation_mode = HIBERNATION_SHUTDOWN; -static bool freezer_test_done; +bool freezer_test_done;  static const struct platform_hibernation_ops *hibernation_ops; @@ -71,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops)  		WARN_ON(1);  		return;  	} -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	hibernation_ops = ops;  	if (ops)  		hibernation_mode = HIBERNATION_PLATFORM;  	else if (hibernation_mode == HIBERNATION_PLATFORM)  		hibernation_mode = HIBERNATION_SHUTDOWN; -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  }  static bool entering_platform_hibernation; @@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void)  	mdelay(5000);  } -static int hibernation_testmode(int mode) -{ -	if (hibernation_mode == mode) { -		hibernation_debug_sleep(); -		return 1; -	} -	return 0; -} -  static int hibernation_test(int level)  {  	if (pm_test_level == level) { @@ -114,7 +103,6 @@ static int hibernation_test(int level)  	return 0;  }  #else /* !CONFIG_PM_DEBUG */ -static int hibernation_testmode(int mode) { return 0; }  static int hibernation_test(int level) { return 0; }  #endif /* !CONFIG_PM_DEBUG */ @@ -278,8 +266,7 @@ static int create_image(int platform_mode)  		goto Platform_finish;  	error = disable_nonboot_cpus(); -	if (error || hibernation_test(TEST_CPUS) -	    || hibernation_testmode(HIBERNATION_TEST)) +	if (error || hibernation_test(TEST_CPUS))  		goto Enable_cpus;  	local_irq_disable(); @@ -333,7 +320,7 @@ static int create_image(int platform_mode)   */  int hibernation_snapshot(int platform_mode)  { -	pm_message_t msg = PMSG_RECOVER; +	pm_message_t msg;  	int error;  	error = platform_begin(platform_mode); @@ -349,8 +336,7 
@@ int hibernation_snapshot(int platform_mode)  	if (error)  		goto Cleanup; -	if (hibernation_test(TEST_FREEZER) || -		hibernation_testmode(HIBERNATION_TESTPROC)) { +	if (hibernation_test(TEST_FREEZER)) {  		/*  		 * Indicate to the caller that we are returning due to a @@ -362,26 +348,26 @@ int hibernation_snapshot(int platform_mode)  	error = dpm_prepare(PMSG_FREEZE);  	if (error) { -		dpm_complete(msg); +		dpm_complete(PMSG_RECOVER);  		goto Cleanup;  	}  	suspend_console();  	pm_restrict_gfp_mask(); +  	error = dpm_suspend(PMSG_FREEZE); -	if (error) -		goto Recover_platform; -	if (hibernation_test(TEST_DEVICES)) -		goto Recover_platform; +	if (error || hibernation_test(TEST_DEVICES)) +		platform_recover(platform_mode); +	else +		error = create_image(platform_mode); -	error = create_image(platform_mode);  	/* -	 * Control returns here (1) after the image has been created or the +	 * In the case that we call create_image() above, the control +	 * returns here (1) after the image has been created or the  	 * image creation has failed and (2) after a successful restore.  	 */ - Resume_devices:  	/* We may need to release the preallocated image pages here. */  	if (error || !in_suspend)  		swsusp_free(); @@ -399,10 +385,6 @@ int hibernation_snapshot(int platform_mode)  	platform_end(platform_mode);  	return error; - Recover_platform: -	platform_recover(platform_mode); -	goto Resume_devices; -   Cleanup:  	swsusp_free();  	goto Close; @@ -590,9 +572,6 @@ int hibernation_platform_enter(void)  static void power_down(void)  {  	switch (hibernation_mode) { -	case HIBERNATION_TEST: -	case HIBERNATION_TESTPROC: -		break;  	case HIBERNATION_REBOOT:  		kernel_restart(NULL);  		break; @@ -611,17 +590,6 @@ static void power_down(void)  	while(1);  } -static int prepare_processes(void) -{ -	int error = 0; - -	if (freeze_processes()) { -		error = -EBUSY; -		thaw_processes(); -	} -	return error; -} -  /**   * hibernate - Carry out system hibernation, including saving the image.   */ @@ -629,7 +597,7 @@ int hibernate(void)  {  	int error; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	/* The snapshot device should not be opened while we're running */  	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {  		error = -EBUSY; @@ -654,7 +622,7 @@ int hibernate(void)  	sys_sync();  	printk("done.\n"); -	error = prepare_processes(); +	error = freeze_processes();  	if (error)  		goto Finish; @@ -697,7 +665,7 @@ int hibernate(void)  	pm_restore_console();  	atomic_inc(&snapshot_device_available);   Unlock: -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return error;  } @@ -811,11 +779,13 @@ static int software_resume(void)  		goto close_finish;  	error = create_basic_memory_bitmaps(); -	if (error) +	if (error) { +		usermodehelper_enable();  		goto close_finish; +	}  	pr_debug("PM: Preparing processes for restore.\n"); -	error = prepare_processes(); +	error = freeze_processes();  	if (error) {  		swsusp_close(FMODE_READ);  		goto Done; @@ -855,8 +825,6 @@ static const char * const hibernation_modes[] = {  	[HIBERNATION_PLATFORM]	= "platform",  	[HIBERNATION_SHUTDOWN]	= "shutdown",  	[HIBERNATION_REBOOT]	= "reboot", -	[HIBERNATION_TEST]	= "test", -	[HIBERNATION_TESTPROC]	= "testproc",  };  /* @@ -865,17 +833,15 @@ static const char * const hibernation_modes[] = {   * Hibernation can be handled in several ways.  There are a few different ways   * to put the system into the sleep state: using the platform driver (e.g. 
ACPI   * or other hibernation_ops), powering it off or rebooting it (for testing - * mostly), or using one of the two available test modes. + * mostly).   *   * The sysfs file /sys/power/disk provides an interface for selecting the   * hibernation mode to use.  Reading from this file causes the available modes - * to be printed.  There are 5 modes that can be supported: + * to be printed.  There are 3 modes that can be supported:   *   *	'platform'   *	'shutdown'   *	'reboot' - *	'test' - *	'testproc'   *   * If a platform hibernation driver is in use, 'platform' will be supported   * and will be used by default.  Otherwise, 'shutdown' will be used by default. @@ -899,8 +865,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,  		switch (i) {  		case HIBERNATION_SHUTDOWN:  		case HIBERNATION_REBOOT: -		case HIBERNATION_TEST: -		case HIBERNATION_TESTPROC:  			break;  		case HIBERNATION_PLATFORM:  			if (hibernation_ops) @@ -929,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,  	p = memchr(buf, '\n', n);  	len = p ? p - buf : n; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {  		if (len == strlen(hibernation_modes[i])  		    && !strncmp(buf, hibernation_modes[i], len)) { @@ -941,8 +905,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,  		switch (mode) {  		case HIBERNATION_SHUTDOWN:  		case HIBERNATION_REBOOT: -		case HIBERNATION_TEST: -		case HIBERNATION_TESTPROC:  			hibernation_mode = mode;  			break;  		case HIBERNATION_PLATFORM: @@ -957,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,  	if (!error)  		pr_debug("PM: Hibernation mode set to '%s'\n",  			 hibernation_modes[mode]); -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return error ? error : n;  } @@ -984,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,  	if (maj != MAJOR(res) || min != MINOR(res))  		goto out; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	swsusp_resume_device = res; -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	printk(KERN_INFO "PM: Starting manual resume from disk\n");  	noresume = 0;  	software_resume(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 36e0f0903c3..9824b41e5a1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -3,7 +3,7 @@   *   * Copyright (c) 2003 Patrick Mochel   * Copyright (c) 2003 Open Source Development Lab - *  + *   * This file is released under the GPLv2   *   */ @@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,  	p = memchr(buf, '\n', n);  	len = p ? p - buf : n; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	level = TEST_FIRST;  	for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) @@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,  			break;  		} -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return error ? error : n;  } @@ -240,7 +240,7 @@ struct kobject *power_kobj;   *	'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and   *	'disk' (Suspend-to-Disk).   * - *	store() accepts one of those strings, translates it into the  + *	store() accepts one of those strings, translates it into the   *	proper enumerated value, and initiates a suspend transition.   
*/  static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,  	/* First, check if we are requested to hibernate */  	if (len == 4 && !strncmp(buf, "disk", len)) {  		error = hibernate(); -  goto Exit; +		goto Exit;  	}  #ifdef CONFIG_SUSPEND diff --git a/kernel/power/power.h b/kernel/power/power.h index 23a2db1ec44..0c4defe6d3b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)  #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)  /* kernel/power/hibernate.c */ +extern bool freezer_test_done; +  extern int hibernation_snapshot(int platform_mode);  extern int hibernation_restore(int platform_mode);  extern int hibernation_platform_enter(void); diff --git a/kernel/power/process.c b/kernel/power/process.c index addbbe5531b..77274c9ba2f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,16 +22,7 @@   */  #define TIMEOUT	(20 * HZ) -static inline int freezable(struct task_struct * p) -{ -	if ((p == current) || -	    (p->flags & PF_NOFREEZE) || -	    (p->exit_state != 0)) -		return 0; -	return 1; -} - -static int try_to_freeze_tasks(bool sig_only) +static int try_to_freeze_tasks(bool user_only)  {  	struct task_struct *g, *p;  	unsigned long end_time; @@ -46,17 +37,14 @@ static int try_to_freeze_tasks(bool sig_only)  	end_time = jiffies + TIMEOUT; -	if (!sig_only) +	if (!user_only)  		freeze_workqueues_begin();  	while (true) {  		todo = 0;  		read_lock(&tasklist_lock);  		do_each_thread(g, p) { -			if (frozen(p) || !freezable(p)) -				continue; - -			if (!freeze_task(p, sig_only)) +			if (p == current || !freeze_task(p))  				continue;  			/* @@ -77,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only)  		} while_each_thread(g, p);  		read_unlock(&tasklist_lock); -		if (!sig_only) { +		if (!user_only) {  			wq_busy = freeze_workqueues_busy();  			todo += wq_busy;  		} @@ -103,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only)  	elapsed_csecs = elapsed_csecs64;  	if (todo) { -		/* This does not unfreeze processes that are already frozen -		 * (we have slightly ugly calling convention in that respect, -		 * and caller must call thaw_processes() if something fails), -		 * but it cleans up leftover PF_FREEZE requests. -		 */  		printk("\n");  		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "  		       "(%d tasks refusing to freeze, wq_busy=%d):\n", @@ -115,15 +98,11 @@ static int try_to_freeze_tasks(bool sig_only)  		       elapsed_csecs / 100, elapsed_csecs % 100,  		       todo - wq_busy, wq_busy); -		thaw_workqueues(); -  		read_lock(&tasklist_lock);  		do_each_thread(g, p) { -			task_lock(p); -			if (!wakeup && freezing(p) && !freezer_should_skip(p)) +			if (!wakeup && !freezer_should_skip(p) && +			    p != current && freezing(p) && !frozen(p))  				sched_show_task(p); -			cancel_freezing(p); -			task_unlock(p);  		} while_each_thread(g, p);  		read_unlock(&tasklist_lock);  	} else { @@ -136,12 +115,18 @@ static int try_to_freeze_tasks(bool sig_only)  /**   * freeze_processes - Signal user space processes to enter the refrigerator. + * + * On success, returns 0.  On failure, -errno and system is fully thawed.   */  int freeze_processes(void)  {  	int error; +	if (!pm_freezing) +		atomic_inc(&system_freezing_cnt); +  	printk("Freezing user space processes ... 
"); +	pm_freezing = true;  	error = try_to_freeze_tasks(true);  	if (!error) {  		printk("done."); @@ -150,17 +135,22 @@ int freeze_processes(void)  	printk("\n");  	BUG_ON(in_atomic()); +	if (error) +		thaw_processes();  	return error;  }  /**   * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + * + * On success, returns 0.  On failure, -errno and system is fully thawed.   */  int freeze_kernel_threads(void)  {  	int error;  	printk("Freezing remaining freezable tasks ... "); +	pm_nosig_freezing = true;  	error = try_to_freeze_tasks(false);  	if (!error)  		printk("done."); @@ -168,37 +158,32 @@ int freeze_kernel_threads(void)  	printk("\n");  	BUG_ON(in_atomic()); +	if (error) +		thaw_processes();  	return error;  } -static void thaw_tasks(bool nosig_only) +void thaw_processes(void)  {  	struct task_struct *g, *p; -	read_lock(&tasklist_lock); -	do_each_thread(g, p) { -		if (!freezable(p)) -			continue; +	if (pm_freezing) +		atomic_dec(&system_freezing_cnt); +	pm_freezing = false; +	pm_nosig_freezing = false; -		if (nosig_only && should_send_signal(p)) -			continue; +	oom_killer_enable(); + +	printk("Restarting tasks ... "); -		if (cgroup_freezing_or_frozen(p)) -			continue; +	thaw_workqueues(); -		thaw_process(p); +	read_lock(&tasklist_lock); +	do_each_thread(g, p) { +		__thaw_task(p);  	} while_each_thread(g, p);  	read_unlock(&tasklist_lock); -} -void thaw_processes(void) -{ -	oom_killer_enable(); - -	printk("Restarting tasks ... "); -	thaw_workqueues(); -	thaw_tasks(true); -	thaw_tasks(false);  	schedule();  	printk("done.\n");  } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4953dc054c5..4fd51beed87 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops;   */  void suspend_set_ops(const struct platform_suspend_ops *ops)  { -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	suspend_ops = ops; -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  }  EXPORT_SYMBOL_GPL(suspend_set_ops); @@ -106,13 +106,11 @@ static int suspend_prepare(void)  		goto Finish;  	error = suspend_freeze_processes(); -	if (error) { -		suspend_stats.failed_freeze++; -		dpm_save_failed_step(SUSPEND_FREEZE); -	} else +	if (!error)  		return 0; -	suspend_thaw_processes(); +	suspend_stats.failed_freeze++; +	dpm_save_failed_step(SUSPEND_FREEZE);  	usermodehelper_enable();   Finish:  	pm_notifier_call_chain(PM_POST_SUSPEND); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535c2b8..78bdb4404aa 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -30,28 +30,6 @@  #include "power.h" -/* - * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and - * will be removed in the future.  They are only preserved here for - * compatibility with existing userland utilities. - */ -#define SNAPSHOT_SET_SWAP_FILE	_IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) -#define SNAPSHOT_PMOPS		_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) - -#define PMOPS_PREPARE	1 -#define PMOPS_ENTER	2 -#define PMOPS_FINISH	3 - -/* - * NOTE: The following ioctl definitions are wrong and have been replaced with - * correct ones.  They are only preserved here for compatibility with existing - * userland utilities and will be removed in the future. 
- */ -#define SNAPSHOT_ATOMIC_SNAPSHOT	_IOW(SNAPSHOT_IOC_MAGIC, 3, void *) -#define SNAPSHOT_SET_IMAGE_SIZE		_IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long) -#define SNAPSHOT_AVAIL_SWAP		_IOR(SNAPSHOT_IOC_MAGIC, 7, void *) -#define SNAPSHOT_GET_SWAP_PAGE		_IOR(SNAPSHOT_IOC_MAGIC, 8, void *) -  #define SNAPSHOT_MINOR	231 @@ -71,7 +49,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)  	struct snapshot_data *data;  	int error; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {  		error = -EBUSY; @@ -123,7 +101,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)  	data->platform_support = 0;   Unlock: -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return error;  } @@ -132,7 +110,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)  {  	struct snapshot_data *data; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	swsusp_free();  	free_basic_memory_bitmaps(); @@ -146,7 +124,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)  			PM_POST_HIBERNATION : PM_POST_RESTORE);  	atomic_inc(&snapshot_device_available); -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return 0;  } @@ -158,7 +136,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,  	ssize_t res;  	loff_t pg_offp = *offp & ~PAGE_MASK; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	data = filp->private_data;  	if (!data->ready) { @@ -179,7 +157,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,  		*offp += res;   Unlock: -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return res;  } @@ -191,7 +169,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,  	ssize_t res;  	loff_t pg_offp = *offp & ~PAGE_MASK; -	mutex_lock(&pm_mutex); +	lock_system_sleep();  	data = filp->private_data; @@ -208,20 +186,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,  	if (res > 0)  		*offp += res;  unlock: -	mutex_unlock(&pm_mutex); +	unlock_system_sleep();  	return res;  } -static void snapshot_deprecated_ioctl(unsigned int cmd) -{ -	if (printk_ratelimit()) -		printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " -				"be removed soon, update your suspend-to-disk " -				"utilities\n", -				__builtin_return_address(0), cmd); -} -  static long snapshot_ioctl(struct file *filp, unsigned int cmd,  							unsigned long arg)  { @@ -257,11 +226,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  			break;  		error = freeze_processes(); -		if (error) { -			thaw_processes(); +		if (error)  			usermodehelper_enable(); -		} -		if (!error) +		else  			data->frozen = 1;  		break; @@ -274,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  		data->frozen = 0;  		break; -	case SNAPSHOT_ATOMIC_SNAPSHOT: -		snapshot_deprecated_ioctl(cmd);  	case SNAPSHOT_CREATE_IMAGE:  		if (data->mode != O_RDONLY || !data->frozen  || data->ready) {  			error = -EPERM; @@ -283,10 +248,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  		}  		pm_restore_gfp_mask();  		error = hibernation_snapshot(data->platform_support); -		if (!error) +		if (!error) {  			error = put_user(in_suspend, (int __user *)arg); -		if (!error) -			data->ready = 1; +			if (!error && !freezer_test_done) +				data->ready = 1; +			if (freezer_test_done) { +				freezer_test_done = false; +				thaw_processes(); +			} +		}  		break;  	case SNAPSHOT_ATOMIC_RESTORE: @@ -305,8 +275,6 @@ static long snapshot_ioctl(struct 
file *filp, unsigned int cmd,  		data->ready = 0;  		break; -	case SNAPSHOT_SET_IMAGE_SIZE: -		snapshot_deprecated_ioctl(cmd);  	case SNAPSHOT_PREF_IMAGE_SIZE:  		image_size = arg;  		break; @@ -321,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  		error = put_user(size, (loff_t __user *)arg);  		break; -	case SNAPSHOT_AVAIL_SWAP: -		snapshot_deprecated_ioctl(cmd);  	case SNAPSHOT_AVAIL_SWAP_SIZE:  		size = count_swap_pages(data->swap, 1);  		size <<= PAGE_SHIFT;  		error = put_user(size, (loff_t __user *)arg);  		break; -	case SNAPSHOT_GET_SWAP_PAGE: -		snapshot_deprecated_ioctl(cmd);  	case SNAPSHOT_ALLOC_SWAP_PAGE:  		if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {  			error = -ENODEV; @@ -353,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  		free_all_swap_pages(data->swap);  		break; -	case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ -		snapshot_deprecated_ioctl(cmd); -		if (!swsusp_swap_in_use()) { -			/* -			 * User space encodes device types as two-byte values, -			 * so we need to recode them -			 */ -			if (old_decode_dev(arg)) { -				data->swap = swap_type_of(old_decode_dev(arg), -							0, NULL); -				if (data->swap < 0) -					error = -ENODEV; -			} else { -				data->swap = -1; -				error = -EINVAL; -			} -		} else { -			error = -EPERM; -		} -		break; -  	case SNAPSHOT_S2RAM:  		if (!data->frozen) {  			error = -EPERM; @@ -396,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  			error = hibernation_platform_enter();  		break; -	case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ -		snapshot_deprecated_ioctl(cmd); -		error = -EINVAL; - -		switch (arg) { - -		case PMOPS_PREPARE: -			data->platform_support = 1; -			error = 0; -			break; - -		case PMOPS_ENTER: -			if (data->platform_support) -				error = hibernation_platform_enter(); -			break; - -		case PMOPS_FINISH: -			if (data->platform_support) -				error = 0; -			break; - -		default: -			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); - -		} -		break; -  	case SNAPSHOT_SET_SWAP_AREA:  		if (swsusp_swap_in_use()) {  			error = -EPERM; diff --git a/kernel/printk.c b/kernel/printk.c index 1455a0d4eed..7982a0a841e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1293,10 +1293,11 @@ again:  	raw_spin_lock(&logbuf_lock);  	if (con_start != log_end)  		retry = 1; +	raw_spin_unlock_irqrestore(&logbuf_lock, flags); +  	if (retry && console_trylock())  		goto again; -	raw_spin_unlock_irqrestore(&logbuf_lock, flags);  	if (wake_klogd)  		wake_up_klogd();  } diff --git a/kernel/sched.c b/kernel/sched.c index 0e9344a71be..d6b149ccf92 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@  #include <linux/ctype.h>  #include <linux/ftrace.h>  #include <linux/slab.h> +#include <linux/init_task.h>  #include <asm/tlb.h>  #include <asm/irq_regs.h> @@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion);   * This waits for either a completion of a specific task to be signaled or for a   * specified timeout to expire. The timeout is in jiffies. It is not   * interruptible. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed.   */  unsigned long __sched  wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout);   *   * This waits for completion of a specific task to be signaled. It is   * interruptible. 
+ * + * The return value is -ERESTARTSYS if interrupted, 0 if completed.   */  int __sched wait_for_completion_interruptible(struct completion *x)  { @@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);   *   * This waits for either a completion of a specific task to be signaled or for a   * specified timeout to expire. It is interruptible. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed.   */  long __sched  wait_for_completion_interruptible_timeout(struct completion *x, @@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);   *   * This waits to be signaled for completion of a specific task. It can be   * interrupted by a kill signal. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed.   */  int __sched wait_for_completion_killable(struct completion *x)  { @@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable);   * This waits for either a completion of a specific task to be   * signaled or for a specified timeout to expire. It can be   * interrupted by a kill signal. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed.   */  long __sched  wait_for_completion_killable_timeout(struct completion *x, @@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)  	 */  	idle->sched_class = &idle_sched_class;  	ftrace_graph_init_idle_task(idle, cpu); +#if defined(CONFIG_SMP) +	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif  }  /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5c9e67923b7..8a39fa3e3c6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)  		list_del_leaf_cfs_rq(cfs_rq);  } +static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +{ +	long tg_weight; + +	/* +	 * Use this CPU's actual weight instead of the last load_contribution +	 * to gain a more accurate current total weight. See +	 * update_cfs_rq_load_contribution(). +	 */ +	tg_weight = atomic_read(&tg->load_weight); +	tg_weight -= cfs_rq->load_contribution; +	tg_weight += cfs_rq->load.weight; + +	return tg_weight; +} +  static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)  { -	long load_weight, load, shares; +	long tg_weight, load, shares; +	tg_weight = calc_tg_weight(tg, cfs_rq);  	load = cfs_rq->load.weight; -	load_weight = atomic_read(&tg->load_weight); -	load_weight += load; -	load_weight -= cfs_rq->load_contribution; -  	shares = (tg->shares * load); -	if (load_weight) -		shares /= load_weight; +	if (tg_weight) +		shares /= tg_weight;  	if (shares < MIN_SHARES)  		shares = MIN_SHARES; @@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)  static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)  { -	if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) +	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)  		return;  	__return_cfs_rq_runtime(cfs_rq); @@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)   * Adding load to a group doesn't make a group heavier, but can cause movement   * of group shares between cpus. Assuming the shares were perfectly aligned one   * can calculate the shift in shares. 
+ * + * Calculate the effective load difference if @wl is added (subtracted) to @tg + * on this @cpu and results in a total addition (subtraction) of @wg to the + * total group weight. + * + * Given a runqueue weight distribution (rw_i) we can compute a shares + * distribution (s_i) using: + * + *   s_i = rw_i / \Sum rw_j						(1) + * + * Suppose we have 4 CPUs and our @tg is a direct child of the root group and + * has 7 equal weight tasks, distributed as below (rw_i), with the resulting + * shares distribution (s_i): + * + *   rw_i = {   2,   4,   1,   0 } + *   s_i  = { 2/7, 4/7, 1/7,   0 } + * + * As per wake_affine() we're interested in the load of two CPUs (the CPU the + * task used to run on and the CPU the waker is running on), we need to + * compute the effect of waking a task on either CPU and, in case of a sync + * wakeup, compute the effect of the current task going to sleep. + * + * So for a change of @wl to the local @cpu with an overall group weight change + * of @wl we can compute the new shares distribution (s'_i) using: + * + *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2) + * + * Suppose we're interested in CPUs 0 and 1, and want to compute the load + * differences in waking a task to CPU 0. The additional task changes the + * weight and shares distributions like: + * + *   rw'_i = {   3,   4,   1,   0 } + *   s'_i  = { 3/8, 4/8, 1/8,   0 } + * + * We can then compute the difference in effective weight by using: + * + *   dw_i = S * (s'_i - s_i)						(3) + * + * Where 'S' is the group weight as seen by its parent. + * + * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) + * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - + * 4/7) times the weight of the group.   */  static long effective_load(struct task_group *tg, int cpu, long wl, long wg)  {  	struct sched_entity *se = tg->se[cpu]; -	if (!tg->parent) +	if (!tg->parent)	/* the trivial, non-cgroup case */  		return wl;  	for_each_sched_entity(se) { -		long lw, w; +		long w, W;  		tg = se->my_q->tg; -		w = se->my_q->load.weight; -		/* use this cpu's instantaneous contribution */ -		lw = atomic_read(&tg->load_weight); -		lw -= se->my_q->load_contribution; -		lw += w + wg; +		/* +		 * W = @wg + \Sum rw_j +		 */ +		W = wg + calc_tg_weight(tg, se->my_q); -		wl += w; +		/* +		 * w = rw_i + @wl +		 */ +		w = se->my_q->load.weight + wl; -		if (lw > 0 && wl < lw) -			wl = (wl * tg->shares) / lw; +		/* +		 * wl = S * s'_i; see (2) +		 */ +		if (W > 0 && w < W) +			wl = (w * tg->shares) / W;  		else  			wl = tg->shares; -		/* zero point is MIN_SHARES */ +		/* +		 * Per the above, wl is the new se->load.weight value; since +		 * those are clipped to [MIN_SHARES, ...) do so now. See +		 * calc_cfs_shares(). +		 */  		if (wl < MIN_SHARES)  			wl = MIN_SHARES; + +		/* +		 * wl = dw_i = S * (s'_i - s_i); see (3) +		 */  		wl -= se->load.weight; + +		/* +		 * Recursively apply this logic to all parent groups to compute +		 * the final effective load change on the root group. Since +		 * only the @tg group gets extra weight, all parent groups can +		 * only redistribute existing shares. @wl is the shift in shares +		 * resulting from this level per the above. 
+		 */  		wg = 0;  	} @@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)  	int cpu = smp_processor_id();  	int prev_cpu = task_cpu(p);  	struct sched_domain *sd; -	int i; +	struct sched_group *sg; +	int i, smt = 0;  	/*  	 * If the task is going to be woken-up on this cpu and if it is @@ -2269,25 +2347,40 @@ static int select_idle_sibling(struct task_struct *p, int target)  	 * Otherwise, iterate the domains and find an elegible idle cpu.  	 */  	rcu_read_lock(); +again:  	for_each_domain(target, sd) { +		if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) +			continue; + +		if (smt && !(sd->flags & SD_SHARE_CPUPOWER)) +			break; +  		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))  			break; -		for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { -			if (idle_cpu(i)) { -				target = i; -				break; +		sg = sd->groups; +		do { +			if (!cpumask_intersects(sched_group_cpus(sg), +						tsk_cpus_allowed(p))) +				goto next; + +			for_each_cpu(i, sched_group_cpus(sg)) { +				if (!idle_cpu(i)) +					goto next;  			} -		} -		/* -		 * Lets stop looking for an idle sibling when we reached -		 * the domain that spans the current cpu and prev_cpu. -		 */ -		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && -		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) -			break; +			target = cpumask_first_and(sched_group_cpus(sg), +					tsk_cpus_allowed(p)); +			goto done; +next: +			sg = sg->next; +		} while (sg != sd->groups); +	} +	if (!smt) { +		smt = 1; +		goto again;  	} +done:  	rcu_read_unlock();  	return target; @@ -3511,7 +3604,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,  }  /** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. + * update_sd_lb_stats - Update sched_domain's statistics for load balancing.   * @sd: sched_domain whose statistics are to be updated.   * @this_cpu: Cpu for which load balance is currently performed.   
* @idle: Idle status of this_cpu diff --git a/kernel/sched_features.h b/kernel/sched_features.h index efa0a7b75dd..84802245abd 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1)  SCHED_FEAT(TTWU_QUEUE, 1)  SCHED_FEAT(FORCE_SD_OVERLAP, 0) +SCHED_FEAT(RT_RUNTIME_SHARE, 1) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 056cbd2e2a2..583a1368afe 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq)  {  	int more = 0; +	if (!sched_feat(RT_RUNTIME_SHARE)) +		return more; +  	if (rt_rq->rt_time > rt_rq->rt_runtime) {  		raw_spin_unlock(&rt_rq->rt_runtime_lock);  		more = do_balance_runtime(rt_rq); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 6318b511afa..a650694883a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1354,7 +1354,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,  	fput(file);  out_putname: -	putname(pathname); +	__putname(pathname);  out:  	return result;  } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c436e790b21..8a46f5d6450 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -195,7 +195,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)  		struct alarm *alarm;  		ktime_t expired = next->expires; -		if (expired.tv64 >= now.tv64) +		if (expired.tv64 > now.tv64)  			break;  		alarm = container_of(next, struct alarm, node); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1ecd6ba36d6..c4eb71c8b2e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -387,6 +387,7 @@ void clockevents_exchange_device(struct clock_event_device *old,  	 * released list and do a notify add later.  	 */  	if (old) { +		old->event_handler = clockevents_handle_noop;  		clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);  		list_del(&old->list);  		list_add(&old->list, &clockevents_released); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index cfc65e1eb9f..d3ad022136e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -548,7 +548,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs)  	 * note a margin of 12.5% is used because this can be computed with  	 * a shift, versus say 10% which would require division.  	 */ -	return max_nsecs - (max_nsecs >> 5); +	return max_nsecs - (max_nsecs >> 3);  }  #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -647,7 +647,7 @@ static void clocksource_enqueue(struct clocksource *cs)  /**   * __clocksource_updatefreq_scale - Used update clocksource with new freq - * @t:		clocksource to be registered + * @cs:		clocksource to be registered   * @scale:	Scale factor multiplied against freq to get clocksource hz   * @freq:	clocksource frequency (cycles per second) divided by scale   * @@ -669,7 +669,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)  	 * ~ 0.06ppm granularity for NTP. 
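A quick way to see why the shift changes from 5 to 3 in the clocksource hunks above: a right shift by 3 divides by 8 and so reserves exactly the 12.5% margin the comment promises, while the old shift by 5 reserved only 1/32 (about 3.1%). A throwaway userspace check, with 1 s chosen as an arbitrary example value for max_nsecs:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_nsecs = 1000000000ULL;	/* pretend the clocksource wraps after 1 s */

	printf(">> 3: margin %llu ns (12.5%%), usable %llu ns\n",
	       (unsigned long long)(max_nsecs >> 3),
	       (unsigned long long)(max_nsecs - (max_nsecs >> 3)));
	printf(">> 5: margin %llu ns (~3.1%%), usable %llu ns\n",
	       (unsigned long long)(max_nsecs >> 5),
	       (unsigned long long)(max_nsecs - (max_nsecs >> 5)));
	return 0;
}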
We apply the same 12.5%  	 * margin as we do in clocksource_max_deferment()  	 */ -	sec = (cs->mask - (cs->mask >> 5)); +	sec = (cs->mask - (cs->mask >> 3));  	do_div(sec, freq);  	do_div(sec, scale);  	if (!sec) @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);  /**   * __clocksource_register_scale - Used to install new clocksources - * @t:		clocksource to be registered + * @cs:		clocksource to be registered   * @scale:	Scale factor multiplied against freq to get clocksource hz   * @freq:	clocksource frequency (cycles per second) divided by scale   * @@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);  /**   * clocksource_register - Used to install new clocksources - * @t:		clocksource to be registered + * @cs:		clocksource to be registered   *   * Returns -EBUSY if registration fails, zero otherwise.   */ @@ -761,6 +761,8 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)  /**   * clocksource_change_rating - Change the rating of a registered clocksource + * @cs:		clocksource to be changed + * @rating:	new rating   */  void clocksource_change_rating(struct clocksource *cs, int rating)  { @@ -772,6 +774,7 @@ EXPORT_SYMBOL(clocksource_change_rating);  /**   * clocksource_unregister - remove a registered clocksource + * @cs:	clocksource to be unregistered   */  void clocksource_unregister(struct clocksource *cs)  { @@ -787,6 +790,7 @@ EXPORT_SYMBOL(clocksource_unregister);  /**   * sysfs_show_current_clocksources - sysfs interface for current clocksource   * @dev:	unused + * @attr:	unused   * @buf:	char buffer to be filled with clocksource list   *   * Provides sysfs interface for listing current clocksource. @@ -807,6 +811,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,  /**   * sysfs_override_clocksource - interface for manually overriding clocksource   * @dev:	unused + * @attr:	unused   * @buf:	name of override clocksource   * @count:	length of buffer   * @@ -842,6 +847,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,  /**   * sysfs_show_available_clocksources - sysfs interface for listing clocksource   * @dev:	unused + * @attr:	unused   * @buf:	char buffer to be filled with clocksource list   *   * Provides sysfs interface for listing registered clocksources diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f954282d9a8..fd4a7b1625a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)  	     (dev->features & CLOCK_EVT_FEAT_C3STOP))  		return 0; -	clockevents_exchange_device(NULL, dev); +	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);  	tick_broadcast_device.evtdev = dev;  	if (!cpumask_empty(tick_get_broadcast_mask()))  		tick_broadcast_start_periodic(dev); diff --git a/kernel/timer.c b/kernel/timer.c index dbaa62422b1..9c3c62b0c4b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1368,7 +1368,7 @@ SYSCALL_DEFINE0(getppid)  	int pid;  	rcu_read_lock(); -	pid = task_tgid_vnr(current->real_parent); +	pid = task_tgid_vnr(rcu_dereference(current->real_parent));  	rcu_read_unlock();  	return pid; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 900b409543d..b1e8943fed1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -152,7 +152,6 @@ void clear_ftrace_function(void)  	ftrace_pid_function = ftrace_stub;  } -#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST  #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST  /*   * For 
those archs that do not test ftrace_trace_stop in their @@ -1212,7 +1211,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,  	if (!src->count) {  		free_ftrace_hash_rcu(*dst);  		rcu_assign_pointer(*dst, EMPTY_HASH); -		return 0; +		/* still need to update the function records */ +		ret = 0; +		goto out;  	}  	/* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 581876f9f38..c212a7f934e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1078,7 +1078,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events)  	/* First see if we did not already create this dir */  	list_for_each_entry(system, &event_subsystems, list) {  		if (strcmp(system->name, name) == 0) { -			__get_system(system);  			system->nr_events++;  			return system->entry;  		} diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 816d3d07497..95dc31efd6d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system,  		 */  		err = replace_preds(call, NULL, ps, filter_string, true);  		if (err) -			goto fail; +			call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; +		else +			call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;  	}  	list_for_each_entry(call, &ftrace_events, list) { @@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system,  		if (strcmp(call->class->system, system->name) != 0)  			continue; +		if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER) +			continue; +  		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);  		if (!filter_item)  			goto fail_mem; @@ -1686,7 +1691,7 @@ static int replace_system_preds(struct event_subsystem *system,  		 * replace the filter for the call.  		 */  		filter = call->filter; -		call->filter = filter_item->filter; +		rcu_assign_pointer(call->filter, filter_item->filter);  		filter_item->filter = filter;  		fail = false; @@ -1741,7 +1746,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)  		filter = call->filter;  		if (!filter)  			goto out_unlock; -		call->filter = NULL; +		RCU_INIT_POINTER(call->filter, NULL);  		/* Make sure the filter is not being used */  		synchronize_sched();  		__free_filter(filter); @@ -1782,7 +1787,7 @@ out:  	 * string  	 */  	tmp = call->filter; -	call->filter = filter; +	rcu_assign_pointer(call->filter, filter);  	if (tmp) {  		/* Make sure the call is done with the filter */  		synchronize_sched();  |
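The call->filter updates above all follow the same RCU publish-then-wait pattern. A minimal sketch of that pattern, assuming a hypothetical struct my_obj in place of the ftrace event call, and assuming the caller already holds the mutex that serializes updaters (as apply_event_filter() does with event_mutex):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_filter;			/* opaque payload, stands in for event_filter */

struct my_obj {
	struct my_filter __rcu *filter;
};

/* Hypothetical helper, not a kernel API; updaters are serialized by a mutex. */
static void my_swap_filter(struct my_obj *obj, struct my_filter *new)
{
	struct my_filter *old = rcu_dereference_protected(obj->filter, 1);

	rcu_assign_pointer(obj->filter, new);	/* readers now see 'new' */
	synchronize_sched();			/* wait out preempt-disabled readers */
	kfree(old);				/* stands in for __free_filter() */
}

RCU_INIT_POINTER() is used in the NULL case of the patch instead of rcu_assign_pointer() because publishing a NULL pointer needs no memory barrier.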