summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c5
-rw-r--r--kernel/cgroup_freezer.c63
-rw-r--r--kernel/cpu.c4
-rw-r--r--kernel/cpuset.c29
-rw-r--r--kernel/events/core.c95
-rw-r--r--kernel/events/internal.h3
-rw-r--r--kernel/events/ring_buffer.c3
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/freezer.c201
-rw-r--r--kernel/irq/manage.c5
-rw-r--r--kernel/jump_label.c3
-rw-r--r--kernel/kexec.c4
-rw-r--r--kernel/kmod.c27
-rw-r--r--kernel/kthread.c27
-rw-r--r--kernel/lockdep.c8
-rw-r--r--kernel/power/hibernate.c92
-rw-r--r--kernel/power/main.c10
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/power/process.c77
-rw-r--r--kernel/power/suspend.c12
-rw-r--r--kernel/power/user.c120
-rw-r--r--kernel/printk.c3
-rw-r--r--kernel/sched.c17
-rw-r--r--kernel/sched_fair.c159
-rw-r--r--kernel/sched_features.h1
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/clocksource.c16
-rw-r--r--kernel/time/tick-broadcast.c2
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/ftrace.c5
-rw-r--r--kernel/trace/trace_events.c1
-rw-r--r--kernel/trace/trace_events_filter.c13
36 files changed, 585 insertions, 436 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d9d5648f3cd..a184470cf9b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2098,11 +2098,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
continue;
/* get old css_set pointer */
task_lock(tsk);
- if (tsk->flags & PF_EXITING) {
- /* ignore this task if it's going away */
- task_unlock(tsk);
- continue;
- }
oldcg = tsk->cgroups;
get_css_set(oldcg);
task_unlock(tsk);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 213c0351dad..fcb93fca782 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)
struct freezer, css);
}
-static inline int __cgroup_freezing_or_frozen(struct task_struct *task)
+bool cgroup_freezing(struct task_struct *task)
{
- enum freezer_state state = task_freezer(task)->state;
- return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
-}
+ enum freezer_state state;
+ bool ret;
-int cgroup_freezing_or_frozen(struct task_struct *task)
-{
- int result;
- task_lock(task);
- result = __cgroup_freezing_or_frozen(task);
- task_unlock(task);
- return result;
+ rcu_read_lock();
+ state = task_freezer(task)->state;
+ ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+ rcu_read_unlock();
+
+ return ret;
}
/*
@@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys;
* freezer_can_attach():
* cgroup_mutex (held by caller of can_attach)
*
- * cgroup_freezing_or_frozen():
- * task->alloc_lock (to get task's cgroup)
- *
* freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
* freezer->lock
* sighand->siglock (if the cgroup is freezing)
@@ -130,7 +125,7 @@ struct cgroup_subsys freezer_subsys;
* write_lock css_set_lock (cgroup iterator start)
* task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
+ * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
* sighand->siglock
*/
static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
@@ -150,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
static void freezer_destroy(struct cgroup_subsys *ss,
struct cgroup *cgroup)
{
- kfree(cgroup_freezer(cgroup));
+ struct freezer *freezer = cgroup_freezer(cgroup);
+
+ if (freezer->state != CGROUP_THAWED)
+ atomic_dec(&system_freezing_cnt);
+ kfree(freezer);
}
/* task is frozen or will freeze immediately when next it gets woken */
@@ -184,13 +183,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
- rcu_read_lock();
- if (__cgroup_freezing_or_frozen(tsk)) {
- rcu_read_unlock();
- return -EBUSY;
- }
- rcu_read_unlock();
- return 0;
+ return cgroup_freezing(tsk) ? -EBUSY : 0;
}
static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -220,7 +213,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
/* Locking avoids race with FREEZING -> THAWED transitions. */
if (freezer->state == CGROUP_FREEZING)
- freeze_task(task, true);
+ freeze_task(task);
spin_unlock_irq(&freezer->lock);
}
@@ -238,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup,
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
ntotal++;
- if (is_task_frozen_enough(task))
+ if (freezing(task) && is_task_frozen_enough(task))
nfrozen++;
}
@@ -286,10 +279,9 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
struct task_struct *task;
unsigned int num_cant_freeze_now = 0;
- freezer->state = CGROUP_FREEZING;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
- if (!freeze_task(task, true))
+ if (!freeze_task(task))
continue;
if (is_task_frozen_enough(task))
continue;
@@ -307,12 +299,9 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it))) {
- thaw_process(task);
- }
+ while ((task = cgroup_iter_next(cgroup, &it)))
+ __thaw_task(task);
cgroup_iter_end(cgroup, &it);
-
- freezer->state = CGROUP_THAWED;
}
static int freezer_change_state(struct cgroup *cgroup,
@@ -326,20 +315,24 @@ static int freezer_change_state(struct cgroup *cgroup,
spin_lock_irq(&freezer->lock);
update_if_frozen(cgroup, freezer);
- if (goal_state == freezer->state)
- goto out;
switch (goal_state) {
case CGROUP_THAWED:
+ if (freezer->state != CGROUP_THAWED)
+ atomic_dec(&system_freezing_cnt);
+ freezer->state = CGROUP_THAWED;
unfreeze_cgroup(cgroup, freezer);
break;
case CGROUP_FROZEN:
+ if (freezer->state == CGROUP_THAWED)
+ atomic_inc(&system_freezing_cnt);
+ freezer->state = CGROUP_FREEZING;
retval = try_to_freeze_cgroup(cgroup, freezer);
break;
default:
BUG();
}
-out:
+
spin_unlock_irq(&freezer->lock);
return retval;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 563f1360947..cf915b86a5f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -470,7 +470,7 @@ out:
cpu_maps_update_done();
}
-static int alloc_frozen_cpus(void)
+static int __init alloc_frozen_cpus(void)
{
if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
return -ENOMEM;
@@ -543,7 +543,7 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
}
-int cpu_hotplug_pm_sync_init(void)
+static int __init cpu_hotplug_pm_sync_init(void)
{
pm_notifier(cpu_hotplug_pm_callback, 0);
return 0;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9fe58c46a42..0b1712dba58 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task)
struct cpuset, css);
}
+#ifdef CONFIG_NUMA
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+ return task->mempolicy;
+}
+#else
+static inline bool task_has_mempolicy(struct task_struct *task)
+{
+ return false;
+}
+#endif
+
+
/* bits in struct cpuset flags field */
typedef enum {
CS_CPU_EXCLUSIVE,
@@ -949,7 +962,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
- bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed);
+ bool need_loop;
repeat:
/*
@@ -962,6 +975,14 @@ repeat:
return;
task_lock(tsk);
+ /*
+ * Determine if a loop is necessary if another thread is doing
+ * get_mems_allowed(). If at least one node remains unchanged and
+ * tsk does not have a mempolicy, then an empty nodemask will not be
+ * possible when mems_allowed is larger than a word.
+ */
+ need_loop = task_has_mempolicy(tsk) ||
+ !nodes_intersects(*newmems, tsk->mems_allowed);
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
@@ -981,11 +1002,9 @@ repeat:
/*
* Allocation of memory is very fast, we needn't sleep when waiting
- * for the read-side. No wait is necessary, however, if at least one
- * node remains unchanged.
+ * for the read-side.
*/
- while (masks_disjoint &&
- ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+ while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
task_unlock(tsk);
if (!task_curr(tsk))
yield();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e8457da6f9..58690af323e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
static void update_context_time(struct perf_event_context *ctx);
static u64 perf_event_time(struct perf_event *event);
+static void ring_buffer_attach(struct perf_event *event,
+ struct ring_buffer *rb);
+
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -2171,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
*/
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- perf_event_sched_in(cpuctx, ctx, task);
+ if (ctx->nr_events)
+ cpuctx->task_ctx = ctx;
- cpuctx->task_ctx = ctx;
+ perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
perf_pmu_enable(ctx->pmu);
perf_ctx_unlock(cpuctx, ctx);
@@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
struct ring_buffer *rb;
unsigned int events = POLL_HUP;
+ /*
+ * Race between perf_event_set_output() and perf_poll(): perf_poll()
+ * grabs the rb reference but perf_event_set_output() overrides it.
+ * Here is the timeline for two threads T1, T2:
+ * t0: T1, rb = rcu_dereference(event->rb)
+ * t1: T2, old_rb = event->rb
+ * t2: T2, event->rb = new rb
+ * t3: T2, ring_buffer_detach(old_rb)
+ * t4: T1, ring_buffer_attach(rb1)
+ * t5: T1, poll_wait(event->waitq)
+ *
+ * To avoid this problem, we grab mmap_mutex in perf_poll()
+ * thereby ensuring that the assignment of the new ring buffer
+ * and the detachment of the old buffer appear atomic to perf_poll()
+ */
+ mutex_lock(&event->mmap_mutex);
+
rcu_read_lock();
rb = rcu_dereference(event->rb);
- if (rb)
+ if (rb) {
+ ring_buffer_attach(event, rb);
events = atomic_xchg(&rb->poll, 0);
+ }
rcu_read_unlock();
+ mutex_unlock(&event->mmap_mutex);
+
poll_wait(file, &event->waitq, wait);
return events;
@@ -3496,6 +3521,53 @@ unlock:
return ret;
}
+static void ring_buffer_attach(struct perf_event *event,
+ struct ring_buffer *rb)
+{
+ unsigned long flags;
+
+ if (!list_empty(&event->rb_entry))
+ return;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+ if (!list_empty(&event->rb_entry))
+ goto unlock;
+
+ list_add(&event->rb_entry, &rb->event_list);
+unlock:
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+ struct ring_buffer *rb)
+{
+ unsigned long flags;
+
+ if (list_empty(&event->rb_entry))
+ return;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+ list_del_init(&event->rb_entry);
+ wake_up_all(&event->waitq);
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb)
+ goto unlock;
+
+ list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+ wake_up_all(&event->waitq);
+
+unlock:
+ rcu_read_unlock();
+}
+
static void rb_free_rcu(struct rcu_head *rcu_head)
{
struct ring_buffer *rb;
@@ -3521,9 +3593,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
static void ring_buffer_put(struct ring_buffer *rb)
{
+ struct perf_event *event, *n;
+ unsigned long flags;
+
if (!atomic_dec_and_test(&rb->refcount))
return;
+ spin_lock_irqsave(&rb->event_lock, flags);
+ list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+ list_del_init(&event->rb_entry);
+ wake_up_all(&event->waitq);
+ }
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+
call_rcu(&rb->rcu_head, rb_free_rcu);
}
@@ -3546,6 +3628,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
vma->vm_mm->pinned_vm -= event->mmap_locked;
rcu_assign_pointer(event->rb, NULL);
+ ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);
ring_buffer_put(rb);
@@ -3700,7 +3783,7 @@ static const struct file_operations perf_fops = {
void perf_event_wakeup(struct perf_event *event)
{
- wake_up_all(&event->waitq);
+ ring_buffer_wakeup(event);
if (event->pending_kill) {
kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5822,6 +5905,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
INIT_LIST_HEAD(&event->group_entry);
INIT_LIST_HEAD(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
+ INIT_LIST_HEAD(&event->rb_entry);
+
init_waitqueue_head(&event->waitq);
init_irq_work(&event->pending, perf_pending_event);
@@ -6028,6 +6113,8 @@ set:
old_rb = event->rb;
rcu_assign_pointer(event->rb, rb);
+ if (old_rb)
+ ring_buffer_detach(event, old_rb);
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116..64568a69937 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -22,6 +22,9 @@ struct ring_buffer {
local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */
+ /* poll crap */
+ spinlock_t event_lock;
+ struct list_head event_list;
struct perf_event_mmap_page *user_page;
void *data_pages[0];
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a2a29205cc0..7f3011c6b57 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
rb->writable = 1;
atomic_set(&rb->refcount, 1);
+
+ INIT_LIST_HEAD(&rb->event_list);
+ spin_lock_init(&rb->event_lock);
}
#ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f87..95a4141d07e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk)
tsk->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
- /* We don't want this task to be frozen prematurely */
- clear_freeze_flag(tsk);
task_unlock(tsk);
mm_update_next_owner(mm);
mmput(mm);
@@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code)
exit_rcu();
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
+ tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
schedule();
BUG();
/* Avoid "noreturn function does return". */
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d08..82780861384 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -992,7 +992,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
new_flags |= PF_FORKNOEXEC;
new_flags |= PF_STARTING;
p->flags = new_flags;
- clear_freeze_flag(p);
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 7be56c53439..9815b8d1eed 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -9,101 +9,114 @@
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
+#include <linux/kthread.h>
-/*
- * freezing is complete, mark current process as frozen
+/* total number of freezing conditions in effect */
+atomic_t system_freezing_cnt = ATOMIC_INIT(0);
+EXPORT_SYMBOL(system_freezing_cnt);
+
+/* indicate whether PM freezing is in effect, protected by pm_mutex */
+bool pm_freezing;
+bool pm_nosig_freezing;
+
+/* protects freezing and frozen transitions */
+static DEFINE_SPINLOCK(freezer_lock);
+
+/**
+ * freezing_slow_path - slow path for testing whether a task needs to be frozen
+ * @p: task to be tested
+ *
+ * This function is called by freezing() if system_freezing_cnt isn't zero
+ * and tests whether @p needs to enter and stay in frozen state. Can be
+ * called under any context. The freezers are responsible for ensuring the
+ * target tasks see the updated state.
*/
-static inline void frozen_process(void)
+bool freezing_slow_path(struct task_struct *p)
{
- if (!unlikely(current->flags & PF_NOFREEZE)) {
- current->flags |= PF_FROZEN;
- smp_wmb();
- }
- clear_freeze_flag(current);
+ if (p->flags & PF_NOFREEZE)
+ return false;
+
+ if (pm_nosig_freezing || cgroup_freezing(p))
+ return true;
+
+ if (pm_freezing && !(p->flags & PF_KTHREAD))
+ return true;
+
+ return false;
}
+EXPORT_SYMBOL(freezing_slow_path);
/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
+bool __refrigerator(bool check_kthr_stop)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
- long save;
+ bool was_frozen = false;
+ long save = current->state;
- task_lock(current);
- if (freezing(current)) {
- frozen_process();
- task_unlock(current);
- } else {
- task_unlock(current);
- return;
- }
- save = current->state;
pr_debug("%s entered refrigerator\n", current->comm);
- spin_lock_irq(&current->sighand->siglock);
- recalc_sigpending(); /* We sent fake signal, clean it up */
- spin_unlock_irq(&current->sighand->siglock);
-
- /* prevent accounting of that task to load */
- current->flags |= PF_FREEZING;
-
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!frozen(current))
+
+ spin_lock_irq(&freezer_lock);
+ current->flags |= PF_FROZEN;
+ if (!freezing(current) ||
+ (check_kthr_stop && kthread_should_stop()))
+ current->flags &= ~PF_FROZEN;
+ spin_unlock_irq(&freezer_lock);
+
+ if (!(current->flags & PF_FROZEN))
break;
+ was_frozen = true;
schedule();
}
- /* Remove the accounting blocker */
- current->flags &= ~PF_FREEZING;
-
pr_debug("%s left refrigerator\n", current->comm);
- __set_current_state(save);
+
+ /*
+ * Restore saved task state before returning. The mb'd version
+ * needs to be used; otherwise, it might silently break
+ * synchronization which depends on ordered task state change.
+ */
+ set_current_state(save);
+
+ return was_frozen;
}
-EXPORT_SYMBOL(refrigerator);
+EXPORT_SYMBOL(__refrigerator);
static void fake_signal_wake_up(struct task_struct *p)
{
unsigned long flags;
- spin_lock_irqsave(&p->sighand->siglock, flags);
- signal_wake_up(p, 0);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ if (lock_task_sighand(p, &flags)) {
+ signal_wake_up(p, 0);
+ unlock_task_sighand(p, &flags);
+ }
}
/**
- * freeze_task - send a freeze request to given task
- * @p: task to send the request to
- * @sig_only: if set, the request will only be sent if the task has the
- * PF_FREEZER_NOSIG flag unset
- * Return value: 'false', if @sig_only is set and the task has
- * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
+ *
+ * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
+ * flag and either sending a fake signal to it or waking it up, depending
+ * on whether it has %PF_FREEZER_NOSIG set.
*
- * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- * either sending a fake signal to it or waking it up, depending on whether
- * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
- * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- * TIF_FREEZE flag will not be set.
+ * RETURNS:
+ * %false, if @p is not freezing or already frozen; %true, otherwise
*/
-bool freeze_task(struct task_struct *p, bool sig_only)
+bool freeze_task(struct task_struct *p)
{
- /*
- * We first check if the task is freezing and next if it has already
- * been frozen to avoid the race with frozen_process() which first marks
- * the task as frozen and next clears its TIF_FREEZE.
- */
- if (!freezing(p)) {
- smp_rmb();
- if (frozen(p))
- return false;
+ unsigned long flags;
- if (!sig_only || should_send_signal(p))
- set_freeze_flag(p);
- else
- return false;
+ spin_lock_irqsave(&freezer_lock, flags);
+ if (!freezing(p) || frozen(p)) {
+ spin_unlock_irqrestore(&freezer_lock, flags);
+ return false;
}
- if (should_send_signal(p)) {
+ if (!(p->flags & PF_KTHREAD)) {
fake_signal_wake_up(p);
/*
* fake_signal_wake_up() goes through p's scheduler
@@ -111,56 +124,48 @@ bool freeze_task(struct task_struct *p, bool sig_only)
* TASK_RUNNING transition can't race with task state
* testing in try_to_freeze_tasks().
*/
- } else if (sig_only) {
- return false;
} else {
wake_up_state(p, TASK_INTERRUPTIBLE);
}
+ spin_unlock_irqrestore(&freezer_lock, flags);
return true;
}
-void cancel_freezing(struct task_struct *p)
+void __thaw_task(struct task_struct *p)
{
unsigned long flags;
- if (freezing(p)) {
- pr_debug(" clean up: %s\n", p->comm);
- clear_freeze_flag(p);
- spin_lock_irqsave(&p->sighand->siglock, flags);
- recalc_sigpending_and_wake(p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- }
-}
-
-static int __thaw_process(struct task_struct *p)
-{
- if (frozen(p)) {
- p->flags &= ~PF_FROZEN;
- return 1;
- }
- clear_freeze_flag(p);
- return 0;
+ /*
+ * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to
+ * be visible to @p as waking up implies wmb. Waking up inside
+ * freezer_lock also prevents wakeups from leaking outside
+ * refrigerator.
+ */
+ spin_lock_irqsave(&freezer_lock, flags);
+ if (frozen(p))
+ wake_up_process(p);
+ spin_unlock_irqrestore(&freezer_lock, flags);
}
-/*
- * Wake up a frozen process
+/**
+ * set_freezable - make %current freezable
*
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails. Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
+ * Mark %current freezable and enter refrigerator if necessary.
*/
-int thaw_process(struct task_struct *p)
+bool set_freezable(void)
{
- task_lock(p);
- if (__thaw_process(p) == 1) {
- task_unlock(p);
- wake_up_process(p);
- return 1;
- }
- task_unlock(p);
- return 0;
+ might_sleep();
+
+ /*
+ * Modify flags while holding freezer_lock. This ensures the
+ * freezer notices that we aren't frozen yet or the freezing
+ * condition is visible to try_to_freeze() below.
+ */
+ spin_lock_irq(&freezer_lock);
+ current->flags &= ~PF_NOFREEZE;
+ spin_unlock_irq(&freezer_lock);
+
+ return try_to_freeze();
}
-EXPORT_SYMBOL(thaw_process);
+EXPORT_SYMBOL(set_freezable);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0e2b179bc7b..1da999f5e74 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
static int irq_wait_for_interrupt(struct irqaction *action)
{
+ set_current_state(TASK_INTERRUPTIBLE);
+
while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
if (test_and_clear_bit(IRQTF_RUNTHREAD,
&action->thread_flags)) {
@@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action)
return 0;
}
schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
}
+ __set_current_state(TASK_RUNNING);
return -1;
}
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index bbdfe2a462a..66ff7109f69 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -66,8 +66,9 @@ void jump_label_inc(struct jump_label_key *key)
return;
jump_label_lock();
- if (atomic_add_return(1, &key->enabled) == 1)
+ if (atomic_read(&key->enabled) == 0)
jump_label_update(key, JUMP_LABEL_ENABLE);
+ atomic_inc(&key->enabled);
jump_label_unlock();
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index dc7bc082928..090ee10d960 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1523,7 +1523,7 @@ int kernel_kexec(void)
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context) {
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
pm_prepare_console();
error = freeze_processes();
if (error) {
@@ -1576,7 +1576,7 @@ int kernel_kexec(void)
thaw_processes();
Restore_console:
pm_restore_console();
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
}
#endif
diff --git a/kernel/kmod.c b/kernel/kmod.c
index a4bea97c75b..a0a88543934 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -36,6 +36,7 @@
#include <linux/resource.h>
#include <linux/notifier.h>
#include <linux/suspend.h>
+#include <linux/rwsem.h>
#include <asm/uaccess.h>
#include <trace/events/module.h>
@@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq;
static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
static DEFINE_SPINLOCK(umh_sysctl_lock);
+static DECLARE_RWSEM(umhelper_sem);
#ifdef CONFIG_MODULES
@@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work)
* If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
* (used for preventing user land processes from being created after the user
* land has been frozen during a system-wide hibernation or suspend operation).
+ * Should always be manipulated under umhelper_sem acquired for write.
*/
static int usermodehelper_disabled = 1;
@@ -282,17 +285,29 @@ static int usermodehelper_disabled = 1;
static atomic_t running_helpers = ATOMIC_INIT(0);
/*
- * Wait queue head used by usermodehelper_pm_callback() to wait for all running
+ * Wait queue head used by usermodehelper_disable() to wait for all running
* helpers to finish.
*/
static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
/*
* Time to wait for running_helpers to become zero before the setting of
- * usermodehelper_disabled in usermodehelper_pm_callback() fails
+ * usermodehelper_disabled in usermodehelper_disable() fails
*/
#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
+void read_lock_usermodehelper(void)
+{
+ down_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_lock_usermodehelper);
+
+void read_unlock_usermodehelper(void)
+{
+ up_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_unlock_usermodehelper);
+
/**
* usermodehelper_disable - prevent new helpers from being started
*/
@@ -300,8 +315,10 @@ int usermodehelper_disable(void)
{
long retval;
+ down_write(&umhelper_sem);
usermodehelper_disabled = 1;
- smp_mb();
+ up_write(&umhelper_sem);
+
/*
* From now on call_usermodehelper_exec() won't start any new
* helpers, so it is sufficient if running_helpers turns out to
@@ -314,7 +331,9 @@ int usermodehelper_disable(void)
if (retval)
return 0;
+ down_write(&umhelper_sem);
usermodehelper_disabled = 0;
+ up_write(&umhelper_sem);
return -EAGAIN;
}
@@ -323,7 +342,9 @@ int usermodehelper_disable(void)
*/
void usermodehelper_enable(void)
{
+ down_write(&umhelper_sem);
usermodehelper_disabled = 0;
+ up_write(&umhelper_sem);
}
/**
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b6d216a9263..3d3de633702 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -59,6 +59,31 @@ int kthread_should_stop(void)
EXPORT_SYMBOL(kthread_should_stop);
/**
+ * kthread_freezable_should_stop - should this freezable kthread return now?
+ * @was_frozen: optional out parameter, indicates whether %current was frozen
+ *
+ * kthread_should_stop() for freezable kthreads, which will enter
+ * refrigerator if necessary. This function is safe from kthread_stop() /
+ * freezer deadlock and freezable kthreads should use this function instead
+ * of calling try_to_freeze() directly.
+ */
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+ bool frozen = false;
+
+ might_sleep();
+
+ if (unlikely(freezing(current)))
+ frozen = __refrigerator(true);
+
+ if (was_frozen)
+ *was_frozen = frozen;
+
+ return kthread_should_stop();
+}
+EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
+
+/**
* kthread_data - return data value specified on kthread creation
* @task: kthread task in question
*
@@ -257,7 +282,7 @@ int kthreadd(void *unused)
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_HIGH_MEMORY]);
- current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+ current->flags |= PF_NOFREEZE;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e69434b070d..b2e08c932d9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -44,6 +44,7 @@
#include <linux/stringify.h>
#include <linux/bitops.h>
#include <linux/gfp.h>
+#include <linux/kmemcheck.h>
#include <asm/sections.h>
@@ -2948,7 +2949,12 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass)
{
- memset(lock, 0, sizeof(*lock));
+ int i;
+
+ kmemcheck_mark_initialized(lock, sizeof(*lock));
+
+ for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
+ lock->class_cache[i] = NULL;
#ifdef CONFIG_LOCK_STAT
lock->cpu = raw_smp_processor_id();
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a6b0503574e..6d6d2887033 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -43,8 +43,6 @@ int in_suspend __nosavedata;
enum {
HIBERNATION_INVALID,
HIBERNATION_PLATFORM,
- HIBERNATION_TEST,
- HIBERNATION_TESTPROC,
HIBERNATION_SHUTDOWN,
HIBERNATION_REBOOT,
/* keep last */
@@ -55,7 +53,7 @@ enum {
static int hibernation_mode = HIBERNATION_SHUTDOWN;
-static bool freezer_test_done;
+bool freezer_test_done;
static const struct platform_hibernation_ops *hibernation_ops;
@@ -71,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops)
WARN_ON(1);
return;
}
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
hibernation_ops = ops;
if (ops)
hibernation_mode = HIBERNATION_PLATFORM;
else if (hibernation_mode == HIBERNATION_PLATFORM)
hibernation_mode = HIBERNATION_SHUTDOWN;
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
}
static bool entering_platform_hibernation;
@@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void)
mdelay(5000);
}
-static int hibernation_testmode(int mode)
-{
- if (hibernation_mode == mode) {
- hibernation_debug_sleep();
- return 1;
- }
- return 0;
-}
-
static int hibernation_test(int level)
{
if (pm_test_level == level) {
@@ -114,7 +103,6 @@ static int hibernation_test(int level)
return 0;
}
#else /* !CONFIG_PM_DEBUG */
-static int hibernation_testmode(int mode) { return 0; }
static int hibernation_test(int level) { return 0; }
#endif /* !CONFIG_PM_DEBUG */
@@ -278,8 +266,7 @@ static int create_image(int platform_mode)
goto Platform_finish;
error = disable_nonboot_cpus();
- if (error || hibernation_test(TEST_CPUS)
- || hibernation_testmode(HIBERNATION_TEST))
+ if (error || hibernation_test(TEST_CPUS))
goto Enable_cpus;
local_irq_disable();
@@ -333,7 +320,7 @@ static int create_image(int platform_mode)
*/
int hibernation_snapshot(int platform_mode)
{
- pm_message_t msg = PMSG_RECOVER;
+ pm_message_t msg;
int error;
error = platform_begin(platform_mode);
@@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode)
if (error)
goto Cleanup;
- if (hibernation_test(TEST_FREEZER) ||
- hibernation_testmode(HIBERNATION_TESTPROC)) {
+ if (hibernation_test(TEST_FREEZER)) {
/*
* Indicate to the caller that we are returning due to a
@@ -362,26 +348,26 @@ int hibernation_snapshot(int platform_mode)
error = dpm_prepare(PMSG_FREEZE);
if (error) {
- dpm_complete(msg);
+ dpm_complete(PMSG_RECOVER);
goto Cleanup;
}
suspend_console();
pm_restrict_gfp_mask();
+
error = dpm_suspend(PMSG_FREEZE);
- if (error)
- goto Recover_platform;
- if (hibernation_test(TEST_DEVICES))
- goto Recover_platform;
+ if (error || hibernation_test(TEST_DEVICES))
+ platform_recover(platform_mode);
+ else
+ error = create_image(platform_mode);
- error = create_image(platform_mode);
/*
- * Control returns here (1) after the image has been created or the
+ * In the case that we call create_image() above, the control
+ * returns here (1) after the image has been created or the
* image creation has failed and (2) after a successful restore.
*/
- Resume_devices:
/* We may need to release the preallocated image pages here. */
if (error || !in_suspend)
swsusp_free();
@@ -399,10 +385,6 @@ int hibernation_snapshot(int platform_mode)
platform_end(platform_mode);
return error;
- Recover_platform:
- platform_recover(platform_mode);
- goto Resume_devices;
-
Cleanup:
swsusp_free();
goto Close;
@@ -590,9 +572,6 @@ int hibernation_platform_enter(void)
static void power_down(void)
{
switch (hibernation_mode) {
- case HIBERNATION_TEST:
- case HIBERNATION_TESTPROC:
- break;
case HIBERNATION_REBOOT:
kernel_restart(NULL);
break;
@@ -611,17 +590,6 @@ static void power_down(void)
while(1);
}
-static int prepare_processes(void)
-{
- int error = 0;
-
- if (freeze_processes()) {
- error = -EBUSY;
- thaw_processes();
- }
- return error;
-}
-
/**
* hibernate - Carry out system hibernation, including saving the image.
*/
@@ -629,7 +597,7 @@ int hibernate(void)
{
int error;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
@@ -654,7 +622,7 @@ int hibernate(void)
sys_sync();
printk("done.\n");
- error = prepare_processes();
+ error = freeze_processes();
if (error)
goto Finish;
@@ -697,7 +665,7 @@ int hibernate(void)
pm_restore_console();
atomic_inc(&snapshot_device_available);
Unlock:
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return error;
}
@@ -811,11 +779,13 @@ static int software_resume(void)
goto close_finish;
error = create_basic_memory_bitmaps();
- if (error)
+ if (error) {
+ usermodehelper_enable();
goto close_finish;
+ }
pr_debug("PM: Preparing processes for restore.\n");
- error = prepare_processes();
+ error = freeze_processes();
if (error) {
swsusp_close(FMODE_READ);
goto Done;
@@ -855,8 +825,6 @@ static const char * const hibernation_modes[] = {
[HIBERNATION_PLATFORM] = "platform",
[HIBERNATION_SHUTDOWN] = "shutdown",
[HIBERNATION_REBOOT] = "reboot",
- [HIBERNATION_TEST] = "test",
- [HIBERNATION_TESTPROC] = "testproc",
};
/*
@@ -865,17 +833,15 @@ static const char * const hibernation_modes[] = {
* Hibernation can be handled in several ways. There are a few different ways
* to put the system into the sleep state: using the platform driver (e.g. ACPI
* or other hibernation_ops), powering it off or rebooting it (for testing
- * mostly), or using one of the two available test modes.
+ * mostly).
*
* The sysfs file /sys/power/disk provides an interface for selecting the
* hibernation mode to use. Reading from this file causes the available modes
- * to be printed. There are 5 modes that can be supported:
+ * to be printed. There are 3 modes that can be supported:
*
* 'platform'
* 'shutdown'
* 'reboot'
- * 'test'
- * 'testproc'
*
* If a platform hibernation driver is in use, 'platform' will be supported
* and will be used by default. Otherwise, 'shutdown' will be used by default.
@@ -899,8 +865,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
switch (i) {
case HIBERNATION_SHUTDOWN:
case HIBERNATION_REBOOT:
- case HIBERNATION_TEST:
- case HIBERNATION_TESTPROC:
break;
case HIBERNATION_PLATFORM:
if (hibernation_ops)
@@ -929,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
if (len == strlen(hibernation_modes[i])
&& !strncmp(buf, hibernation_modes[i], len)) {
@@ -941,8 +905,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
switch (mode) {
case HIBERNATION_SHUTDOWN:
case HIBERNATION_REBOOT:
- case HIBERNATION_TEST:
- case HIBERNATION_TESTPROC:
hibernation_mode = mode;
break;
case HIBERNATION_PLATFORM:
@@ -957,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
if (!error)
pr_debug("PM: Hibernation mode set to '%s'\n",
hibernation_modes[mode]);
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return error ? error : n;
}
@@ -984,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
if (maj != MAJOR(res) || min != MINOR(res))
goto out;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
swsusp_resume_device = res;
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
printk(KERN_INFO "PM: Starting manual resume from disk\n");
noresume = 0;
software_resume();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 36e0f0903c3..9824b41e5a1 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
- *
+ *
* This file is released under the GPLv2
*
*/
@@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
level = TEST_FIRST;
for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
@@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
break;
}
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return error ? error : n;
}
@@ -240,7 +240,7 @@ struct kobject *power_kobj;
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
* 'disk' (Suspend-to-Disk).
*
- * store() accepts one of those strings, translates it into the
+ * store() accepts one of those strings, translates it into the
* proper enumerated value, and initiates a suspend transition.
*/
static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
/* First, check if we are requested to hibernate */
if (len == 4 && !strncmp(buf, "disk", len)) {
error = hibernate();
- goto Exit;
+ goto Exit;
}
#ifdef CONFIG_SUSPEND
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 23a2db1ec44..0c4defe6d3b 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)
#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
/* kernel/power/hibernate.c */
+extern bool freezer_test_done;
+
extern int hibernation_snapshot(int platform_mode);
extern int hibernation_restore(int platform_mode);
extern int hibernation_platform_enter(void);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index addbbe5531b..77274c9ba2f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,16 +22,7 @@
*/
#define TIMEOUT (20 * HZ)
-static inline int freezable(struct task_struct * p)
-{
- if ((p == current) ||
- (p->flags & PF_NOFREEZE) ||
- (p->exit_state != 0))
- return 0;
- return 1;
-}
-
-static int try_to_freeze_tasks(bool sig_only)
+static int try_to_freeze_tasks(bool user_only)
{
struct task_struct *g, *p;
unsigned long end_time;
@@ -46,17 +37,14 @@ static int try_to_freeze_tasks(bool sig_only)
end_time = jiffies + TIMEOUT;
- if (!sig_only)
+ if (!user_only)
freeze_workqueues_begin();
while (true) {
todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (frozen(p) || !freezable(p))
- continue;
-
- if (!freeze_task(p, sig_only))
+ if (p == current || !freeze_task(p))
continue;
/*
@@ -77,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
- if (!sig_only) {
+ if (!user_only) {
wq_busy = freeze_workqueues_busy();
todo += wq_busy;
}
@@ -103,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only)
elapsed_csecs = elapsed_csecs64;
if (todo) {
- /* This does not unfreeze processes that are already frozen
- * (we have slightly ugly calling convention in that respect,
- * and caller must call thaw_processes() if something fails),
- * but it cleans up leftover PF_FREEZE requests.
- */
printk("\n");
printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
"(%d tasks refusing to freeze, wq_busy=%d):\n",
@@ -115,15 +98,11 @@ static int try_to_freeze_tasks(bool sig_only)
elapsed_csecs / 100, elapsed_csecs % 100,
todo - wq_busy, wq_busy);
- thaw_workqueues();
-
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- task_lock(p);
- if (!wakeup && freezing(p) && !freezer_should_skip(p))
+ if (!wakeup && !freezer_should_skip(p) &&
+ p != current && freezing(p) && !frozen(p))
sched_show_task(p);
- cancel_freezing(p);
- task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
} else {
@@ -136,12 +115,18 @@ static int try_to_freeze_tasks(bool sig_only)
/**
* freeze_processes - Signal user space processes to enter the refrigerator.
+ *
+ * On success, returns 0. On failure, -errno and system is fully thawed.
*/
int freeze_processes(void)
{
int error;
+ if (!pm_freezing)
+ atomic_inc(&system_freezing_cnt);
+
printk("Freezing user space processes ... ");
+ pm_freezing = true;
error = try_to_freeze_tasks(true);
if (!error) {
printk("done.");
@@ -150,17 +135,22 @@ int freeze_processes(void)
printk("\n");
BUG_ON(in_atomic());
+ if (error)
+ thaw_processes();
return error;
}
/**
* freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ *
+ * On success, returns 0. On failure, -errno and system is fully thawed.
*/
int freeze_kernel_threads(void)
{
int error;
printk("Freezing remaining freezable tasks ... ");
+ pm_nosig_freezing = true;
error = try_to_freeze_tasks(false);
if (!error)
printk("done.");
@@ -168,37 +158,32 @@ int freeze_kernel_threads(void)
printk("\n");
BUG_ON(in_atomic());
+ if (error)
+ thaw_processes();
return error;
}
-static void thaw_tasks(bool nosig_only)
+void thaw_processes(void)
{
struct task_struct *g, *p;
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- if (!freezable(p))
- continue;
+ if (pm_freezing)
+ atomic_dec(&system_freezing_cnt);
+ pm_freezing = false;
+ pm_nosig_freezing = false;
- if (nosig_only && should_send_signal(p))
- continue;
+ oom_killer_enable();
+
+ printk("Restarting tasks ... ");
- if (cgroup_freezing_or_frozen(p))
- continue;
+ thaw_workqueues();
- thaw_process(p);
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p) {
+ __thaw_task(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
-}
-void thaw_processes(void)
-{
- oom_killer_enable();
-
- printk("Restarting tasks ... ");
- thaw_workqueues();
- thaw_tasks(true);
- thaw_tasks(false);
schedule();
printk("done.\n");
}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4953dc054c5..4fd51beed87 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops;
*/
void suspend_set_ops(const struct platform_suspend_ops *ops)
{
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
suspend_ops = ops;
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
}
EXPORT_SYMBOL_GPL(suspend_set_ops);
@@ -106,13 +106,11 @@ static int suspend_prepare(void)
goto Finish;
error = suspend_freeze_processes();
- if (error) {
- suspend_stats.failed_freeze++;
- dpm_save_failed_step(SUSPEND_FREEZE);
- } else
+ if (!error)
return 0;
- suspend_thaw_processes();
+ suspend_stats.failed_freeze++;
+ dpm_save_failed_step(SUSPEND_FREEZE);
usermodehelper_enable();
Finish:
pm_notifier_call_chain(PM_POST_SUSPEND);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535c2b8..78bdb4404aa 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -30,28 +30,6 @@
#include "power.h"
-/*
- * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
- * will be removed in the future. They are only preserved here for
- * compatibility with existing userland utilities.
- */
-#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
-#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
-
-#define PMOPS_PREPARE 1
-#define PMOPS_ENTER 2
-#define PMOPS_FINISH 3
-
-/*
- * NOTE: The following ioctl definitions are wrong and have been replaced with
- * correct ones. They are only preserved here for compatibility with existing
- * userland utilities and will be removed in the future.
- */
-#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
-#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
-#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
-#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
-
#define SNAPSHOT_MINOR 231
@@ -71,7 +49,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
struct snapshot_data *data;
int error;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
@@ -123,7 +101,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->platform_support = 0;
Unlock:
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return error;
}
@@ -132,7 +110,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
swsusp_free();
free_basic_memory_bitmaps();
@@ -146,7 +124,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
PM_POST_HIBERNATION : PM_POST_RESTORE);
atomic_inc(&snapshot_device_available);
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return 0;
}
@@ -158,7 +136,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
data = filp->private_data;
if (!data->ready) {
@@ -179,7 +157,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
*offp += res;
Unlock:
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return res;
}
@@ -191,7 +169,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
ssize_t res;
loff_t pg_offp = *offp & ~PAGE_MASK;
- mutex_lock(&pm_mutex);
+ lock_system_sleep();
data = filp->private_data;
@@ -208,20 +186,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
if (res > 0)
*offp += res;
unlock:
- mutex_unlock(&pm_mutex);
+ unlock_system_sleep();
return res;
}
-static void snapshot_deprecated_ioctl(unsigned int cmd)
-{
- if (printk_ratelimit())
- printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will "
- "be removed soon, update your suspend-to-disk "
- "utilities\n",
- __builtin_return_address(0), cmd);
-}
-
static long snapshot_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -257,11 +226,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
error = freeze_processes();
- if (error) {
- thaw_processes();
+ if (error)
usermodehelper_enable();
- }
- if (!error)
+ else
data->frozen = 1;
break;
@@ -274,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
data->frozen = 0;
break;
- case SNAPSHOT_ATOMIC_SNAPSHOT:
- snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_CREATE_IMAGE:
if (data->mode != O_RDONLY || !data->frozen || data->ready) {
error = -EPERM;
@@ -283,10 +248,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
}
pm_restore_gfp_mask();
error = hibernation_snapshot(data->platform_support);
- if (!error)
+ if (!error) {
error = put_user(in_suspend, (int __user *)arg);
- if (!error)
- data->ready = 1;
+ if (!error && !freezer_test_done)
+ data->ready = 1;
+ if (freezer_test_done) {
+ freezer_test_done = false;
+ thaw_processes();
+ }
+ }
break;
case SNAPSHOT_ATOMIC_RESTORE:
@@ -305,8 +275,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
data->ready = 0;
break;
- case SNAPSHOT_SET_IMAGE_SIZE:
- snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_PREF_IMAGE_SIZE:
image_size = arg;
break;
@@ -321,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
error = put_user(size, (loff_t __user *)arg);
break;
- case SNAPSHOT_AVAIL_SWAP:
- snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_AVAIL_SWAP_SIZE:
size = count_swap_pages(data->swap, 1);
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
- case SNAPSHOT_GET_SWAP_PAGE:
- snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_ALLOC_SWAP_PAGE:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
@@ -353,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
free_all_swap_pages(data->swap);
break;
- case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
- snapshot_deprecated_ioctl(cmd);
- if (!swsusp_swap_in_use()) {
- /*
- * User space encodes device types as two-byte values,
- * so we need to recode them
- */
- if (old_decode_dev(arg)) {
- data->swap = swap_type_of(old_decode_dev(arg),
- 0, NULL);
- if (data->swap < 0)
- error = -ENODEV;
- } else {
- data->swap = -1;
- error = -EINVAL;
- }
- } else {
- error = -EPERM;
- }
- break;
-
case SNAPSHOT_S2RAM:
if (!data->frozen) {
error = -EPERM;
@@ -396,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
error = hibernation_platform_enter();
break;
- case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
- snapshot_deprecated_ioctl(cmd);
- error = -EINVAL;
-
- switch (arg) {
-
- case PMOPS_PREPARE:
- data->platform_support = 1;
- error = 0;
- break;
-
- case PMOPS_ENTER:
- if (data->platform_support)
- error = hibernation_platform_enter();
- break;
-
- case PMOPS_FINISH:
- if (data->platform_support)
- error = 0;
- break;
-
- default:
- printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
-
- }
- break;
-
case SNAPSHOT_SET_SWAP_AREA:
if (swsusp_swap_in_use()) {
error = -EPERM;
diff --git a/kernel/printk.c b/kernel/printk.c
index 1455a0d4eed..7982a0a841e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1293,10 +1293,11 @@ again:
raw_spin_lock(&logbuf_lock);
if (con_start != log_end)
retry = 1;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
if (retry && console_trylock())
goto again;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
if (wake_klogd)
wake_up_klogd();
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 0e9344a71be..d6b149ccf92 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <linux/slab.h>
+#include <linux/init_task.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion);
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. The timeout is in jiffies. It is not
* interruptible.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
*/
unsigned long __sched
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout);
*
* This waits for completion of a specific task to be signaled. It is
* interruptible.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_interruptible(struct completion *x)
{
@@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
*
* This waits for either a completion of a specific task to be signaled or for a
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_interruptible_timeout(struct completion *x,
@@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
*
* This waits to be signaled for completion of a specific task. It can be
* interrupted by a kill signal.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
*/
int __sched wait_for_completion_killable(struct completion *x)
{
@@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable);
* This waits for either a completion of a specific task to be
* signaled or for a specified timeout to expire. It can be
* interrupted by a kill signal. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
*/
long __sched
wait_for_completion_killable_timeout(struct completion *x,
@@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
idle->sched_class = &idle_sched_class;
ftrace_graph_init_idle_task(idle, cpu);
+#if defined(CONFIG_SMP)
+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
}
/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e67923b7..8a39fa3e3c6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
list_del_leaf_cfs_rq(cfs_rq);
}
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+ long tg_weight;
+
+ /*
+ * Use this CPU's actual weight instead of the last load_contribution
+ * to gain a more accurate current total weight. See
+ * update_cfs_rq_load_contribution().
+ */
+ tg_weight = atomic_read(&tg->load_weight);
+ tg_weight -= cfs_rq->load_contribution;
+ tg_weight += cfs_rq->load.weight;
+
+ return tg_weight;
+}
+
static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
- long load_weight, load, shares;
+ long tg_weight, load, shares;
+ tg_weight = calc_tg_weight(tg, cfs_rq);
load = cfs_rq->load.weight;
- load_weight = atomic_read(&tg->load_weight);
- load_weight += load;
- load_weight -= cfs_rq->load_contribution;
-
shares = (tg->shares * load);
- if (load_weight)
- shares /= load_weight;
+ if (tg_weight)
+ shares /= tg_weight;
if (shares < MIN_SHARES)
shares = MIN_SHARES;
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
- if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+ if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
return;
__return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
* Adding load to a group doesn't make a group heavier, but can cause movement
* of group shares between cpus. Assuming the shares were perfectly aligned one
* can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ * s_i = rw_i / \Sum rw_j (1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ * rw_i = { 2, 4, 1, 0 }
+ * s_i = { 2/7, 4/7, 1/7, 0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ * rw'_i = { 3, 4, 1, 0 }
+ * s'_i = { 3/8, 4/8, 1/8, 0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ * dw_i = S * (s'_i - s_i) (3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
*/
static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
{
struct sched_entity *se = tg->se[cpu];
- if (!tg->parent)
+ if (!tg->parent) /* the trivial, non-cgroup case */
return wl;
for_each_sched_entity(se) {
- long lw, w;
+ long w, W;
tg = se->my_q->tg;
- w = se->my_q->load.weight;
- /* use this cpu's instantaneous contribution */
- lw = atomic_read(&tg->load_weight);
- lw -= se->my_q->load_contribution;
- lw += w + wg;
+ /*
+ * W = @wg + \Sum rw_j
+ */
+ W = wg + calc_tg_weight(tg, se->my_q);
- wl += w;
+ /*
+ * w = rw_i + @wl
+ */
+ w = se->my_q->load.weight + wl;
- if (lw > 0 && wl < lw)
- wl = (wl * tg->shares) / lw;
+ /*
+ * wl = S * s'_i; see (2)
+ */
+ if (W > 0 && w < W)
+ wl = (w * tg->shares) / W;
else
wl = tg->shares;
- /* zero point is MIN_SHARES */
+ /*
+ * Per the above, wl is the new se->load.weight value; since
+ * those are clipped to [MIN_SHARES, ...) do so now. See
+ * calc_cfs_shares().
+ */
if (wl < MIN_SHARES)
wl = MIN_SHARES;
+
+ /*
+ * wl = dw_i = S * (s'_i - s_i); see (3)
+ */
wl -= se->load.weight;
+
+ /*
+ * Recursively apply this logic to all parent groups to compute
+ * the final effective load change on the root group. Since
+ * only the @tg group gets extra weight, all parent groups can
+ * only redistribute existing shares. @wl is the shift in shares
+ * resulting from this level per the above.
+ */
wg = 0;
}
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
int cpu = smp_processor_id();
int prev_cpu = task_cpu(p);
struct sched_domain *sd;
- int i;
+ struct sched_group *sg;
+ int i, smt = 0;
/*
* If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,40 @@ static int select_idle_sibling(struct task_struct *p, int target)
* Otherwise, iterate the domains and find an elegible idle cpu.
*/
rcu_read_lock();
+again:
for_each_domain(target, sd) {
+ if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+ continue;
+
+ if (smt && !(sd->flags & SD_SHARE_CPUPOWER))
+ break;
+
if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
break;
- for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
- if (idle_cpu(i)) {
- target = i;
- break;
+ sg = sd->groups;
+ do {
+ if (!cpumask_intersects(sched_group_cpus(sg),
+ tsk_cpus_allowed(p)))
+ goto next;
+
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ if (!idle_cpu(i))
+ goto next;
}
- }
- /*
- * Lets stop looking for an idle sibling when we reached
- * the domain that spans the current cpu and prev_cpu.
- */
- if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
- cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
- break;
+ target = cpumask_first_and(sched_group_cpus(sg),
+ tsk_cpus_allowed(p));
+ goto done;
+next:
+ sg = sg->next;
+ } while (sg != sd->groups);
+ }
+ if (!smt) {
+ smt = 1;
+ goto again;
}
+done:
rcu_read_unlock();
return target;
@@ -3511,7 +3604,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
}
/**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
* @sd: sched_domain whose statistics are to be updated.
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index efa0a7b75dd..84802245abd 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1)
SCHED_FEAT(TTWU_QUEUE, 1)
SCHED_FEAT(FORCE_SD_OVERLAP, 0)
+SCHED_FEAT(RT_RUNTIME_SHARE, 1)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 056cbd2e2a2..583a1368afe 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
{
int more = 0;
+ if (!sched_feat(RT_RUNTIME_SHARE))
+ return more;
+
if (rt_rq->rt_time > rt_rq->rt_runtime) {
raw_spin_unlock(&rt_rq->rt_runtime_lock);
more = do_balance_runtime(rt_rq);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 6318b511afa..a650694883a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1354,7 +1354,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
fput(file);
out_putname:
- putname(pathname);
+ __putname(pathname);
out:
return result;
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index c436e790b21..8a46f5d6450 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -195,7 +195,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
struct alarm *alarm;
ktime_t expired = next->expires;
- if (expired.tv64 >= now.tv64)
+ if (expired.tv64 > now.tv64)
break;
alarm = container_of(next, struct alarm, node);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ecd6ba36d6..c4eb71c8b2e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -387,6 +387,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
* released list and do a notify add later.
*/
if (old) {
+ old->event_handler = clockevents_handle_noop;
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cfc65e1eb9f..d3ad022136e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -548,7 +548,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
- return max_nsecs - (max_nsecs >> 5);
+ return max_nsecs - (max_nsecs >> 3);
}
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -647,7 +647,7 @@ static void clocksource_enqueue(struct clocksource *cs)
/**
* __clocksource_updatefreq_scale - Used update clocksource with new freq
- * @t: clocksource to be registered
+ * @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
*
@@ -669,7 +669,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
* ~ 0.06ppm granularity for NTP. We apply the same 12.5%
* margin as we do in clocksource_max_deferment()
*/
- sec = (cs->mask - (cs->mask >> 5));
+ sec = (cs->mask - (cs->mask >> 3));
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
@@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
/**
* __clocksource_register_scale - Used to install new clocksources
- * @t: clocksource to be registered
+ * @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
*
@@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/**
* clocksource_register - Used to install new clocksources
- * @t: clocksource to be registered
+ * @cs: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
@@ -761,6 +761,8 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
/**
* clocksource_change_rating - Change the rating of a registered clocksource
+ * @cs: clocksource to be changed
+ * @rating: new rating
*/
void clocksource_change_rating(struct clocksource *cs, int rating)
{
@@ -772,6 +774,7 @@ EXPORT_SYMBOL(clocksource_change_rating);
/**
* clocksource_unregister - remove a registered clocksource
+ * @cs: clocksource to be unregistered
*/
void clocksource_unregister(struct clocksource *cs)
{
@@ -787,6 +790,7 @@ EXPORT_SYMBOL(clocksource_unregister);
/**
* sysfs_show_current_clocksources - sysfs interface for current clocksource
* @dev: unused
+ * @attr: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing current clocksource.
@@ -807,6 +811,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
/**
* sysfs_override_clocksource - interface for manually overriding clocksource
* @dev: unused
+ * @attr: unused
* @buf: name of override clocksource
* @count: length of buffer
*
@@ -842,6 +847,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
/**
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
* @dev: unused
+ * @attr: unused
* @buf: char buffer to be filled with clocksource list
*
* Provides sysfs interface for listing registered clocksources
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f954282d9a8..fd4a7b1625a 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
(dev->features & CLOCK_EVT_FEAT_C3STOP))
return 0;
- clockevents_exchange_device(NULL, dev);
+ clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
tick_broadcast_device.evtdev = dev;
if (!cpumask_empty(tick_get_broadcast_mask()))
tick_broadcast_start_periodic(dev);
diff --git a/kernel/timer.c b/kernel/timer.c
index dbaa62422b1..9c3c62b0c4b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1368,7 +1368,7 @@ SYSCALL_DEFINE0(getppid)
int pid;
rcu_read_lock();
- pid = task_tgid_vnr(current->real_parent);
+ pid = task_tgid_vnr(rcu_dereference(current->real_parent));
rcu_read_unlock();
return pid;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 900b409543d..b1e8943fed1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -152,7 +152,6 @@ void clear_ftrace_function(void)
ftrace_pid_function = ftrace_stub;
}
-#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
/*
* For those archs that do not test ftrace_trace_stop in their
@@ -1212,7 +1211,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
if (!src->count) {
free_ftrace_hash_rcu(*dst);
rcu_assign_pointer(*dst, EMPTY_HASH);
- return 0;
+ /* still need to update the function records */
+ ret = 0;
+ goto out;
}
/*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 581876f9f38..c212a7f934e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1078,7 +1078,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
/* First see if we did not already create this dir */
list_for_each_entry(system, &event_subsystems, list) {
if (strcmp(system->name, name) == 0) {
- __get_system(system);
system->nr_events++;
return system->entry;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 816d3d07497..95dc31efd6d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system,
*/
err = replace_preds(call, NULL, ps, filter_string, true);
if (err)
- goto fail;
+ call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
+ else
+ call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
}
list_for_each_entry(call, &ftrace_events, list) {
@@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system,
if (strcmp(call->class->system, system->name) != 0)
continue;
+ if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER)
+ continue;
+
filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
if (!filter_item)
goto fail_mem;
@@ -1686,7 +1691,7 @@ static int replace_system_preds(struct event_subsystem *system,
* replace the filter for the call.
*/
filter = call->filter;
- call->filter = filter_item->filter;
+ rcu_assign_pointer(call->filter, filter_item->filter);
filter_item->filter = filter;
fail = false;
@@ -1741,7 +1746,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
filter = call->filter;
if (!filter)
goto out_unlock;
- call->filter = NULL;
+ RCU_INIT_POINTER(call->filter, NULL);
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
@@ -1782,7 +1787,7 @@ out:
* string
*/
tmp = call->filter;
- call->filter = filter;
+ rcu_assign_pointer(call->filter, filter);
if (tmp) {
/* Make sure the call is done with the filter */
synchronize_sched();