diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2009-12-02 23:38:13 -0800 | 
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2009-12-02 23:38:13 -0800 | 
| commit | 467832032cc07626880363efa8625719c16c04eb (patch) | |
| tree | ee9a62c04f0b3106e412bc1b2dd1cea5566d5ca7 /kernel | |
| parent | 66d2a5952eab875f1286e04f738ef029afdaf013 (diff) | |
| parent | 22763c5cf3690a681551162c15d34d935308c8d7 (diff) | |
| download | olio-linux-3.10-467832032cc07626880363efa8625719c16c04eb.tar.xz olio-linux-3.10-467832032cc07626880363efa8625719c16c04eb.zip  | |
Merge commit 'v2.6.32' into next
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 1 | ||||
| -rw-r--r-- | kernel/irq/spurious.c | 2 | ||||
| -rw-r--r-- | kernel/kthread.c | 23 | ||||
| -rw-r--r-- | kernel/module.c | 5 | ||||
| -rw-r--r-- | kernel/rcutree.c | 16 | ||||
| -rw-r--r-- | kernel/rcutree.h | 7 | ||||
| -rw-r--r-- | kernel/sched.c | 43 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 73 | ||||
| -rw-r--r-- | kernel/slow-work-debugfs.c | 227 | ||||
| -rw-r--r-- | kernel/slow-work.c | 512 | ||||
| -rw-r--r-- | kernel/slow-work.h | 72 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 6 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 2 | ||||
| -rw-r--r-- | kernel/user.c | 2 | ||||
| -rw-r--r-- | kernel/workqueue.c | 28 | 
15 files changed, 894 insertions, 125 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8ac0b..d7c13d249b2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/  obj-$(CONFIG_RING_BUFFER) += trace/  obj-$(CONFIG_SMP) += sched_cpupri.o  obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o  obj-$(CONFIG_PERF_EVENTS) += perf_event.o  ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 114e704760f..bd7273e6282 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -121,7 +121,9 @@ static void poll_all_shared_irqs(void)  		if (!(status & IRQ_SPURIOUS_DISABLED))  			continue; +		local_irq_disable();  		try_one_irq(i, desc); +		local_irq_enable();  	}  } diff --git a/kernel/kthread.c b/kernel/kthread.c index 5fe709982ca..ab7ae57773e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -150,29 +150,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),  EXPORT_SYMBOL(kthread_create);  /** - * kthread_bind - bind a just-created kthread to a cpu. - * @k: thread created by kthread_create(). - * @cpu: cpu (might not be online, must be possible) for @k to run on. - * - * Description: This function is equivalent to set_cpus_allowed(), - * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create()). - */ -void kthread_bind(struct task_struct *k, unsigned int cpu) -{ -	/* Must have done schedule() in kthread() before we set_task_cpu */ -	if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) { -		WARN_ON(1); -		return; -	} -	set_task_cpu(k, cpu); -	k->cpus_allowed = cpumask_of_cpu(cpu); -	k->rt.nr_cpus_allowed = 1; -	k->flags |= PF_THREAD_BOUND; -} -EXPORT_SYMBOL(kthread_bind); - -/**   * kthread_stop - stop a thread created by kthread_create().   * @k: thread created by kthread_create().   
* diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819..5842a71cf05 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1187,7 +1187,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,  	/* Count loaded sections and allocate structures */  	for (i = 0; i < nsect; i++) -		if (sechdrs[i].sh_flags & SHF_ALLOC) +		if (sechdrs[i].sh_flags & SHF_ALLOC +		    && sechdrs[i].sh_size)  			nloaded++;  	size[0] = ALIGN(sizeof(*sect_attrs)  			+ nloaded * sizeof(sect_attrs->attrs[0]), @@ -1207,6 +1208,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,  	for (i = 0; i < nsect; i++) {  		if (! (sechdrs[i].sh_flags & SHF_ALLOC))  			continue; +		if (!sechdrs[i].sh_size) +			continue;  		sattr->address = sechdrs[i].sh_addr;  		sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,  					GFP_KERNEL); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0536125b049..f3077c0ab18 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -59,7 +59,7 @@  		NUM_RCU_LVL_2, \  		NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \  	}, \ -	.signaled = RCU_SIGNAL_INIT, \ +	.signaled = RCU_GP_IDLE, \  	.gpnum = -300, \  	.completed = -300, \  	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ @@ -657,14 +657,17 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)  	 * irqs disabled.  	 */  	rcu_for_each_node_breadth_first(rsp, rnp) { -		spin_lock(&rnp->lock);	/* irqs already disabled. */ +		spin_lock(&rnp->lock);		/* irqs already disabled. */  		rcu_preempt_check_blocked_tasks(rnp);  		rnp->qsmask = rnp->qsmaskinit;  		rnp->gpnum = rsp->gpnum; -		spin_unlock(&rnp->lock);	/* irqs already disabled. */ +		spin_unlock(&rnp->lock);	/* irqs remain disabled. */  	} +	rnp = rcu_get_root(rsp); +	spin_lock(&rnp->lock);			/* irqs already disabled. */  	rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ +	spin_unlock(&rnp->lock);		/* irqs remain disabled. 
*/  	spin_unlock_irqrestore(&rsp->onofflock, flags);  } @@ -706,6 +709,7 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)  {  	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));  	rsp->completed = rsp->gpnum; +	rsp->signaled = RCU_GP_IDLE;  	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);  	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */  } @@ -1162,9 +1166,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)  	}  	spin_unlock(&rnp->lock);  	switch (signaled) { +	case RCU_GP_IDLE:  	case RCU_GP_INIT: -		break; /* grace period still initializing, ignore. */ +		break; /* grace period idle or initializing, ignore. */  	case RCU_SAVE_DYNTICK: @@ -1178,7 +1183,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)  		/* Update state, record completion counter. */  		spin_lock(&rnp->lock); -		if (lastcomp == rsp->completed) { +		if (lastcomp == rsp->completed && +		    rsp->signaled == RCU_SAVE_DYNTICK) {  			rsp->signaled = RCU_FORCE_QS;  			dyntick_record_completed(rsp, lastcomp);  		} diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1823c6e2060..1899023b096 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -201,9 +201,10 @@ struct rcu_data {  };  /* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT		0	/* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK	1	/* Need to scan dyntick state. */ -#define RCU_FORCE_QS		2	/* Need to force quiescent state. */ +#define RCU_GP_IDLE		0	/* No grace period in progress. */ +#define RCU_GP_INIT		1	/* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK	2	/* Need to scan dyntick state. */ +#define RCU_FORCE_QS		3	/* Need to force quiescent state. 
*/  #ifdef CONFIG_NO_HZ  #define RCU_SIGNAL_INIT		RCU_SAVE_DYNTICK  #else /* #ifdef CONFIG_NO_HZ */ diff --git a/kernel/sched.c b/kernel/sched.c index a455dca884a..3c11ae0a948 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);   */  static DEFINE_SPINLOCK(task_group_lock); +#ifdef CONFIG_FAIR_GROUP_SCHED +  #ifdef CONFIG_SMP  static int root_task_group_empty(void)  { @@ -316,7 +318,6 @@ static int root_task_group_empty(void)  }  #endif -#ifdef CONFIG_FAIR_GROUP_SCHED  #ifdef CONFIG_USER_SCHED  # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)  #else /* !CONFIG_USER_SCHED */ @@ -1992,6 +1993,38 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,  		p->sched_class->prio_changed(rq, p, oldprio, running);  } +/** + * kthread_bind - bind a just-created kthread to a cpu. + * @p: thread created by kthread_create(). + * @cpu: cpu (might not be online, must be possible) for @p to run on. + * + * Description: This function is equivalent to set_cpus_allowed(), + * except that @cpu doesn't need to be online, and the thread must be + * stopped (i.e., just returned from kthread_create()). + * + * Function lives here instead of kthread.c because it messes with + * scheduler internals which require locking. 
+ */ +void kthread_bind(struct task_struct *p, unsigned int cpu) +{ +	struct rq *rq = cpu_rq(cpu); +	unsigned long flags; + +	/* Must have done schedule() in kthread() before we set_task_cpu */ +	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { +		WARN_ON(1); +		return; +	} + +	spin_lock_irqsave(&rq->lock, flags); +	set_task_cpu(p, cpu); +	p->cpus_allowed = cpumask_of_cpu(cpu); +	p->rt.nr_cpus_allowed = 1; +	p->flags |= PF_THREAD_BOUND; +	spin_unlock_irqrestore(&rq->lock, flags); +} +EXPORT_SYMBOL(kthread_bind); +  #ifdef CONFIG_SMP  /*   * Is this task likely cache-hot: @@ -2004,7 +2037,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)  	/*  	 * Buddy candidates are cache hot:  	 */ -	if (sched_feat(CACHE_HOT_BUDDY) && +	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&  			(&p->se == cfs_rq_of(&p->se)->next ||  			 &p->se == cfs_rq_of(&p->se)->last))  		return 1; @@ -9532,13 +9565,13 @@ void __init sched_init(void)  	current->sched_class = &fair_sched_class;  	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ -	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); +	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);  #ifdef CONFIG_SMP  #ifdef CONFIG_NO_HZ -	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); +	zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);  	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);  #endif -	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); +	zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);  #endif /* SMP */  	perf_event_init(); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c32c3e643da..37087a7fac2 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -822,6 +822,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)  		 * re-elected due to buddy favours.  		 */  		clear_buddies(cfs_rq, curr); +		return; +	} + +	/* +	 * Ensure that a task that missed wakeup preemption by a +	 * narrow margin doesn't have to wait for a full slice. 
+	 * This also mitigates buddy induced latencies under load. +	 */ +	if (!sched_feat(WAKEUP_PREEMPT)) +		return; + +	if (delta_exec < sysctl_sched_min_granularity) +		return; + +	if (cfs_rq->nr_running > 1) { +		struct sched_entity *se = __pick_next_entity(cfs_rq); +		s64 delta = curr->vruntime - se->vruntime; + +		if (delta > ideal_runtime) +			resched_task(rq_of(cfs_rq)->curr);  	}  } @@ -861,21 +881,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);  static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)  {  	struct sched_entity *se = __pick_next_entity(cfs_rq); -	struct sched_entity *buddy; +	struct sched_entity *left = se; -	if (cfs_rq->next) { -		buddy = cfs_rq->next; -		cfs_rq->next = NULL; -		if (wakeup_preempt_entity(buddy, se) < 1) -			return buddy; -	} +	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) +		se = cfs_rq->next; -	if (cfs_rq->last) { -		buddy = cfs_rq->last; -		cfs_rq->last = NULL; -		if (wakeup_preempt_entity(buddy, se) < 1) -			return buddy; -	} +	/* +	 * Prefer last buddy, try to return the CPU to a preempted task. +	 */ +	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) +		se = cfs_rq->last; + +	clear_buddies(cfs_rq, se);  	return se;  } @@ -1577,6 +1594,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_  	struct sched_entity *se = &curr->se, *pse = &p->se;  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);  	int sync = wake_flags & WF_SYNC; +	int scale = cfs_rq->nr_running >= sched_nr_latency;  	update_curr(cfs_rq); @@ -1591,18 +1609,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_  	if (unlikely(se == pse))  		return; -	/* -	 * Only set the backward buddy when the current task is still on the -	 * rq. This can happen when a wakeup gets interleaved with schedule on -	 * the ->pre_schedule() or idle_balance() point, either of which can -	 * drop the rq lock. 
-	 * -	 * Also, during early boot the idle thread is in the fair class, for -	 * obvious reasons its a bad idea to schedule back to the idle thread. -	 */ -	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) -		set_last_buddy(se); -	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) +	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))  		set_next_buddy(pse);  	/* @@ -1648,8 +1655,22 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_  	BUG_ON(!pse); -	if (wakeup_preempt_entity(se, pse) == 1) +	if (wakeup_preempt_entity(se, pse) == 1) {  		resched_task(curr); +		/* +		 * Only set the backward buddy when the current task is still +		 * on the rq. This can happen when a wakeup gets interleaved +		 * with schedule on the ->pre_schedule() or idle_balance() +		 * point, either of which can drop the rq lock. +		 * +		 * Also, during early boot the idle thread is in the fair class, +		 * for obvious reasons it's a bad idea to schedule back to it. +		 */ +		if (unlikely(!se->on_rq || curr == rq->idle)) +			return; +		if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) +			set_last_buddy(se); +	}  }  static struct task_struct *pick_next_task_fair(struct rq *rq) diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c new file mode 100644 index 00000000000..e45c4364529 --- /dev/null +++ b/kernel/slow-work-debugfs.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <linux/module.h> +#include <linux/slow-work.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/seq_file.h> +#include "slow-work.h" + +#define ITERATOR_SHIFT		(BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR	(0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER	(~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ +	seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ +	struct timespec now, diff; + +	now = CURRENT_TIME; +	diff = timespec_sub(now, work->mark); + +	if (diff.tv_sec < 0) +		seq_puts(m, "  -ve "); +	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) +		seq_printf(m, "%3luns ", diff.tv_nsec); +	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) +		seq_printf(m, "%3luus ", diff.tv_nsec / 1000); +	else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) +		seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); +	else if (diff.tv_sec <= 1) +		seq_puts(m, "   1s "); +	else if (diff.tv_sec < 60) +		seq_printf(m, "%4lus ", diff.tv_sec); +	else if (diff.tv_sec < 60 * 60) +		seq_printf(m, "%4lum ", diff.tv_sec / 60); +	else if (diff.tv_sec < 60 * 60 * 24) +		seq_printf(m, "%4luh ", diff.tv_sec / 3600); +	else +		seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for debugfs + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ +	struct slow_work *work; +	struct list_head *p = v; +	unsigned long id; + +	switch ((unsigned long) v) { +	case 1: +		seq_puts(m, "THR PID   ITEM ADDR        FL MARK  DESC\n"); +		return 0; +	case 2: +		seq_puts(m, "=== ===== ================ == ===== ==========\n"); +		return 0; + +	case 3 ... 
3 + SLOW_WORK_THREAD_LIMIT - 1: +		id = (unsigned long) v - 3; + +		read_lock(&slow_work_execs_lock); +		work = slow_work_execs[id]; +		if (work) { +			smp_read_barrier_depends(); + +			seq_printf(m, "%3lu %5d %16p %2lx ", +				   id, slow_work_pids[id], work, work->flags); +			slow_work_print_mark(m, work); + +			if (work->ops->desc) +				work->ops->desc(work, m); +			seq_putc(m, '\n'); +		} +		read_unlock(&slow_work_execs_lock); +		return 0; + +	default: +		work = list_entry(p, struct slow_work, link); +		seq_printf(m, "%3s     - %16p %2lx ", +			   work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", +			   work, work->flags); +		slow_work_print_mark(m, work); + +		if (work->ops->desc) +			work->ops->desc(work, m); +		seq_putc(m, '\n'); +		return 0; +	} +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ +	struct list_head *p; +	unsigned long count, id; + +	switch (*_pos >> ITERATOR_SHIFT) { +	case 0x0: +		if (*_pos == 0) +			*_pos = 1; +		if (*_pos < 3) +			return (void *)(unsigned long) *_pos; +		if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) +			for (id = *_pos - 3; +			     id < SLOW_WORK_THREAD_LIMIT; +			     id++, (*_pos)++) +				if (slow_work_execs[id]) +					return (void *)(unsigned long) *_pos; +		*_pos = 0x1UL << ITERATOR_SHIFT; + +	case 0x1: +		count = *_pos & ITERATOR_COUNTER; +		list_for_each(p, &slow_work_queue) { +			if (count == 0) +				return p; +			count--; +		} +		*_pos = 0x2UL << ITERATOR_SHIFT; + +	case 0x2: +		count = *_pos & ITERATOR_COUNTER; +		list_for_each(p, &vslow_work_queue) { +			if (count == 0) +				return p; +			count--; +		} +		*_pos = 0x3UL << ITERATOR_SHIFT; + +	default: +		return NULL; +	} +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ +	spin_lock_irq(&slow_work_queue_lock); +	return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ 
+static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ +	struct list_head *p = v; +	unsigned long selector = *_pos >> ITERATOR_SHIFT; + +	(*_pos)++; +	switch (selector) { +	case 0x0: +		return slow_work_runqueue_index(m, _pos); + +	case 0x1: +		if (*_pos >> ITERATOR_SHIFT == 0x1) { +			p = p->next; +			if (p != &slow_work_queue) +				return p; +		} +		*_pos = 0x2UL << ITERATOR_SHIFT; +		p = &vslow_work_queue; + +	case 0x2: +		if (*_pos >> ITERATOR_SHIFT == 0x2) { +			p = p->next; +			if (p != &vslow_work_queue) +				return p; +		} +		*_pos = 0x3UL << ITERATOR_SHIFT; + +	default: +		return NULL; +	} +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ +	spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { +	.start		= slow_work_runqueue_start, +	.stop		= slow_work_runqueue_stop, +	.next		= slow_work_runqueue_next, +	.show		= slow_work_runqueue_show, +}; + +/* + * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { +	.owner		= THIS_MODULE, +	.open		= slow_work_runqueue_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 0d31135efbf..00889bd3c59 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,11 +16,8 @@  #include <linux/kthread.h>  #include <linux/freezer.h>  #include <linux/wait.h> - -#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)	/* cull threads 5s after running out of -					 * things to do */ -#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after -					 * OOM */ +#include <linux/debugfs.h> +#include "slow-work.h"  static void slow_work_cull_timeout(unsigned long);  static void 
slow_work_oom_timeout(unsigned long); @@ -46,7 +43,7 @@ static unsigned vslow_work_proportion = 50; /* % of threads that may process  #ifdef CONFIG_SYSCTL  static const int slow_work_min_min_threads = 2; -static int slow_work_max_max_threads = 255; +static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;  static const int slow_work_min_vslow = 1;  static const int slow_work_max_vslow = 99; @@ -98,6 +95,56 @@ static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);  static struct slow_work slow_work_new_thread; /* new thread starter */  /* + * slow work ID allocation (use slow_work_queue_lock) + */ +static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT); + +/* + * Unregistration tracking to prevent put_ref() from disappearing during module + * unload + */ +#ifdef CONFIG_MODULES +static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT]; +static struct module *slow_work_unreg_module; +static struct slow_work *slow_work_unreg_work_item; +static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); +static DEFINE_MUTEX(slow_work_unreg_sync_lock); + +static void slow_work_set_thread_processing(int id, struct slow_work *work) +{ +	if (work) +		slow_work_thread_processing[id] = work->owner; +} +static void slow_work_done_thread_processing(int id, struct slow_work *work) +{ +	struct module *module = slow_work_thread_processing[id]; + +	slow_work_thread_processing[id] = NULL; +	smp_mb(); +	if (slow_work_unreg_work_item == work || +	    slow_work_unreg_module == module) +		wake_up_all(&slow_work_unreg_wq); +} +static void slow_work_clear_thread_processing(int id) +{ +	slow_work_thread_processing[id] = NULL; +} +#else +static void slow_work_set_thread_processing(int id, struct slow_work *work) {} +static void slow_work_done_thread_processing(int id, struct slow_work *work) {} +static void slow_work_clear_thread_processing(int id) {} +#endif + +/* + * Data for tracking currently executing items for indication through /proc + */ +#ifdef 
CONFIG_SLOW_WORK_DEBUG +struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; +pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; +DEFINE_RWLOCK(slow_work_execs_lock); +#endif + +/*   * The queues of work items and the lock governing access to them.  These are   * shared between all the CPUs.  It doesn't make sense to have per-CPU queues   * as the number of threads bears no relation to the number of CPUs. @@ -105,9 +152,18 @@ static struct slow_work slow_work_new_thread; /* new thread starter */   * There are two queues of work items: one for slow work items, and one for   * very slow work items.   */ -static LIST_HEAD(slow_work_queue); -static LIST_HEAD(vslow_work_queue); -static DEFINE_SPINLOCK(slow_work_queue_lock); +LIST_HEAD(slow_work_queue); +LIST_HEAD(vslow_work_queue); +DEFINE_SPINLOCK(slow_work_queue_lock); + +/* + * The following are two wait queues that get pinged when a work item is placed + * on an empty queue.  These allow work items that are hogging a thread by + * sleeping in a way that could be deferred to yield their thread and enqueue + * themselves. + */ +static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation); +static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);  /*   * The thread controls.  A variable used to signal to the threads that they @@ -126,6 +182,20 @@ static DECLARE_COMPLETION(slow_work_last_thread_exited);  static int slow_work_user_count;  static DEFINE_MUTEX(slow_work_user_lock); +static inline int slow_work_get_ref(struct slow_work *work) +{ +	if (work->ops->get_ref) +		return work->ops->get_ref(work); + +	return 0; +} + +static inline void slow_work_put_ref(struct slow_work *work) +{ +	if (work->ops->put_ref) +		work->ops->put_ref(work); +} +  /*   * Calculate the maximum number of active threads in the pool that are   * permitted to process very slow work items. @@ -149,7 +219,7 @@ static unsigned slow_work_calc_vsmax(void)   * Attempt to execute stuff queued on a slow thread.  
Return true if we managed   * it, false if there was nothing to do.   */ -static bool slow_work_execute(void) +static noinline bool slow_work_execute(int id)  {  	struct slow_work *work = NULL;  	unsigned vsmax; @@ -186,6 +256,13 @@ static bool slow_work_execute(void)  	} else {  		very_slow = false; /* avoid the compiler warning */  	} + +	slow_work_set_thread_processing(id, work); +	if (work) { +		slow_work_mark_time(work); +		slow_work_begin_exec(id, work); +	} +  	spin_unlock_irq(&slow_work_queue_lock);  	if (!work) @@ -194,12 +271,19 @@ static bool slow_work_execute(void)  	if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))  		BUG(); -	work->ops->execute(work); +	/* don't execute if the work is in the process of being cancelled */ +	if (!test_bit(SLOW_WORK_CANCELLING, &work->flags)) +		work->ops->execute(work);  	if (very_slow)  		atomic_dec(&vslow_work_executing_count);  	clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); +	/* wake up anyone waiting for this work to be complete */ +	wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); + +	slow_work_end_exec(id, work); +  	/* if someone tried to enqueue the item whilst we were executing it,  	 * then it'll be left unenqueued to avoid multiple threads trying to  	 * execute it simultaneously @@ -219,7 +303,10 @@ static bool slow_work_execute(void)  		spin_unlock_irq(&slow_work_queue_lock);  	} -	work->ops->put_ref(work); +	/* sort out the race between module unloading and put_ref() */ +	slow_work_put_ref(work); +	slow_work_done_thread_processing(id, work); +  	return true;  auto_requeue: @@ -227,15 +314,61 @@ auto_requeue:  	 * - we transfer our ref on the item back to the appropriate queue  	 * - don't wake another thread up as we're awake already  	 */ +	slow_work_mark_time(work);  	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))  		list_add_tail(&work->link, &vslow_work_queue);  	else  		list_add_tail(&work->link, &slow_work_queue);  	spin_unlock_irq(&slow_work_queue_lock); +	
slow_work_clear_thread_processing(id);  	return true;  }  /** + * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work + * @work: The work item under execution that wants to sleep + * @_timeout: Scheduler sleep timeout + * + * Allow a requeueable work item to sleep on a slow-work processor thread until + * that thread is needed to do some other work or the sleep is interrupted by + * some other event. + * + * The caller must set up a wake up event before calling this and must have set + * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own + * condition before calling this function as no test is made here. + * + * False is returned if there is nothing on the queue; true is returned if the + * work item should be requeued + */ +bool slow_work_sleep_till_thread_needed(struct slow_work *work, +					signed long *_timeout) +{ +	wait_queue_head_t *wfo_wq; +	struct list_head *queue; + +	DEFINE_WAIT(wait); + +	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { +		wfo_wq = &vslow_work_queue_waits_for_occupation; +		queue = &vslow_work_queue; +	} else { +		wfo_wq = &slow_work_queue_waits_for_occupation; +		queue = &slow_work_queue; +	} + +	if (!list_empty(queue)) +		return true; + +	add_wait_queue_exclusive(wfo_wq, &wait); +	if (list_empty(queue)) +		*_timeout = schedule_timeout(*_timeout); +	finish_wait(wfo_wq, &wait); + +	return !list_empty(queue); +} +EXPORT_SYMBOL(slow_work_sleep_till_thread_needed); + +/**   * slow_work_enqueue - Schedule a slow work item for processing   * @work: The work item to queue   * @@ -260,16 +393,22 @@ auto_requeue:   * allowed to pick items to execute.  This ensures that very slow items won't   * overly block ones that are just ordinarily slow.   * - * Returns 0 if successful, -EAGAIN if not. 
+ * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is + * attempted queued)   */  int slow_work_enqueue(struct slow_work *work)  { +	wait_queue_head_t *wfo_wq; +	struct list_head *queue;  	unsigned long flags; +	int ret; + +	if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) +		return -ECANCELED;  	BUG_ON(slow_work_user_count <= 0);  	BUG_ON(!work);  	BUG_ON(!work->ops); -	BUG_ON(!work->ops->get_ref);  	/* when honouring an enqueue request, we only promise that we will run  	 * the work function in the future; we do not promise to run it once @@ -280,8 +419,19 @@ int slow_work_enqueue(struct slow_work *work)  	 * maintaining our promise  	 */  	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { +		if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { +			wfo_wq = &vslow_work_queue_waits_for_occupation; +			queue = &vslow_work_queue; +		} else { +			wfo_wq = &slow_work_queue_waits_for_occupation; +			queue = &slow_work_queue; +		} +  		spin_lock_irqsave(&slow_work_queue_lock, flags); +		if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) +			goto cancelled; +  		/* we promise that we will not attempt to execute the work  		 * function in more than one thread simultaneously  		 * @@ -299,25 +449,221 @@ int slow_work_enqueue(struct slow_work *work)  		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {  			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);  		} else { -			if (work->ops->get_ref(work) < 0) -				goto cant_get_ref; -			if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) -				list_add_tail(&work->link, &vslow_work_queue); -			else -				list_add_tail(&work->link, &slow_work_queue); +			ret = slow_work_get_ref(work); +			if (ret < 0) +				goto failed; +			slow_work_mark_time(work); +			list_add_tail(&work->link, queue);  			wake_up(&slow_work_thread_wq); + +			/* if someone who could be requeued is sleeping on a +			 * thread, then ask them to yield their thread */ +			if (work->link.prev == queue) +				wake_up(wfo_wq);  		
}  		spin_unlock_irqrestore(&slow_work_queue_lock, flags);  	}  	return 0; -cant_get_ref: +cancelled: +	ret = -ECANCELED; +failed:  	spin_unlock_irqrestore(&slow_work_queue_lock, flags); -	return -EAGAIN; +	return ret;  }  EXPORT_SYMBOL(slow_work_enqueue); +static int slow_work_wait(void *word) +{ +	schedule(); +	return 0; +} + +/** + * slow_work_cancel - Cancel a slow work item + * @work: The work item to cancel + * + * This function will cancel a previously enqueued work item. If we cannot + * cancel the work item, it is guaranteed to have run when this function + * returns. + */ +void slow_work_cancel(struct slow_work *work) +{ +	bool wait = true, put = false; + +	set_bit(SLOW_WORK_CANCELLING, &work->flags); +	smp_mb(); + +	/* if the work item is a delayed work item with an active timer, we +	 * need to wait for the timer to finish _before_ getting the spinlock, +	 * lest we deadlock against the timer routine +	 * +	 * the timer routine will leave DELAYED set if it notices the +	 * CANCELLING flag in time +	 */ +	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { +		struct delayed_slow_work *dwork = +			container_of(work, struct delayed_slow_work, work); +		del_timer_sync(&dwork->timer); +	} + +	spin_lock_irq(&slow_work_queue_lock); + +	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { +		/* the timer routine aborted or never happened, so we are left +		 * holding the timer's reference on the item and should just +		 * drop the pending flag and wait for any ongoing execution to +		 * finish */ +		struct delayed_slow_work *dwork = +			container_of(work, struct delayed_slow_work, work); + +		BUG_ON(timer_pending(&dwork->timer)); +		BUG_ON(!list_empty(&work->link)); + +		clear_bit(SLOW_WORK_DELAYED, &work->flags); +		put = true; +		clear_bit(SLOW_WORK_PENDING, &work->flags); + +	} else if (test_bit(SLOW_WORK_PENDING, &work->flags) && +		   !list_empty(&work->link)) { +		/* the link in the pending queue holds a reference on the item +		 * that we will need to release 
*/ +		list_del_init(&work->link); +		wait = false; +		put = true; +		clear_bit(SLOW_WORK_PENDING, &work->flags); + +	} else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) { +		/* the executor is holding our only reference on the item, so +		 * we merely need to wait for it to finish executing */ +		clear_bit(SLOW_WORK_PENDING, &work->flags); +	} + +	spin_unlock_irq(&slow_work_queue_lock); + +	/* the EXECUTING flag is set by the executor whilst the spinlock is set +	 * and before the item is dequeued - so assuming the above doesn't +	 * actually dequeue it, simply waiting for the EXECUTING flag to be +	 * released here should be sufficient */ +	if (wait) +		wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait, +			    TASK_UNINTERRUPTIBLE); + +	clear_bit(SLOW_WORK_CANCELLING, &work->flags); +	if (put) +		slow_work_put_ref(work); +} +EXPORT_SYMBOL(slow_work_cancel); + +/* + * Handle expiry of the delay timer, indicating that a delayed slow work item + * should now be queued if not cancelled + */ +static void delayed_slow_work_timer(unsigned long data) +{ +	wait_queue_head_t *wfo_wq; +	struct list_head *queue; +	struct slow_work *work = (struct slow_work *) data; +	unsigned long flags; +	bool queued = false, put = false, first = false; + +	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { +		wfo_wq = &vslow_work_queue_waits_for_occupation; +		queue = &vslow_work_queue; +	} else { +		wfo_wq = &slow_work_queue_waits_for_occupation; +		queue = &slow_work_queue; +	} + +	spin_lock_irqsave(&slow_work_queue_lock, flags); +	if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { +		clear_bit(SLOW_WORK_DELAYED, &work->flags); + +		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { +			/* we discard the reference the timer was holding in +			 * favour of the one the executor holds */ +			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); +			put = true; +		} else { +			slow_work_mark_time(work); +			list_add_tail(&work->link, queue); +			queued = true; 
+			if (work->link.prev == queue) +				first = true; +		} +	} + +	spin_unlock_irqrestore(&slow_work_queue_lock, flags); +	if (put) +		slow_work_put_ref(work); +	if (first) +		wake_up(wfo_wq); +	if (queued) +		wake_up(&slow_work_thread_wq); +} + +/** + * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing + * @dwork: The delayed work item to queue + * @delay: When to start executing the work, in jiffies from now + * + * This is similar to slow_work_enqueue(), but it adds a delay before the work + * is actually queued for processing. + * + * The item can have delayed processing requested on it whilst it is being + * executed.  The delay will begin immediately, and if it expires before the + * item finishes executing, the item will be placed back on the queue when it + * has done executing. + */ +int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, +			      unsigned long delay) +{ +	struct slow_work *work = &dwork->work; +	unsigned long flags; +	int ret; + +	if (delay == 0) +		return slow_work_enqueue(&dwork->work); + +	BUG_ON(slow_work_user_count <= 0); +	BUG_ON(!work); +	BUG_ON(!work->ops); + +	if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) +		return -ECANCELED; + +	if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { +		spin_lock_irqsave(&slow_work_queue_lock, flags); + +		if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) +			goto cancelled; + +		/* the timer holds a reference whilst it is pending */ +		ret = work->ops->get_ref(work); +		if (ret < 0) +			goto cant_get_ref; + +		if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags)) +			BUG(); +		dwork->timer.expires = jiffies + delay; +		dwork->timer.data = (unsigned long) work; +		dwork->timer.function = delayed_slow_work_timer; +		add_timer(&dwork->timer); + +		spin_unlock_irqrestore(&slow_work_queue_lock, flags); +	} + +	return 0; + +cancelled: +	ret = -ECANCELED; +cant_get_ref: +	spin_unlock_irqrestore(&slow_work_queue_lock, flags); +	return ret; +} 
+EXPORT_SYMBOL(delayed_slow_work_enqueue); +  /*   * Schedule a cull of the thread pool at some time in the near future   */ @@ -368,13 +714,23 @@ static inline bool slow_work_available(int vsmax)   */  static int slow_work_thread(void *_data)  { -	int vsmax; +	int vsmax, id;  	DEFINE_WAIT(wait);  	set_freezable();  	set_user_nice(current, -5); +	/* allocate ourselves an ID */ +	spin_lock_irq(&slow_work_queue_lock); +	id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); +	BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); +	__set_bit(id, slow_work_ids); +	slow_work_set_thread_pid(id, current->pid); +	spin_unlock_irq(&slow_work_queue_lock); + +	sprintf(current->comm, "kslowd%03u", id); +  	for (;;) {  		vsmax = vslow_work_proportion;  		vsmax *= atomic_read(&slow_work_thread_count); @@ -395,7 +751,7 @@ static int slow_work_thread(void *_data)  		vsmax *= atomic_read(&slow_work_thread_count);  		vsmax /= 100; -		if (slow_work_available(vsmax) && slow_work_execute()) { +		if (slow_work_available(vsmax) && slow_work_execute(id)) {  			cond_resched();  			if (list_empty(&slow_work_queue) &&  			    list_empty(&vslow_work_queue) && @@ -412,6 +768,11 @@ static int slow_work_thread(void *_data)  			break;  	} +	spin_lock_irq(&slow_work_queue_lock); +	slow_work_set_thread_pid(id, 0); +	__clear_bit(id, slow_work_ids); +	spin_unlock_irq(&slow_work_queue_lock); +  	if (atomic_dec_and_test(&slow_work_thread_count))  		complete_and_exit(&slow_work_last_thread_exited, 0);  	return 0; @@ -427,21 +788,6 @@ static void slow_work_cull_timeout(unsigned long data)  }  /* - * Get a reference on slow work thread starter - */ -static int slow_work_new_thread_get_ref(struct slow_work *work) -{ -	return 0; -} - -/* - * Drop a reference on slow work thread starter - */ -static void slow_work_new_thread_put_ref(struct slow_work *work) -{ -} - -/*   * Start a new slow work thread   */  static void slow_work_new_thread_execute(struct slow_work *work) @@ -475,9 +821,11 @@ static void 
slow_work_new_thread_execute(struct slow_work *work)  }  static const struct slow_work_ops slow_work_new_thread_ops = { -	.get_ref	= slow_work_new_thread_get_ref, -	.put_ref	= slow_work_new_thread_put_ref, +	.owner		= THIS_MODULE,  	.execute	= slow_work_new_thread_execute, +#ifdef CONFIG_SLOW_WORK_DEBUG +	.desc		= slow_work_new_thread_desc, +#endif  };  /* @@ -546,12 +894,13 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,  /**   * slow_work_register_user - Register a user of the facility + * @module: The module about to make use of the facility   *   * Register a user of the facility, starting up the initial threads if there   * aren't any other users at this point.  This will return 0 if successful, or   * an error if not.   */ -int slow_work_register_user(void) +int slow_work_register_user(struct module *module)  {  	struct task_struct *p;  	int loop; @@ -598,14 +947,81 @@ error:  }  EXPORT_SYMBOL(slow_work_register_user); +/* + * wait for all outstanding items from the calling module to complete + * - note that more items may be queued whilst we're waiting + */ +static void slow_work_wait_for_items(struct module *module) +{ +#ifdef CONFIG_MODULES +	DECLARE_WAITQUEUE(myself, current); +	struct slow_work *work; +	int loop; + +	mutex_lock(&slow_work_unreg_sync_lock); +	add_wait_queue(&slow_work_unreg_wq, &myself); + +	for (;;) { +		spin_lock_irq(&slow_work_queue_lock); + +		/* first of all, we wait for the last queued item in each list +		 * to be processed */ +		list_for_each_entry_reverse(work, &vslow_work_queue, link) { +			if (work->owner == module) { +				set_current_state(TASK_UNINTERRUPTIBLE); +				slow_work_unreg_work_item = work; +				goto do_wait; +			} +		} +		list_for_each_entry_reverse(work, &slow_work_queue, link) { +			if (work->owner == module) { +				set_current_state(TASK_UNINTERRUPTIBLE); +				slow_work_unreg_work_item = work; +				goto do_wait; +			} +		} + +		/* then we wait for the items being processed to finish 
*/ +		slow_work_unreg_module = module; +		smp_mb(); +		for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) { +			if (slow_work_thread_processing[loop] == module) +				goto do_wait; +		} +		spin_unlock_irq(&slow_work_queue_lock); +		break; /* okay, we're done */ + +	do_wait: +		spin_unlock_irq(&slow_work_queue_lock); +		schedule(); +		slow_work_unreg_work_item = NULL; +		slow_work_unreg_module = NULL; +	} + +	remove_wait_queue(&slow_work_unreg_wq, &myself); +	mutex_unlock(&slow_work_unreg_sync_lock); +#endif /* CONFIG_MODULES */ +} +  /**   * slow_work_unregister_user - Unregister a user of the facility + * @module: The module whose items should be cleared   *   * Unregister a user of the facility, killing all the threads if this was the   * last one. + * + * This waits for all the work items belonging to the nominated module to go + * away before proceeding.   */ -void slow_work_unregister_user(void) +void slow_work_unregister_user(struct module *module)  { +	/* first of all, wait for all outstanding items from the calling module +	 * to complete */ +	if (module) +		slow_work_wait_for_items(module); + +	/* then we can actually go about shutting down the facility if need +	 * be */  	mutex_lock(&slow_work_user_lock);  	BUG_ON(slow_work_user_count <= 0); @@ -639,6 +1055,16 @@ static int __init init_slow_work(void)  	if (slow_work_max_max_threads < nr_cpus * 2)  		slow_work_max_max_threads = nr_cpus * 2;  #endif +#ifdef CONFIG_SLOW_WORK_DEBUG +	{ +		struct dentry *dbdir; + +		dbdir = debugfs_create_dir("slow_work", NULL); +		if (dbdir && !IS_ERR(dbdir)) +			debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, +					    NULL, &slow_work_runqueue_fops); +	} +#endif  	return 0;  } diff --git a/kernel/slow-work.h b/kernel/slow-work.h new file mode 100644 index 00000000000..321f3c59d73 --- /dev/null +++ b/kernel/slow-work.h @@ -0,0 +1,72 @@ +/* Slow work private definitions + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)	/* cull threads 5s after running out of +					 * things to do */ +#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)	/* can't start new threads for 5s after +					 * OOM */ + +#define SLOW_WORK_THREAD_LIMIT	255	/* abs maximum number of slow-work threads */ + +/* + * slow-work.c + */ +#ifdef CONFIG_SLOW_WORK_DEBUG +extern struct slow_work *slow_work_execs[]; +extern pid_t slow_work_pids[]; +extern rwlock_t slow_work_execs_lock; +#endif + +extern struct list_head slow_work_queue; +extern struct list_head vslow_work_queue; +extern spinlock_t slow_work_queue_lock; + +/* + * slow-work-debugfs.c + */ +#ifdef CONFIG_SLOW_WORK_DEBUG +extern const struct file_operations slow_work_runqueue_fops; + +extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); +#endif + +/* + * Helper functions + */ +static inline void slow_work_set_thread_pid(int id, pid_t pid) +{ +#ifdef CONFIG_SLOW_WORK_PROC +	slow_work_pids[id] = pid; +#endif +} + +static inline void slow_work_mark_time(struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC +	work->mark = CURRENT_TIME; +#endif +} + +static inline void slow_work_begin_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC +	slow_work_execs[id] = work; +#endif +} + +static inline void slow_work_end_exec(int id, struct slow_work *work) +{ +#ifdef CONFIG_SLOW_WORK_PROC +	write_lock(&slow_work_execs_lock); +	slow_work_execs[id] = NULL; +	write_unlock(&slow_work_execs_lock); +#endif +} diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9c451a1930b..6dc4e5ef7a0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2222,15 +2222,15 @@ 
ftrace_regex_write(struct file *file, const char __user *ubuf,  		ret = ftrace_process_regex(parser->buffer,  					   parser->idx, enable);  		if (ret) -			goto out; +			goto out_unlock;  		trace_parser_clear(parser);  	}  	ret = read; - +out_unlock:  	mutex_unlock(&ftrace_regex_lock); -out: +  	return ret;  } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3ffa502fb24..5dd017fea6f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1193,6 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)  	atomic_inc(&cpu_buffer->record_disabled);  	synchronize_sched(); +	spin_lock_irq(&cpu_buffer->reader_lock);  	rb_head_page_deactivate(cpu_buffer);  	for (i = 0; i < nr_pages; i++) { @@ -1207,6 +1208,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)  		return;  	rb_reset_cpu(cpu_buffer); +	spin_unlock_irq(&cpu_buffer->reader_lock);  	rb_check_pages(cpu_buffer); diff --git a/kernel/user.c b/kernel/user.c index 2c000e7132a..46d0165ca70 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -330,9 +330,9 @@ done:   */  static void free_user(struct user_struct *up, unsigned long flags)  { -	spin_unlock_irqrestore(&uidhash_lock, flags);  	INIT_DELAYED_WORK(&up->work, cleanup_user_struct);  	schedule_delayed_work(&up->work, msecs_to_jiffies(1000)); +	spin_unlock_irqrestore(&uidhash_lock, flags);  }  #else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 12328147132..67e526b6ae8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -692,31 +692,29 @@ int schedule_on_each_cpu(work_func_t func)  	if (!works)  		return -ENOMEM; +	get_online_cpus(); +  	/* -	 * when running in keventd don't schedule a work item on itself. -	 * Can just call directly because the work queue is already bound. -	 * This also is faster. -	 * Make this a generic parameter for other workqueues? 
+	 * When running in keventd don't schedule a work item on +	 * itself.  Can just call directly because the work queue is +	 * already bound.  This also is faster.  	 */ -	if (current_is_keventd()) { +	if (current_is_keventd())  		orig = raw_smp_processor_id(); -		INIT_WORK(per_cpu_ptr(works, orig), func); -		func(per_cpu_ptr(works, orig)); -	} -	get_online_cpus();  	for_each_online_cpu(cpu) {  		struct work_struct *work = per_cpu_ptr(works, cpu); -		if (cpu == orig) -			continue;  		INIT_WORK(work, func); -		schedule_work_on(cpu, work); -	} -	for_each_online_cpu(cpu) {  		if (cpu != orig) -			flush_work(per_cpu_ptr(works, cpu)); +			schedule_work_on(cpu, work);  	} +	if (orig >= 0) +		func(per_cpu_ptr(works, orig)); + +	for_each_online_cpu(cpu) +		flush_work(per_cpu_ptr(works, cpu)); +  	put_online_cpus();  	free_percpu(works);  	return 0;  |