diff options
| author | Ingo Molnar <mingo@elte.hu> | 2006-06-27 02:54:53 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-27 17:32:47 -0700 | 
| commit | 23f78d4a03c53cbd75d87a795378ea540aa08c86 (patch) | |
| tree | 27dfe06337990911380fe8c5949ae9acd8e9568a | |
| parent | b29739f902ee76a05493fb7d2303490fc75364f4 (diff) | |
| download | olio-linux-3.10-23f78d4a03c53cbd75d87a795378ea540aa08c86.tar.xz olio-linux-3.10-23f78d4a03c53cbd75d87a795378ea540aa08c86.zip | |
[PATCH] pi-futex: rt mutex core
Core functions for the rt-mutex subsystem.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | include/linux/init_task.h | 1 | ||||
| -rw-r--r-- | include/linux/rtmutex.h | 104 | ||||
| -rw-r--r-- | include/linux/sched.h | 12 | ||||
| -rw-r--r-- | include/linux/sysctl.h | 1 | ||||
| -rw-r--r-- | init/Kconfig | 5 | ||||
| -rw-r--r-- | kernel/Makefile | 1 | ||||
| -rw-r--r-- | kernel/fork.c | 16 | ||||
| -rw-r--r-- | kernel/rtmutex.c | 904 | ||||
| -rw-r--r-- | kernel/rtmutex.h | 29 | ||||
| -rw-r--r-- | kernel/rtmutex_common.h | 93 | ||||
| -rw-r--r-- | kernel/sysctl.c | 15 | 
11 files changed, 1181 insertions, 0 deletions
| diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 678c1a90380..3a256957fb5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -124,6 +124,7 @@ extern struct group_info init_groups;  	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\  	.fs_excl	= ATOMIC_INIT(0),				\  	.pi_lock	= SPIN_LOCK_UNLOCKED,				\ +	INIT_RT_MUTEXES(tsk)						\  } diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h new file mode 100644 index 00000000000..12309c916c6 --- /dev/null +++ b/include/linux/rtmutex.h @@ -0,0 +1,104 @@ +/* + * RT Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * + * This file contains the public data structure and API definitions. + */ + +#ifndef __LINUX_RT_MUTEX_H +#define __LINUX_RT_MUTEX_H + +#include <linux/linkage.h> +#include <linux/plist.h> +#include <linux/spinlock_types.h> + +/* + * The rt_mutex structure + * + * @wait_lock:	spinlock to protect the structure + * @wait_list:	pilist head to enqueue waiters in priority order + * @owner:	the mutex owner + */ +struct rt_mutex { +	spinlock_t		wait_lock; +	struct plist_head	wait_list; +	struct task_struct	*owner; +#ifdef CONFIG_DEBUG_RT_MUTEXES +	int			save_state; +	struct list_head	held_list_entry; +	unsigned long		acquire_ip; +	const char 		*name, *file; +	int			line; +	void			*magic; +#endif +}; + +struct rt_mutex_waiter; +struct hrtimer_sleeper; + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ +	, .name = #mutexname, .file = __FILE__, .line = __LINE__ +# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __FUNCTION__) + extern void rt_mutex_debug_task_free(struct task_struct *tsk); +#else +# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) +# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL) +# 
define rt_mutex_debug_task_free(t)		do { } while (0) +#endif + +#define __RT_MUTEX_INITIALIZER(mutexname) \ +	{ .wait_lock = SPIN_LOCK_UNLOCKED \ +	, .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \ +	, .owner = NULL \ +	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)} + +#define DEFINE_RT_MUTEX(mutexname) \ +	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) + +/*** + * rt_mutex_is_locked - is the mutex locked + * @lock: the mutex to be queried + * + * Returns 1 if the mutex is locked, 0 if unlocked. + */ +static inline int rt_mutex_is_locked(struct rt_mutex *lock) +{ +	return lock->owner != NULL; +} + +extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); +extern void rt_mutex_destroy(struct rt_mutex *lock); + +extern void rt_mutex_lock(struct rt_mutex *lock); +extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, +						int detect_deadlock); +extern int rt_mutex_timed_lock(struct rt_mutex *lock, +					struct hrtimer_sleeper *timeout, +					int detect_deadlock); + +extern int rt_mutex_trylock(struct rt_mutex *lock); + +extern void rt_mutex_unlock(struct rt_mutex *lock); + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# define INIT_RT_MUTEX_DEBUG(tsk)					\ +	.held_list_head	= LIST_HEAD_INIT(tsk.held_list_head),		\ +	.held_list_lock	= SPIN_LOCK_UNLOCKED +#else +# define INIT_RT_MUTEX_DEBUG(tsk) +#endif + +#ifdef CONFIG_RT_MUTEXES +# define INIT_RT_MUTEXES(tsk)						\ +	.pi_waiters	= PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock),	\ +	INIT_RT_MUTEX_DEBUG(tsk) +#else +# define INIT_RT_MUTEXES(tsk) +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f167645e7e..6ea23c9af41 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -73,6 +73,7 @@ struct sched_param {  #include <linux/seccomp.h>  #include <linux/rcupdate.h>  #include <linux/futex.h> +#include <linux/rtmutex.h>  #include <linux/time.h>  #include <linux/param.h> @@ -858,6 +859,17 @@ struct task_struct {  	/* Protection of the PI 
data structures: */  	spinlock_t pi_lock; +#ifdef CONFIG_RT_MUTEXES +	/* PI waiters blocked on a rt_mutex held by this task */ +	struct plist_head pi_waiters; +	/* Deadlock detection and priority inheritance handling */ +	struct rt_mutex_waiter *pi_blocked_on; +# ifdef CONFIG_DEBUG_RT_MUTEXES +	spinlock_t held_list_lock; +	struct list_head held_list_head; +# endif +#endif +  #ifdef CONFIG_DEBUG_MUTEXES  	/* mutex deadlock detection */  	struct mutex_waiter *blocked_on; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bee12a7a057..46e4d8f2771 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -149,6 +149,7 @@ enum  	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */  	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */  	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */ +	KERN_MAX_LOCK_DEPTH=74,  }; diff --git a/init/Kconfig b/init/Kconfig index df55b366560..f70f2fd273c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -339,9 +339,14 @@ config BASE_FULL  	  kernel data structures. This saves memory on small machines,  	  but may reduce performance. +config RT_MUTEXES +	boolean +	select PLIST +  config FUTEX  	bool "Enable futex support" if EMBEDDED  	default y +	select RT_MUTEXES  	help  	  Disabling this option will cause the kernel to be built without  	  support for "fast userspace mutexes".  
The resulting kernel may not diff --git a/kernel/Makefile b/kernel/Makefile index 752bd7d383a..21df9a338ff 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_FUTEX) += futex.o  ifeq ($(CONFIG_COMPAT),y)  obj-$(CONFIG_FUTEX) += futex_compat.o  endif +obj-$(CONFIG_RT_MUTEXES) += rtmutex.o  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o  obj-$(CONFIG_SMP) += cpu.o spinlock.o  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o diff --git a/kernel/fork.c b/kernel/fork.c index 9b4e54ef022..b664a081fff 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -104,6 +104,7 @@ static kmem_cache_t *mm_cachep;  void free_task(struct task_struct *tsk)  {  	free_thread_info(tsk->thread_info); +	rt_mutex_debug_task_free(tsk);  	free_task_struct(tsk);  }  EXPORT_SYMBOL(free_task); @@ -913,6 +914,19 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)  	return current->pid;  } +static inline void rt_mutex_init_task(struct task_struct *p) +{ +#ifdef CONFIG_RT_MUTEXES +	spin_lock_init(&p->pi_lock); +	plist_head_init(&p->pi_waiters, &p->pi_lock); +	p->pi_blocked_on = NULL; +# ifdef CONFIG_DEBUG_RT_MUTEXES +	spin_lock_init(&p->held_list_lock); +	INIT_LIST_HEAD(&p->held_list_head); +# endif +#endif +} +  /*   * This creates a new process as a copy of the old one,   * but does not actually start it yet. @@ -1034,6 +1048,8 @@ static task_t *copy_process(unsigned long clone_flags,  	mpol_fix_fork_child_flag(p);  #endif +	rt_mutex_init_task(p); +  #ifdef CONFIG_DEBUG_MUTEXES  	p->blocked_on = NULL; /* not blocked yet */  #endif diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c new file mode 100644 index 00000000000..937a474fae9 --- /dev/null +++ b/kernel/rtmutex.c @@ -0,0 +1,904 @@ +/* + * RT-Mutexes: simple blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner. 
+ * + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> + *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + *  Copyright (C) 2006 Esben Nielsen + */ +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/timer.h> + +#include "rtmutex_common.h" + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# include "rtmutex-debug.h" +#else +# include "rtmutex.h" +#endif + +/* + * lock->owner state tracking: + * + * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 + * are used to keep track of the "owner is pending" and "lock has + * waiters" state. + * + * owner	bit1	bit0 + * NULL		0	0	lock is free (fast acquire possible) + * NULL		0	1	invalid state + * NULL		1	0	Transitional State* + * NULL		1	1	invalid state + * taskpointer	0	0	lock is held (fast release possible) + * taskpointer	0	1	task is pending owner + * taskpointer	1	0	lock is held and has waiters + * taskpointer	1	1	task is pending owner and lock has more waiters + * + * Pending ownership is assigned to the top (highest priority) + * waiter of the lock, when the lock is released. The thread is woken + * up and can now take the lock. Until the lock is taken (bit 0 + * cleared) a competing higher priority thread can steal the lock + * which puts the woken up thread back on the waiters list. + * + * The fast atomic compare exchange based acquire and release is only + * possible when bit 0 and 1 of lock->owner are 0. + * + * (*) There's a small time where the owner can be NULL and the + * "lock has waiters" bit is set.  This can happen when grabbing the lock. + * To prevent a cmpxchg of the owner releasing the lock, we need to set this + * bit before looking at the lock, hence the reason this is a transitional + * state. 
+ */ + +static void +rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, +		   unsigned long mask) +{ +	unsigned long val = (unsigned long)owner | mask; + +	if (rt_mutex_has_waiters(lock)) +		val |= RT_MUTEX_HAS_WAITERS; + +	lock->owner = (struct task_struct *)val; +} + +static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) +{ +	lock->owner = (struct task_struct *) +			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); +} + +static void fixup_rt_mutex_waiters(struct rt_mutex *lock) +{ +	if (!rt_mutex_has_waiters(lock)) +		clear_rt_mutex_waiters(lock); +} + +/* + * We can speed up the acquire/release, if the architecture + * supports cmpxchg and if there's no debugging state to be set up + */ +#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) +# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ +	unsigned long owner, *p = (unsigned long *) &lock->owner; + +	do { +		owner = *p; +	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); +} +#else +# define rt_mutex_cmpxchg(l,c,n)	(0) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ +	lock->owner = (struct task_struct *) +			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); +} +#endif + +/* + * Calculate task priority from the waiter list priority + * + * Return task->normal_prio when the waiter list is empty or when + * the waiter is not allowed to do priority boosting + */ +int rt_mutex_getprio(struct task_struct *task) +{ +	if (likely(!task_has_pi_waiters(task))) +		return task->normal_prio; + +	return min(task_top_pi_waiter(task)->pi_list_entry.prio, +		   task->normal_prio); +} + +/* + * Adjust the priority of a task, after its pi_waiters got modified. + * + * This can be both boosting and unboosting. task->pi_lock must be held. 
+ */ +static void __rt_mutex_adjust_prio(struct task_struct *task) +{ +	int prio = rt_mutex_getprio(task); + +	if (task->prio != prio) +		rt_mutex_setprio(task, prio); +} + +/* + * Adjust task priority (undo boosting). Called from the exit path of + * rt_mutex_slowunlock() and rt_mutex_slowlock(). + * + * (Note: We do this outside of the protection of lock->wait_lock to + * allow the lock to be taken while or before we readjust the priority + * of task. We do not use the spin_xx_mutex() variants here as we are + * outside of the debug path.) + */ +static void rt_mutex_adjust_prio(struct task_struct *task) +{ +	unsigned long flags; + +	spin_lock_irqsave(&task->pi_lock, flags); +	__rt_mutex_adjust_prio(task); +	spin_unlock_irqrestore(&task->pi_lock, flags); +} + +/* + * Max number of times we'll walk the boosting chain: + */ +int max_lock_depth = 1024; + +/* + * Adjust the priority chain. Also used for deadlock detection. + * Decreases task's usage by one - may thus free the task. + * Returns 0 or -EDEADLK. + */ +static int rt_mutex_adjust_prio_chain(task_t *task, +				      int deadlock_detect, +				      struct rt_mutex *orig_lock, +				      struct rt_mutex_waiter *orig_waiter +				      __IP_DECL__) +{ +	struct rt_mutex *lock; +	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; +	int detect_deadlock, ret = 0, depth = 0; +	unsigned long flags; + +	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, +							 deadlock_detect); + +	/* +	 * The (de)boosting is a step by step approach with a lot of +	 * pitfalls. We want this to be preemptible and we want hold a +	 * maximum of two locks per step. So we have to check +	 * carefully whether things change under us. +	 */ + again: +	if (++depth > max_lock_depth) { +		static int prev_max; + +		/* +		 * Print this only once. If the admin changes the limit, +		 * print a new message when reaching the limit again. 
+		 */ +		if (prev_max != max_lock_depth) { +			prev_max = max_lock_depth; +			printk(KERN_WARNING "Maximum lock depth %d reached " +			       "task: %s (%d)\n", max_lock_depth, +			       current->comm, current->pid); +		} +		put_task_struct(task); + +		return deadlock_detect ? -EDEADLK : 0; +	} + retry: +	/* +	 * Task can not go away as we did a get_task() before ! +	 */ +	spin_lock_irqsave(&task->pi_lock, flags); + +	waiter = task->pi_blocked_on; +	/* +	 * Check whether the end of the boosting chain has been +	 * reached or the state of the chain has changed while we +	 * dropped the locks. +	 */ +	if (!waiter || !waiter->task) +		goto out_unlock_pi; + +	if (top_waiter && (!task_has_pi_waiters(task) || +			   top_waiter != task_top_pi_waiter(task))) +		goto out_unlock_pi; + +	/* +	 * When deadlock detection is off then we check, if further +	 * priority adjustment is necessary. +	 */ +	if (!detect_deadlock && waiter->list_entry.prio == task->prio) +		goto out_unlock_pi; + +	lock = waiter->lock; +	if (!spin_trylock(&lock->wait_lock)) { +		spin_unlock_irqrestore(&task->pi_lock, flags); +		cpu_relax(); +		goto retry; +	} + +	/* Deadlock detection */ +	if (lock == orig_lock || rt_mutex_owner(lock) == current) { +		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); +		spin_unlock(&lock->wait_lock); +		ret = deadlock_detect ? 
-EDEADLK : 0; +		goto out_unlock_pi; +	} + +	top_waiter = rt_mutex_top_waiter(lock); + +	/* Requeue the waiter */ +	plist_del(&waiter->list_entry, &lock->wait_list); +	waiter->list_entry.prio = task->prio; +	plist_add(&waiter->list_entry, &lock->wait_list); + +	/* Release the task */ +	spin_unlock_irqrestore(&task->pi_lock, flags); +	put_task_struct(task); + +	/* Grab the next task */ +	task = rt_mutex_owner(lock); +	spin_lock_irqsave(&task->pi_lock, flags); + +	if (waiter == rt_mutex_top_waiter(lock)) { +		/* Boost the owner */ +		plist_del(&top_waiter->pi_list_entry, &task->pi_waiters); +		waiter->pi_list_entry.prio = waiter->list_entry.prio; +		plist_add(&waiter->pi_list_entry, &task->pi_waiters); +		__rt_mutex_adjust_prio(task); + +	} else if (top_waiter == waiter) { +		/* Deboost the owner */ +		plist_del(&waiter->pi_list_entry, &task->pi_waiters); +		waiter = rt_mutex_top_waiter(lock); +		waiter->pi_list_entry.prio = waiter->list_entry.prio; +		plist_add(&waiter->pi_list_entry, &task->pi_waiters); +		__rt_mutex_adjust_prio(task); +	} + +	get_task_struct(task); +	spin_unlock_irqrestore(&task->pi_lock, flags); + +	top_waiter = rt_mutex_top_waiter(lock); +	spin_unlock(&lock->wait_lock); + +	if (!detect_deadlock && waiter != top_waiter) +		goto out_put_task; + +	goto again; + + out_unlock_pi: +	spin_unlock_irqrestore(&task->pi_lock, flags); + out_put_task: +	put_task_struct(task); +	return ret; +} + +/* + * Optimization: check if we can steal the lock from the + * assigned pending owner [which might not have taken the + * lock yet]: + */ +static inline int try_to_steal_lock(struct rt_mutex *lock) +{ +	struct task_struct *pendowner = rt_mutex_owner(lock); +	struct rt_mutex_waiter *next; +	unsigned long flags; + +	if (!rt_mutex_owner_pending(lock)) +		return 0; + +	if (pendowner == current) +		return 1; + +	spin_lock_irqsave(&pendowner->pi_lock, flags); +	if (current->prio >= pendowner->prio) { +		spin_unlock_irqrestore(&pendowner->pi_lock, flags); +		return 0; +	} 
+ +	/* +	 * Check if a waiter is enqueued on the pending owners +	 * pi_waiters list. Remove it and readjust pending owners +	 * priority. +	 */ +	if (likely(!rt_mutex_has_waiters(lock))) { +		spin_unlock_irqrestore(&pendowner->pi_lock, flags); +		return 1; +	} + +	/* No chain handling, pending owner is not blocked on anything: */ +	next = rt_mutex_top_waiter(lock); +	plist_del(&next->pi_list_entry, &pendowner->pi_waiters); +	__rt_mutex_adjust_prio(pendowner); +	spin_unlock_irqrestore(&pendowner->pi_lock, flags); + +	/* +	 * We are going to steal the lock and a waiter was +	 * enqueued on the pending owners pi_waiters queue. So +	 * we have to enqueue this waiter into +	 * current->pi_waiters list. This covers the case, +	 * where current is boosted because it holds another +	 * lock and gets unboosted because the booster is +	 * interrupted, so we would delay a waiter with higher +	 * priority as current->normal_prio. +	 * +	 * Note: in the rare case of a SCHED_OTHER task changing +	 * its priority and thus stealing the lock, next->task +	 * might be current: +	 */ +	if (likely(next->task != current)) { +		spin_lock_irqsave(&current->pi_lock, flags); +		plist_add(&next->pi_list_entry, &current->pi_waiters); +		__rt_mutex_adjust_prio(current); +		spin_unlock_irqrestore(&current->pi_lock, flags); +	} +	return 1; +} + +/* + * Try to take an rt-mutex + * + * This fails + * - when the lock has a real owner + * - when a different pending owner exists and has higher priority than current + * + * Must be called with lock->wait_lock held. + */ +static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) +{ +	/* +	 * We have to be careful here if the atomic speedups are +	 * enabled, such that, when +	 *  - no other waiter is on the lock +	 *  - the lock has been released since we did the cmpxchg +	 * the lock can be released or taken while we are doing the +	 * checks and marking the lock with RT_MUTEX_HAS_WAITERS. 
+	 * +	 * The atomic acquire/release aware variant of +	 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting +	 * the WAITERS bit, the atomic release / acquire can not +	 * happen anymore and lock->wait_lock protects us from the +	 * non-atomic case. +	 * +	 * Note, that this might set lock->owner = +	 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended +	 * any more. This is fixed up when we take the ownership. +	 * This is the transitional state explained at the top of this file. +	 */ +	mark_rt_mutex_waiters(lock); + +	if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) +		return 0; + +	/* We got the lock. */ +	debug_rt_mutex_lock(lock __IP__); + +	rt_mutex_set_owner(lock, current, 0); + +	rt_mutex_deadlock_account_lock(lock, current); + +	return 1; +} + +/* + * Task blocks on lock. + * + * Prepare waiter and propagate pi chain + * + * This must be called with lock->wait_lock held. + */ +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +				   struct rt_mutex_waiter *waiter, +				   int detect_deadlock +				   __IP_DECL__) +{ +	struct rt_mutex_waiter *top_waiter = waiter; +	task_t *owner = rt_mutex_owner(lock); +	int boost = 0, res; +	unsigned long flags; + +	spin_lock_irqsave(&current->pi_lock, flags); +	__rt_mutex_adjust_prio(current); +	waiter->task = current; +	waiter->lock = lock; +	plist_node_init(&waiter->list_entry, current->prio); +	plist_node_init(&waiter->pi_list_entry, current->prio); + +	/* Get the top priority waiter on the lock */ +	if (rt_mutex_has_waiters(lock)) +		top_waiter = rt_mutex_top_waiter(lock); +	plist_add(&waiter->list_entry, &lock->wait_list); + +	current->pi_blocked_on = waiter; + +	spin_unlock_irqrestore(&current->pi_lock, flags); + +	if (waiter == rt_mutex_top_waiter(lock)) { +		spin_lock_irqsave(&owner->pi_lock, flags); +		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); +		plist_add(&waiter->pi_list_entry, &owner->pi_waiters); + +		__rt_mutex_adjust_prio(owner); +		if (owner->pi_blocked_on) { +			boost = 
1; +			get_task_struct(owner); +		} +		spin_unlock_irqrestore(&owner->pi_lock, flags); +	} +	else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { +		spin_lock_irqsave(&owner->pi_lock, flags); +		if (owner->pi_blocked_on) { +			boost = 1; +			get_task_struct(owner); +		} +		spin_unlock_irqrestore(&owner->pi_lock, flags); +	} +	if (!boost) +		return 0; + +	spin_unlock(&lock->wait_lock); + +	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, +					 waiter __IP__); + +	spin_lock(&lock->wait_lock); + +	return res; +} + +/* + * Wake up the next waiter on the lock. + * + * Remove the top waiter from the current tasks waiter list and from + * the lock waiter list. Set it as pending owner. Then wake it up. + * + * Called with lock->wait_lock held. + */ +static void wakeup_next_waiter(struct rt_mutex *lock) +{ +	struct rt_mutex_waiter *waiter; +	struct task_struct *pendowner; +	unsigned long flags; + +	spin_lock_irqsave(&current->pi_lock, flags); + +	waiter = rt_mutex_top_waiter(lock); +	plist_del(&waiter->list_entry, &lock->wait_list); + +	/* +	 * Remove it from current->pi_waiters. We do not adjust a +	 * possible priority boost right now. We execute wakeup in the +	 * boosted mode and go back to normal after releasing +	 * lock->wait_lock. +	 */ +	plist_del(&waiter->pi_list_entry, &current->pi_waiters); +	pendowner = waiter->task; +	waiter->task = NULL; + +	rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); + +	spin_unlock_irqrestore(&current->pi_lock, flags); + +	/* +	 * Clear the pi_blocked_on variable and enqueue a possible +	 * waiter into the pi_waiters list of the pending owner. This +	 * prevents that in case the pending owner gets unboosted a +	 * waiter with higher priority than pending-owner->normal_prio +	 * is blocked on the unboosted (pending) owner. 
+	 */ +	spin_lock_irqsave(&pendowner->pi_lock, flags); + +	WARN_ON(!pendowner->pi_blocked_on); +	WARN_ON(pendowner->pi_blocked_on != waiter); +	WARN_ON(pendowner->pi_blocked_on->lock != lock); + +	pendowner->pi_blocked_on = NULL; + +	if (rt_mutex_has_waiters(lock)) { +		struct rt_mutex_waiter *next; + +		next = rt_mutex_top_waiter(lock); +		plist_add(&next->pi_list_entry, &pendowner->pi_waiters); +	} +	spin_unlock_irqrestore(&pendowner->pi_lock, flags); + +	wake_up_process(pendowner); +} + +/* + * Remove a waiter from a lock + * + * Must be called with lock->wait_lock held + */ +static void remove_waiter(struct rt_mutex *lock, +			  struct rt_mutex_waiter *waiter  __IP_DECL__) +{ +	int first = (waiter == rt_mutex_top_waiter(lock)); +	int boost = 0; +	task_t *owner = rt_mutex_owner(lock); +	unsigned long flags; + +	spin_lock_irqsave(&current->pi_lock, flags); +	plist_del(&waiter->list_entry, &lock->wait_list); +	waiter->task = NULL; +	current->pi_blocked_on = NULL; +	spin_unlock_irqrestore(&current->pi_lock, flags); + +	if (first && owner != current) { + +		spin_lock_irqsave(&owner->pi_lock, flags); + +		plist_del(&waiter->pi_list_entry, &owner->pi_waiters); + +		if (rt_mutex_has_waiters(lock)) { +			struct rt_mutex_waiter *next; + +			next = rt_mutex_top_waiter(lock); +			plist_add(&next->pi_list_entry, &owner->pi_waiters); +		} +		__rt_mutex_adjust_prio(owner); + +		if (owner->pi_blocked_on) { +			boost = 1; +			get_task_struct(owner); +		} +		spin_unlock_irqrestore(&owner->pi_lock, flags); +	} + +	WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + +	if (!boost) +		return; + +	spin_unlock(&lock->wait_lock); + +	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL __IP__); + +	spin_lock(&lock->wait_lock); +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, +		  struct hrtimer_sleeper *timeout, +		  int detect_deadlock __IP_DECL__) +{ +	struct rt_mutex_waiter waiter; +	int ret = 0; + +	
debug_rt_mutex_init_waiter(&waiter); +	waiter.task = NULL; + +	spin_lock(&lock->wait_lock); + +	/* Try to acquire the lock again: */ +	if (try_to_take_rt_mutex(lock __IP__)) { +		spin_unlock(&lock->wait_lock); +		return 0; +	} + +	set_current_state(state); + +	/* Setup the timer, when timeout != NULL */ +	if (unlikely(timeout)) +		hrtimer_start(&timeout->timer, timeout->timer.expires, +			      HRTIMER_ABS); + +	for (;;) { +		/* Try to acquire the lock: */ +		if (try_to_take_rt_mutex(lock __IP__)) +			break; + +		/* +		 * TASK_INTERRUPTIBLE checks for signals and +		 * timeout. Ignored otherwise. +		 */ +		if (unlikely(state == TASK_INTERRUPTIBLE)) { +			/* Signal pending? */ +			if (signal_pending(current)) +				ret = -EINTR; +			if (timeout && !timeout->task) +				ret = -ETIMEDOUT; +			if (ret) +				break; +		} + +		/* +		 * waiter.task is NULL the first time we come here and +		 * when we have been woken up by the previous owner +		 * but the lock got stolen by a higher prio task. +		 */ +		if (!waiter.task) { +			ret = task_blocks_on_rt_mutex(lock, &waiter, +						      detect_deadlock __IP__); +			/* +			 * If we got woken up by the owner then start loop +			 * all over without going into schedule to try +			 * to get the lock now: +			 */ +			if (unlikely(!waiter.task)) +				continue; + +			if (unlikely(ret)) +				break; +		} +		spin_unlock(&lock->wait_lock); + +		debug_rt_mutex_print_deadlock(&waiter); + +		schedule(); + +		spin_lock(&lock->wait_lock); +		set_current_state(state); +	} + +	set_current_state(TASK_RUNNING); + +	if (unlikely(waiter.task)) +		remove_waiter(lock, &waiter __IP__); + +	/* +	 * try_to_take_rt_mutex() sets the waiter bit +	 * unconditionally. We might have to fix that up. +	 */ +	fixup_rt_mutex_waiters(lock); + +	spin_unlock(&lock->wait_lock); + +	/* Remove pending timer: */ +	if (unlikely(timeout)) +		hrtimer_cancel(&timeout->timer); + +	/* +	 * Readjust priority, when we did not get the lock. 
We might +	 * have been the pending owner and boosted. Since we did not +	 * take the lock, the PI boost has to go. +	 */ +	if (unlikely(ret)) +		rt_mutex_adjust_prio(current); + +	debug_rt_mutex_free_waiter(&waiter); + +	return ret; +} + +/* + * Slow path try-lock function: + */ +static inline int +rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) +{ +	int ret = 0; + +	spin_lock(&lock->wait_lock); + +	if (likely(rt_mutex_owner(lock) != current)) { + +		ret = try_to_take_rt_mutex(lock __IP__); +		/* +		 * try_to_take_rt_mutex() sets the lock waiters +		 * bit unconditionally. Clean this up. +		 */ +		fixup_rt_mutex_waiters(lock); +	} + +	spin_unlock(&lock->wait_lock); + +	return ret; +} + +/* + * Slow path to release a rt-mutex: + */ +static void __sched +rt_mutex_slowunlock(struct rt_mutex *lock) +{ +	spin_lock(&lock->wait_lock); + +	debug_rt_mutex_unlock(lock); + +	rt_mutex_deadlock_account_unlock(current); + +	if (!rt_mutex_has_waiters(lock)) { +		lock->owner = NULL; +		spin_unlock(&lock->wait_lock); +		return; +	} + +	wakeup_next_waiter(lock); + +	spin_unlock(&lock->wait_lock); + +	/* Undo pi boosting if necessary: */ +	rt_mutex_adjust_prio(current); +} + +/* + * debug aware fast / slowpath lock,trylock,unlock + * + * The atomic acquire/release ops are compiled away, when either the + * architecture does not support cmpxchg or when debugging is enabled. 
+ */ +static inline int +rt_mutex_fastlock(struct rt_mutex *lock, int state, +		  int detect_deadlock, +		  int (*slowfn)(struct rt_mutex *lock, int state, +				struct hrtimer_sleeper *timeout, +				int detect_deadlock __IP_DECL__)) +{ +	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 0; +	} else +		return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); +} + +static inline int +rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, +			struct hrtimer_sleeper *timeout, int detect_deadlock, +			int (*slowfn)(struct rt_mutex *lock, int state, +				      struct hrtimer_sleeper *timeout, +				      int detect_deadlock __IP_DECL__)) +{ +	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 0; +	} else +		return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); +} + +static inline int +rt_mutex_fasttrylock(struct rt_mutex *lock, +		     int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) +{ +	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 1; +	} +	return slowfn(lock __RET_IP__); +} + +static inline void +rt_mutex_fastunlock(struct rt_mutex *lock, +		    void (*slowfn)(struct rt_mutex *lock)) +{ +	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) +		rt_mutex_deadlock_account_unlock(current); +	else +		slowfn(lock); +} + +/** + * rt_mutex_lock - lock a rt_mutex + * + * @lock: the rt_mutex to be locked + */ +void __sched rt_mutex_lock(struct rt_mutex *lock) +{ +	might_sleep(); + +	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock); + +/** + * rt_mutex_lock_interruptible - lock a rt_mutex interruptible + * + * @lock: 		the rt_mutex to be locked + * @detect_deadlock:	deadlock detection on/off + * + * Returns: + *  0 		on success + * -EINTR 	when interrupted by a signal + * 
-EDEADLK	when the lock would deadlock (when deadlock detection is on) + */ +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, +						 int detect_deadlock) +{ +	might_sleep(); + +	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, +				 detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +/** + * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible + *				       the timeout structure is provided + *				       by the caller + * + * @lock: 		the rt_mutex to be locked + * @timeout:		timeout structure or NULL (no timeout) + * @detect_deadlock:	deadlock detection on/off + * + * Returns: + *  0 		on success + * -EINTR 	when interrupted by a signal + * -ETIMEOUT	when the timeout expired + * -EDEADLK	when the lock would deadlock (when deadlock detection is on) + */ +int +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, +		    int detect_deadlock) +{ +	might_sleep(); + +	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +				       detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); + +/** + * rt_mutex_trylock - try to lock a rt_mutex + * + * @lock:	the rt_mutex to be locked + * + * Returns 1 on success and 0 on contention + */ +int __sched rt_mutex_trylock(struct rt_mutex *lock) +{ +	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); +} +EXPORT_SYMBOL_GPL(rt_mutex_trylock); + +/** + * rt_mutex_unlock - unlock a rt_mutex + * + * @lock: the rt_mutex to be unlocked + */ +void __sched rt_mutex_unlock(struct rt_mutex *lock) +{ +	rt_mutex_fastunlock(lock, rt_mutex_slowunlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +/*** + * rt_mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. 
+ */ +void rt_mutex_destroy(struct rt_mutex *lock) +{ +	WARN_ON(rt_mutex_is_locked(lock)); +#ifdef CONFIG_DEBUG_RT_MUTEXES +	lock->magic = NULL; +#endif +} + +EXPORT_SYMBOL_GPL(rt_mutex_destroy); + +/** + * __rt_mutex_init - initialize the rt lock + * @lock: the rt lock to be initialized + * @name: name string handed on to debug_rt_mutex_init() + * + * Initialize the rt lock to unlocked state (owner, wait_lock, wait_list). + * + * Initializing of a locked rt lock is not allowed + */ +void __rt_mutex_init(struct rt_mutex *lock, const char *name) +{ +	lock->owner = NULL; +	spin_lock_init(&lock->wait_lock); +	plist_head_init(&lock->wait_list, &lock->wait_lock); + +	debug_rt_mutex_init(lock, name); +} +EXPORT_SYMBOL_GPL(__rt_mutex_init); diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h new file mode 100644 index 00000000000..1e0fca13ff7 --- /dev/null +++ b/kernel/rtmutex.h @@ -0,0 +1,29 @@ +/* + * RT-Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * + * This file contains macros used solely by rtmutex.c. + * Non-debug version. 
+ */ + +#define __IP_DECL__ +#define __IP__ +#define __RET_IP__ +#define rt_mutex_deadlock_check(l)			(0) +#define rt_mutex_deadlock_account_lock(m, t)		do { } while (0) +#define rt_mutex_deadlock_account_unlock(l)		do { } while (0) +#define debug_rt_mutex_init_waiter(w)			do { } while (0) +#define debug_rt_mutex_free_waiter(w)			do { } while (0) +#define debug_rt_mutex_lock(l)				do { } while (0) +#define debug_rt_mutex_proxy_lock(l,p)			do { } while (0) +#define debug_rt_mutex_proxy_unlock(l)			do { } while (0) +#define debug_rt_mutex_unlock(l)			do { } while (0) +#define debug_rt_mutex_init(m, n)			do { } while (0) +#define debug_rt_mutex_deadlock(d, a ,l)		do { } while (0) +#define debug_rt_mutex_print_deadlock(w)		do { } while (0) +#define debug_rt_mutex_detect_deadlock(w,d)		(d) +#define debug_rt_mutex_reset_waiter(w)			do { } while (0) diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h new file mode 100644 index 00000000000..50eed60eb08 --- /dev/null +++ b/kernel/rtmutex_common.h @@ -0,0 +1,93 @@ +/* + * RT Mutexes: blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner: + * + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * + * This file contains the private data structure and API definitions. + */ + +#ifndef __KERNEL_RTMUTEX_COMMON_H +#define __KERNEL_RTMUTEX_COMMON_H + +#include <linux/rtmutex.h> + +/* + * This is the control structure for tasks blocked on a rt_mutex, + * which is allocated on the kernel stack of the blocked task. 
+ * @list_entry:		pi node to enqueue into the mutex waiters list + * @pi_list_entry:	pi node to enqueue into the mutex owner waiters list + * @task:		task reference to the blocked task + * @lock:		rt_mutex of this waiter; rt_mutex_top_waiter() BUGs on mismatch + */ +struct rt_mutex_waiter { +	struct plist_node	list_entry; +	struct plist_node	pi_list_entry; +	struct task_struct	*task; +	struct rt_mutex		*lock; +#ifdef CONFIG_DEBUG_RT_MUTEXES +	unsigned long		ip; +	pid_t			deadlock_task_pid; +	struct rt_mutex		*deadlock_lock; +#endif +}; + +/* + * Various helpers to access the waiters-plist: + */ +static inline int rt_mutex_has_waiters(struct rt_mutex *lock) +{ +	return !plist_head_empty(&lock->wait_list); +} + +static inline struct rt_mutex_waiter * +rt_mutex_top_waiter(struct rt_mutex *lock) +{ +	struct rt_mutex_waiter *w; + +	w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter, +			       list_entry); +	BUG_ON(w->lock != lock); + +	return w; +} + +static inline int task_has_pi_waiters(struct task_struct *p) +{ +	return !plist_head_empty(&p->pi_waiters); +} + +static inline struct rt_mutex_waiter * +task_top_pi_waiter(struct task_struct *p) +{ +	return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter, +				  pi_list_entry); +} + +/* + * lock->owner state tracking (flag bits live in the low two pointer bits): + */ +#define RT_MUTEX_OWNER_PENDING	1UL +#define RT_MUTEX_HAS_WAITERS	2UL +#define RT_MUTEX_OWNER_MASKALL	3UL + +static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) +{ +	return (struct task_struct *) +		((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); +} + +static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock) +{ + 	return (struct task_struct *) +		((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); +} + +static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock) +{ +	return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING; +} + +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f54afed8426..93a2c539864 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -133,6 +133,10 @@ 
extern int acct_parm[];  extern int no_unaligned_warning;  #endif +#ifdef CONFIG_RT_MUTEXES +extern int max_lock_depth; +#endif +  static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,  		       ctl_table *, void **);  static int proc_doutsstring(ctl_table *table, int write, struct file *filp, @@ -688,6 +692,17 @@ static ctl_table kern_table[] = {  		.proc_handler	= &proc_dointvec,  	},  #endif +#ifdef CONFIG_RT_MUTEXES +	{ +		.ctl_name	= KERN_MAX_LOCK_DEPTH, +		.procname	= "max_lock_depth", +		.data		= &max_lock_depth, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= &proc_dointvec, +	}, +#endif +  	{ .ctl_name = 0 }  }; |