Diffstat (limited to 'fs/btrfs/locking.c')
-rw-r--r--	fs/btrfs/locking.c	208
1 file changed, 190 insertions, 18 deletions
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 39bae7761db..68fd9ccf180 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -26,45 +26,215 @@
 #include "locking.h"
 
 /*
- * locks the per buffer mutex in an extent buffer.  This uses adaptive locks
- * and the spin is not tuned very extensively.  The spinning does make a big
- * difference in almost every workload, but spinning for the right amount of
- * time needs some help.
- *
- * In general, we want to spin as long as the lock holder is doing btree
- * searches, and we should give up if they are in more expensive code.
+ * btrfs_header_level() isn't free, so don't call it when lockdep isn't
+ * on
  */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void spin_nested(struct extent_buffer *eb)
+{
+	spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
+}
+#else
+static inline void spin_nested(struct extent_buffer *eb)
+{
+	spin_lock(&eb->lock);
+}
+#endif
 
-int btrfs_tree_lock(struct extent_buffer *eb)
+/*
+ * Setting a lock to blocking will drop the spinlock and set the
+ * flag that forces other procs who want the lock to wait.  After
+ * this you can safely schedule with the lock held.
+ */
+void btrfs_set_lock_blocking(struct extent_buffer *eb)
 {
-	int i;
+	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+		set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
+		spin_unlock(&eb->lock);
+	}
+	/* exit with the spin lock released and the bit set */
+}
 
-	if (mutex_trylock(&eb->mutex))
-		return 0;
+/*
+ * clearing the blocking flag will take the spinlock again.
+ * After this you can't safely schedule
+ */
+void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+	if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+		spin_nested(eb);
+		clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
+		smp_mb__after_clear_bit();
+	}
+	/* exit with the spin lock held */
+}
+
+/*
+ * unfortunately, many of the places that currently set a lock to blocking
+ * don't end up blocking for very long, and often they don't block
+ * at all.  For a dbench 50 run, if we don't spin on the blocking bit
+ * at all, the context switch rate can jump up to 400,000/sec or more.
+ *
+ * So, we're still stuck with this crummy spin on the blocking bit,
+ * at least until the most common causes of the short blocks
+ * can be dealt with.
+ */
+static int btrfs_spin_on_block(struct extent_buffer *eb)
+{
+	int i;
 	for (i = 0; i < 512; i++) {
 		cpu_relax();
-		if (mutex_trylock(&eb->mutex))
+		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+			return 1;
+		if (need_resched())
+			break;
+	}
+	return 0;
+}
+
+/*
+ * This is somewhat different from trylock.  It will take the
+ * spinlock but if it finds the lock is set to blocking, it will
+ * return without the lock held.
+ *
+ * returns 1 if it was able to take the lock and zero otherwise
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
+int btrfs_try_spin_lock(struct extent_buffer *eb)
+{
+	int i;
+
+	spin_nested(eb);
+	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+		return 1;
+	spin_unlock(&eb->lock);
+
+	/* spin for a bit on the BLOCKING flag */
+	for (i = 0; i < 2; i++) {
+		if (!btrfs_spin_on_block(eb))
+			break;
+
+		spin_nested(eb);
+		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+			return 1;
+		spin_unlock(&eb->lock);
+	}
+	return 0;
+}
+
+/*
+ * the autoremove wake function will return 0 if it tried to wake up
+ * a process that was already awake, which means that process won't
+ * count as an exclusive wakeup.  The waitq code will continue waking
+ * procs until it finds one that was actually sleeping.
+ *
+ * For btrfs, this isn't quite what we want.  We want a single proc
+ * to be notified that the lock is ready for taking.  If that proc
+ * already happens to be awake, great, it will loop around and try for
+ * the lock.
+ *
+ * So, btrfs_wake_function always returns 1, even when the proc that we
+ * tried to wake up was already awake.
+ */
+static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
+			       int sync, void *key)
+{
+	autoremove_wake_function(wait, mode, sync, key);
+	return 1;
+}
+
+/*
+ * returns with the extent buffer spinlocked.
+ *
+ * This will spin and/or wait as required to take the lock, and then
+ * return with the spinlock held.
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
+int btrfs_tree_lock(struct extent_buffer *eb)
+{
+	DEFINE_WAIT(wait);
+	wait.func = btrfs_wake_function;
+
+	while(1) {
+		spin_nested(eb);
+
+		/* nobody is blocking, exit with the spinlock held */
+		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
 			return 0;
+
+		/*
+		 * we have the spinlock, but the real owner is blocking.
+		 * wait for them
+		 */
+		spin_unlock(&eb->lock);
+
+		/*
+		 * spin for a bit, and if the blocking flag goes away,
+		 * loop around
+		 */
+		if (btrfs_spin_on_block(eb))
+			continue;
+
+		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+			schedule();
+
+		finish_wait(&eb->lock_wq, &wait);
 	}
-	cpu_relax();
-	mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
 	return 0;
 }
 
+/*
+ * Very quick trylock, this does not spin or schedule.  It returns
+ * 1 with the spinlock held if it was able to take the lock, or it
+ * returns zero if it was unable to take the lock.
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
 int btrfs_try_tree_lock(struct extent_buffer *eb)
 {
-	return mutex_trylock(&eb->mutex);
+	if (spin_trylock(&eb->lock)) {
+		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+			/*
+			 * we've got the spinlock, but the real owner is
+			 * blocking.  Drop the spinlock and return failure
+			 */
+			spin_unlock(&eb->lock);
+			return 0;
+		}
+		return 1;
+	}
+	/* someone else has the spinlock, give up */
+	return 0;
 }
 
 int btrfs_tree_unlock(struct extent_buffer *eb)
 {
-	mutex_unlock(&eb->mutex);
+	/*
+	 * if we were a blocking owner, we don't have the spinlock held,
+	 * just clear the bit and look for waiters
+	 */
+	if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+		smp_mb__after_clear_bit();
+	else
+		spin_unlock(&eb->lock);
+
+	if (waitqueue_active(&eb->lock_wq))
+		wake_up(&eb->lock_wq);
 	return 0;
 }
 
 int btrfs_tree_locked(struct extent_buffer *eb)
 {
-	return mutex_is_locked(&eb->mutex);
+	return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) ||
+			spin_is_locked(&eb->lock);
 }
 
 /*
@@ -75,12 +245,14 @@ int btrfs_path_lock_waiting(struct btrfs_path *path, int level)
 {
 	int i;
 	struct extent_buffer *eb;
+
 	for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) {
 		eb = path->nodes[i];
 		if (!eb)
 			break;
 		smp_mb();
-		if (!list_empty(&eb->mutex.wait_list))
+		if (spin_is_contended(&eb->lock) ||
+		    waitqueue_active(&eb->lock_wq))
 			return 1;
 	}
 	return 0;
 }
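
For context, here is a minimal sketch of how a caller is expected to combine the new primitives; it is not part of the patch. The functions btrfs_tree_lock(), btrfs_set_lock_blocking(), btrfs_clear_lock_blocking() and btrfs_tree_unlock() and the eb->lock / eb->lock_wq fields come from the diff above, while the walk_one_node() helper and the work it pretends to do are purely illustrative.

/*
 * Illustrative only -- not part of the commit.  A caller takes the lock
 * as a plain spinlock for cheap btree work, switches it to blocking mode
 * before anything that can sleep, and switches back before unlocking.
 */
static void walk_one_node(struct extent_buffer *eb)
{
	btrfs_tree_lock(eb);		/* returns with eb->lock held; scheduling not safe */

	/* cheap work under the spinlock: key compares, slot searches, ... */

	btrfs_set_lock_blocking(eb);	/* drops eb->lock, sets EXTENT_BUFFER_BLOCKING */

	/* expensive work that may schedule: reading blocks, allocations, ... */

	btrfs_clear_lock_blocking(eb);	/* re-takes eb->lock, clears the bit */
	btrfs_tree_unlock(eb);		/* releases the lock and wakes a waiter on eb->lock_wq */
}

The point of the split is that a blocking owner still excludes other lockers: btrfs_tree_lock(), btrfs_try_tree_lock() and btrfs_try_spin_lock() all back off while EXTENT_BUFFER_BLOCKING is set, and btrfs_tree_unlock() wakes a single exclusive waiter because btrfs_wake_function() always reports a successful wakeup.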