Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 227
 1 file changed, 135 insertions(+), 92 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 06b2635073f..521e9d4424f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "math.h"

 #undef SCRAMBLE_DELAYED_REFS

@@ -649,24 +650,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }

-static u64 div_factor(u64 num, int factor)
-{
-	if (factor == 10)
-		return num;
-	num *= factor;
-	do_div(num, 10);
-	return num;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
-	if (factor == 100)
-		return num;
-	num *= factor;
-	do_div(num, 100);
-	return num;
-}
-
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner)
 {
@@ -1835,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,


 	/* Tell the block device(s) that the sectors can be discarded */
-	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
 			      bytenr, &num_bytes, &bbio, 0);
 	/* Error condition is -ENOMEM */
 	if (!ret) {
@@ -2314,6 +2297,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 				kfree(extent_op);

 				if (ret) {
+					list_del_init(&locked_ref->cluster);
+					mutex_unlock(&locked_ref->mutex);
+
 					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
 					spin_lock(&delayed_refs->lock);
 					return ret;
@@ -2356,6 +2342,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		count++;

 		if (ret) {
+			if (locked_ref) {
+				list_del_init(&locked_ref->cluster);
+				mutex_unlock(&locked_ref->mutex);
+			}
 			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
@@ -3661,7 +3651,7 @@ out:

 static int can_overcommit(struct btrfs_root *root,
 			  struct btrfs_space_info *space_info, u64 bytes,
-			  int flush)
+			  enum btrfs_reserve_flush_enum flush)
 {
 	u64 profile = btrfs_get_alloc_profile(root, 0);
 	u64 avail;
@@ -3685,11 +3675,11 @@ static int can_overcommit(struct btrfs_root *root,
 		avail >>= 1;

 	/*
-	 * If we aren't flushing don't let us overcommit too much, say
-	 * 1/8th of the space.  If we can flush, let it overcommit up to
-	 * 1/2 of the space.
+	 * If we aren't flushing all things, let us overcommit up to
+	 * 1/2 of the space.  If we can flush, don't let us overcommit
+	 * too much, let it overcommit up to 1/8 of the space.
 	 */
-	if (flush)
+	if (flush == BTRFS_RESERVE_FLUSH_ALL)
 		avail >>= 3;
 	else
 		avail >>= 1;
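
The `int flush` flag threaded through the reservation paths becomes a three-level policy here. The enum itself is added to ctree.h elsewhere in this series, so it does not appear in this file's diff; it looks roughly like the sketch below (the comments paraphrase the intent of each level):

enum btrfs_reserve_flush_enum {
	/* Caller holds a transaction handle; flushing could deadlock. */
	BTRFS_RESERVE_NO_FLUSH,
	/* Flushing delalloc may deadlock, so run only the safe steps. */
	BTRFS_RESERVE_FLUSH_LIMIT,
	/* Free to flush everything and to wait on other flushers. */
	BTRFS_RESERVE_FLUSH_ALL,
};

Per the corrected comment in the hunk above, only BTRFS_RESERVE_FLUSH_ALL is throttled to overcommitting 1/8 of the available space; the weaker modes may overcommit up to 1/2.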
@@ -3699,6 +3689,20 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }

+static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+					       unsigned long nr_pages,
+					       enum wb_reason reason)
+{
+	if (!writeback_in_progress(sb->s_bdi) &&
+	    down_read_trylock(&sb->s_umount)) {
+		writeback_inodes_sb_nr(sb, nr_pages, reason);
+		up_read(&sb->s_umount);
+		return 1;
+	}
+
+	return 0;
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3713,6 +3717,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	long time_left;
 	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 	int loops = 0;
+	enum btrfs_reserve_flush_enum flush;

 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -3730,8 +3735,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
-					       WB_REASON_FS_FREE_SPACE);
+		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
+						    nr_pages,
+						    WB_REASON_FS_FREE_SPACE);

 		/*
 		 * We need to wait for the async pages to actually start before
@@ -3740,8 +3746,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		wait_event(root->fs_info->async_submit_wait,
 			   !atomic_read(&root->fs_info->async_delalloc_pages));

+		if (!trans)
+			flush = BTRFS_RESERVE_FLUSH_ALL;
+		else
+			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
-		if (can_overcommit(root, space_info, orig, !trans)) {
+		if (can_overcommit(root, space_info, orig, flush)) {
 			spin_unlock(&space_info->lock);
 			break;
 		}
@@ -3899,7 +3909,8 @@ static int flush_space(struct btrfs_root *root,
  */
 static int reserve_metadata_bytes(struct btrfs_root *root,
 				  struct btrfs_block_rsv *block_rsv,
-				  u64 orig_bytes, int flush)
+				  u64 orig_bytes,
+				  enum btrfs_reserve_flush_enum flush)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
 	u64 used;
@@ -3912,10 +3923,11 @@ again:
 	ret = 0;
 	spin_lock(&space_info->lock);
 	/*
-	 * We only want to wait if somebody other than us is flushing and we are
-	 * actually alloed to flush.
+	 * We only want to wait if somebody other than us is flushing and we
+	 * are actually allowed to flush all things.
 	 */
-	while (flush && !flushing && space_info->flush) {
+	while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
+	       space_info->flush) {
 		spin_unlock(&space_info->lock);
 		/*
 		 * If we have a trans handle we can't wait because the flusher
@@ -3981,23 +3993,40 @@ again:
 	 * Couldn't make our reservation, save our place so while we're trying
 	 * to reclaim space we can actually use it instead of somebody else
 	 * stealing it from us.
+	 *
+	 * We make the other tasks wait for the flush only when we can flush
+	 * all things.
 	 */
-	if (ret && flush) {
+	if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
 		flushing = true;
 		space_info->flush = 1;
 	}

 	spin_unlock(&space_info->lock);

-	if (!ret || !flush)
+	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
 		goto out;

 	ret = flush_space(root, space_info, num_bytes, orig_bytes,
 			  flush_state);
 	flush_state++;
+
+	/*
+	 * If we are FLUSH_LIMIT, we cannot flush delalloc, or a deadlock
+	 * would happen, so skip the delalloc flush.
+	 */
+	if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+	    (flush_state == FLUSH_DELALLOC ||
+	     flush_state == FLUSH_DELALLOC_WAIT))
+		flush_state = ALLOC_CHUNK;
+
 	if (!ret)
 		goto again;
-	else if (flush_state <= COMMIT_TRANS)
+	else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+		 flush_state < COMMIT_TRANS)
+		goto again;
+	else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+		 flush_state <= COMMIT_TRANS)
 		goto again;

 out:
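
The FLUSH_LIMIT handling above leans on the flush_state ladder declared earlier in extent-tree.c. As an illustration only (the constants mirror the existing enum; the exact numbering is assumed here), the stepping reduces to:

/* Condensed model of the state stepping in reserve_metadata_bytes(). */
enum {
	FLUSH_DELAYED_ITEMS_NR = 1,
	FLUSH_DELAYED_ITEMS,
	FLUSH_DELALLOC,
	FLUSH_DELALLOC_WAIT,
	ALLOC_CHUNK,
	COMMIT_TRANS,
};

static int next_flush_state(int state, int limit_only)
{
	state++;
	/* FLUSH_LIMIT callers must not touch delalloc, so hop over both
	 * delalloc states straight to chunk allocation. */
	if (limit_only &&
	    (state == FLUSH_DELALLOC || state == FLUSH_DELALLOC_WAIT))
		state = ALLOC_CHUNK;
	return state;
}

Note that FLUSH_LIMIT also stops retrying one step earlier (flush_state < COMMIT_TRANS), so it never forces a transaction commit.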
@@ -4148,9 +4177,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
 	kfree(rsv);
 }

-static inline int __block_rsv_add(struct btrfs_root *root,
-				  struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes, int flush)
+int btrfs_block_rsv_add(struct btrfs_root *root,
+			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+			enum btrfs_reserve_flush_enum flush)
 {
 	int ret;

@@ -4166,20 +4195,6 @@ static inline int __block_rsv_add(struct btrfs_root *root,
 	return ret;
 }

-int btrfs_block_rsv_add(struct btrfs_root *root,
-			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes)
-{
-	return __block_rsv_add(root, block_rsv, num_bytes, 1);
-}
-
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes)
-{
-	return __block_rsv_add(root, block_rsv, num_bytes, 0);
-}
-
 int btrfs_block_rsv_check(struct btrfs_root *root,
 			  struct btrfs_block_rsv *block_rsv, int min_factor)
 {
@@ -4198,9 +4213,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
 	return ret;
 }

-static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
-					   struct btrfs_block_rsv *block_rsv,
-					   u64 min_reserved, int flush)
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+			   enum btrfs_reserve_flush_enum flush)
 {
 	u64 num_bytes = 0;
 	int ret = -ENOSPC;
@@ -4228,20 +4243,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
 	return ret;
 }

-int btrfs_block_rsv_refill(struct btrfs_root *root,
-			   struct btrfs_block_rsv *block_rsv,
-			   u64 min_reserved)
-{
-	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
-}
-
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
-				   struct btrfs_block_rsv *block_rsv,
-				   u64 min_reserved)
-{
-	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
-}
-
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes)
@@ -4532,17 +4533,27 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	u64 csum_bytes;
 	unsigned nr_extents = 0;
 	int extra_reserve = 0;
-	int flush = 1;
+	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret;
+	bool delalloc_lock = true;

-	/* Need to be holding the i_mutex here if we aren't free space cache */
-	if (btrfs_is_free_space_inode(inode))
-		flush = 0;
+	/* If we are a free space inode we need to not flush since we will be in
+	 * the middle of a transaction commit.  We also don't need the delalloc
+	 * mutex since we won't race with anybody.  We need this mostly to make
+	 * lockdep shut its filthy mouth.
+	 */
+	if (btrfs_is_free_space_inode(inode)) {
+		flush = BTRFS_RESERVE_NO_FLUSH;
+		delalloc_lock = false;
+	}

-	if (flush && btrfs_transaction_in_commit(root->fs_info))
+	if (flush != BTRFS_RESERVE_NO_FLUSH &&
+	    btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);

-	mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
+	if (delalloc_lock)
+		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
+
 	num_bytes = ALIGN(num_bytes, root->sectorsize);

 	spin_lock(&BTRFS_I(inode)->lock);
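
With the _noflush wrappers deleted above, the flush policy moves to the call sites. A hypothetical caller (rsv and num_bytes stand in for whatever the real call site uses) changes mechanically:

	/* before: */
	ret = btrfs_block_rsv_add_noflush(root, rsv, num_bytes);

	/* after: the policy is spelled out where the reservation is made */
	ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH);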
@@ -4572,7 +4583,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
 		if (ret) {
-			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+			spin_lock(&BTRFS_I(inode)->lock);
+			calc_csum_metadata_size(inode, num_bytes, 0);
+			spin_unlock(&BTRFS_I(inode)->lock);
+			if (delalloc_lock)
+				mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 			return ret;
 		}
 	}
@@ -4607,7 +4622,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 						      btrfs_ino(inode),
 						      to_free, 0);
 		}
-		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+		if (root->fs_info->quota_enabled) {
+			btrfs_qgroup_free(root, num_bytes +
+						nr_extents * root->leafsize);
+		}
+		if (delalloc_lock)
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 		return ret;
 	}

@@ -4619,7 +4639,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	}
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	spin_unlock(&BTRFS_I(inode)->lock);
-	mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+
+	if (delalloc_lock)
+		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

 	if (to_reserve)
 		trace_btrfs_space_reservation(root->fs_info, "delalloc",
@@ -4969,9 +4991,13 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_group_cache *cache = NULL;
+	struct btrfs_space_info *space_info;
+	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
 	u64 len;
+	bool readonly;

 	while (start <= end) {
+		readonly = false;
 		if (!cache ||
 		    start >= cache->key.objectid + cache->key.offset) {
 			if (cache)
@@ -4989,15 +5015,30 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 		}

 		start += len;
+		space_info = cache->space_info;

-		spin_lock(&cache->space_info->lock);
+		spin_lock(&space_info->lock);
 		spin_lock(&cache->lock);
 		cache->pinned -= len;
-		cache->space_info->bytes_pinned -= len;
-		if (cache->ro)
-			cache->space_info->bytes_readonly += len;
+		space_info->bytes_pinned -= len;
+		if (cache->ro) {
+			space_info->bytes_readonly += len;
+			readonly = true;
+		}
 		spin_unlock(&cache->lock);
-		spin_unlock(&cache->space_info->lock);
+		if (!readonly && global_rsv->space_info == space_info) {
+			spin_lock(&global_rsv->lock);
+			if (!global_rsv->full) {
+				len = min(len, global_rsv->size -
+					  global_rsv->reserved);
+				global_rsv->reserved += len;
+				space_info->bytes_may_use += len;
+				if (global_rsv->reserved >= global_rsv->size)
+					global_rsv->full = 1;
+			}
+			spin_unlock(&global_rsv->lock);
+		}
+		spin_unlock(&space_info->lock);
 	}

 	if (cache)
@@ -5466,7 +5507,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 	return 0;
 }

-static int __get_block_group_index(u64 flags)
+int __get_raid_index(u64 flags)
 {
 	int index;

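
The new branch in unpin_extent_range() steers freed pinned space into the global block reserve before the rest returns to the general pool, but only when the block group is not read-only and the reserve is not already full. The arithmetic amounts to the small sketch below (min() as in the kernel macro; the pointer parameters stand in for the global_rsv and space_info fields):

/* Sketch of the top-up math: take as much of the freed length as the
 * global reserve still has room for. */
static u64 global_rsv_topup(u64 freed, u64 rsv_size, u64 *rsv_reserved,
			    u64 *bytes_may_use)
{
	u64 take = min(freed, rsv_size - *rsv_reserved);

	*rsv_reserved += take;	/* held by the global reserve again */
	*bytes_may_use += take;	/* mirrored into the space_info */
	return take;
}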
@@ -5486,7 +5527,7 @@ static int __get_block_group_index(u64 flags)

 static int get_block_group_index(struct btrfs_block_group_cache *cache)
 {
-	return __get_block_group_index(cache->flags);
+	return __get_raid_index(cache->flags);
 }

 enum btrfs_loop_type {
@@ -6269,7 +6310,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);

 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+					     BTRFS_RESERVE_NO_FLUSH);
 		/*
 		 * If we couldn't reserve metadata bytes try and use some from
 		 * the global reserve.
@@ -6292,11 +6334,11 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 		static DEFINE_RATELIMIT_STATE(_rs,
 				DEFAULT_RATELIMIT_INTERVAL,
 				/*DEFAULT_RATELIMIT_BURST*/ 2);
-		if (__ratelimit(&_rs)) {
-			printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
-			WARN_ON(1);
-		}
-		ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+		if (__ratelimit(&_rs))
+			WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
+			     ret);
+		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+					     BTRFS_RESERVE_NO_FLUSH);
 		if (!ret) {
 			return block_rsv;
 		} else if (ret && block_rsv != global_rsv) {
@@ -7427,7 +7469,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	 */
 	target = get_restripe_target(root->fs_info, block_group->flags);
 	if (target) {
-		index = __get_block_group_index(extended_to_chunk(target));
+		index = __get_raid_index(extended_to_chunk(target));
 	} else {
 		/*
 		 * this is just a balance, so if we were marked as full
@@ -7461,7 +7503,8 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 		 * check to make sure we can actually find a chunk with enough
 		 * space to fit our block group in.
 		 */
-		if (device->total_bytes > device->bytes_used + min_free) {
+		if (device->total_bytes > device->bytes_used + min_free &&
+		    !device->is_tgtdev_for_dev_replace) {
 			ret = find_free_dev_extent(device, min_free,
 						   &dev_offset, NULL);
 			if (!ret)
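
The final hunk ties into the device-replace series: a device that is currently the target of a running replace must not be considered when probing for room to relocate a block group. Condensed into a standalone predicate (field names taken from the hunk above):

/* A replace-target device is never a candidate for the new chunk. */
static bool device_has_room(const struct btrfs_device *device, u64 min_free)
{
	return device->total_bytes > device->bytes_used + min_free &&
	       !device->is_tgtdev_for_dev_replace;
}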