diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 187 | 
1 file changed, 140 insertions, 47 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1c1cf216be8..a44072a692a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1871,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,  int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  			 struct btrfs_root *root,  			 u64 bytenr, u64 num_bytes, u64 parent, -			 u64 root_objectid, u64 owner, u64 offset) +			 u64 root_objectid, u64 owner, u64 offset, int for_cow)  {  	int ret; +	struct btrfs_fs_info *fs_info = root->fs_info; +  	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&  	       root_objectid == BTRFS_TREE_LOG_OBJECTID);  	if (owner < BTRFS_FIRST_FREE_OBJECTID) { -		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, +		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, +					num_bytes,  					parent, root_objectid, (int)owner, -					BTRFS_ADD_DELAYED_REF, NULL); +					BTRFS_ADD_DELAYED_REF, NULL, for_cow);  	} else { -		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, +		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, +					num_bytes,  					parent, root_objectid, owner, offset, -					BTRFS_ADD_DELAYED_REF, NULL); +					BTRFS_ADD_DELAYED_REF, NULL, for_cow);  	}  	return ret;  } @@ -2232,6 +2236,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,  		}  		/* +		 * locked_ref is the head node, so we have to go one +		 * node back for any delayed ref updates +		 */ +		ref = select_delayed_ref(locked_ref); + +		if (ref && ref->seq && +		    btrfs_check_delayed_seq(delayed_refs, ref->seq)) { +			/* +			 * there are still refs with lower seq numbers in the +			 * process of being added. Don't run this ref yet. 
+			 */ +			list_del_init(&locked_ref->cluster); +			mutex_unlock(&locked_ref->mutex); +			locked_ref = NULL; +			delayed_refs->num_heads_ready++; +			spin_unlock(&delayed_refs->lock); +			cond_resched(); +			spin_lock(&delayed_refs->lock); +			continue; +		} + +		/*  		 * record the must insert reserved flag before we  		 * drop the spin lock.  		 */ @@ -2241,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,  		extent_op = locked_ref->extent_op;  		locked_ref->extent_op = NULL; -		/* -		 * locked_ref is the head node, so we have to go one -		 * node back for any delayed ref updates -		 */ -		ref = select_delayed_ref(locked_ref);  		if (!ref) {  			/* All delayed refs have been processed, Go ahead  			 * and send the head node to run_one_delayed_ref, @@ -2276,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,  		ref->in_tree = 0;  		rb_erase(&ref->rb_node, &delayed_refs->root);  		delayed_refs->num_entries--; - +		/* +		 * we modified num_entries, but as we're currently running +		 * delayed refs, skip +		 *     wake_up(&delayed_refs->seq_wait); +		 * here. 
+		 */  		spin_unlock(&delayed_refs->lock);  		ret = run_one_delayed_ref(trans, root, ref, extent_op, @@ -2297,6 +2323,23 @@ next:  	return count;  } + +static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, +			unsigned long num_refs) +{ +	struct list_head *first_seq = delayed_refs->seq_head.next; + +	spin_unlock(&delayed_refs->lock); +	pr_debug("waiting for more refs (num %ld, first %p)\n", +		 num_refs, first_seq); +	wait_event(delayed_refs->seq_wait, +		   num_refs != delayed_refs->num_entries || +		   delayed_refs->seq_head.next != first_seq); +	pr_debug("done waiting for more refs (num %ld, first %p)\n", +		 delayed_refs->num_entries, delayed_refs->seq_head.next); +	spin_lock(&delayed_refs->lock); +} +  /*   * this starts processing the delayed reference count updates and   * extent insertions we have queued up so far.  count can be @@ -2312,8 +2355,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,  	struct btrfs_delayed_ref_node *ref;  	struct list_head cluster;  	int ret; +	u64 delayed_start;  	int run_all = count == (unsigned long)-1;  	int run_most = 0; +	unsigned long num_refs = 0; +	int consider_waiting;  	if (root == root->fs_info->extent_root)  		root = root->fs_info->tree_root; @@ -2325,6 +2371,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,  	delayed_refs = &trans->transaction->delayed_refs;  	INIT_LIST_HEAD(&cluster);  again: +	consider_waiting = 0;  	spin_lock(&delayed_refs->lock);  	if (count == 0) {  		count = delayed_refs->num_entries * 2; @@ -2341,11 +2388,35 @@ again:  		 * of refs to process starting at the first one we are able to  		 * lock  		 */ +		delayed_start = delayed_refs->run_delayed_start;  		ret = btrfs_find_ref_cluster(trans, &cluster,  					     delayed_refs->run_delayed_start);  		if (ret)  			break; +		if (delayed_start >= delayed_refs->run_delayed_start) { +			if (consider_waiting == 0) { +				/* +				 * btrfs_find_ref_cluster looped. let's do one +				 * more cycle. 
if we don't run any delayed ref +				 * during that cycle (because we can't because +				 * all of them are blocked) and if the number of +				 * refs doesn't change, we avoid busy waiting. +				 */ +				consider_waiting = 1; +				num_refs = delayed_refs->num_entries; +			} else { +				wait_for_more_refs(delayed_refs, num_refs); +				/* +				 * after waiting, things have changed. we +				 * dropped the lock and someone else might have +				 * run some refs, built new clusters and so on. +				 * therefore, we restart staleness detection. +				 */ +				consider_waiting = 0; +			} +		} +  		ret = run_clustered_refs(trans, root, &cluster);  		BUG_ON(ret < 0); @@ -2353,6 +2424,11 @@ again:  		if (count == 0)  			break; + +		if (ret || delayed_refs->run_delayed_start == 0) { +			/* refs were run, let's reset staleness detection */ +			consider_waiting = 0; +		}  	}  	if (run_all) { @@ -2410,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,  	extent_op->update_key = 0;  	extent_op->is_data = is_data ? 
1 : 0; -	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); +	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, +					  num_bytes, extent_op);  	if (ret)  		kfree(extent_op);  	return ret; @@ -2595,7 +2672,7 @@ out:  static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  			   struct btrfs_root *root,  			   struct extent_buffer *buf, -			   int full_backref, int inc) +			   int full_backref, int inc, int for_cow)  {  	u64 bytenr;  	u64 num_bytes; @@ -2608,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  	int level;  	int ret = 0;  	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, -			    u64, u64, u64, u64, u64, u64); +			    u64, u64, u64, u64, u64, u64, int);  	ref_root = btrfs_header_owner(buf);  	nritems = btrfs_header_nritems(buf); @@ -2645,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,  			key.offset -= btrfs_file_extent_offset(buf, fi);  			ret = process_func(trans, root, bytenr, num_bytes,  					   parent, ref_root, key.objectid, -					   key.offset); +					   key.offset, for_cow);  			if (ret)  				goto fail;  		} else {  			bytenr = btrfs_node_blockptr(buf, i);  			num_bytes = btrfs_level_size(root, level - 1);  			ret = process_func(trans, root, bytenr, num_bytes, -					   parent, ref_root, level - 1, 0); +					   parent, ref_root, level - 1, 0, +					   for_cow);  			if (ret)  				goto fail;  		} @@ -2664,15 +2742,15 @@ fail:  }  int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, -		  struct extent_buffer *buf, int full_backref) +		  struct extent_buffer *buf, int full_backref, int for_cow)  { -	return __btrfs_mod_ref(trans, root, buf, full_backref, 1); +	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);  }  int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, -		  struct extent_buffer *buf, int full_backref) +		  struct extent_buffer *buf, int full_backref, int 
for_cow)  { -	return __btrfs_mod_ref(trans, root, buf, full_backref, 0); +	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);  }  static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -4954,6 +5032,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,  	rb_erase(&head->node.rb_node, &delayed_refs->root);  	delayed_refs->num_entries--; +	if (waitqueue_active(&delayed_refs->seq_wait)) +		wake_up(&delayed_refs->seq_wait);  	/*  	 * we don't take a ref on the node because we're removing it from the @@ -4981,16 +5061,17 @@ out:  void btrfs_free_tree_block(struct btrfs_trans_handle *trans,  			   struct btrfs_root *root,  			   struct extent_buffer *buf, -			   u64 parent, int last_ref) +			   u64 parent, int last_ref, int for_cow)  {  	struct btrfs_block_group_cache *cache = NULL;  	int ret;  	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { -		ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, -						parent, root->root_key.objectid, -						btrfs_header_level(buf), -						BTRFS_DROP_DELAYED_REF, NULL); +		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, +					buf->start, buf->len, +					parent, root->root_key.objectid, +					btrfs_header_level(buf), +					BTRFS_DROP_DELAYED_REF, NULL, for_cow);  		BUG_ON(ret);  	} @@ -5025,12 +5106,12 @@ out:  	btrfs_put_block_group(cache);  } -int btrfs_free_extent(struct btrfs_trans_handle *trans, -		      struct btrfs_root *root, -		      u64 bytenr, u64 num_bytes, u64 parent, -		      u64 root_objectid, u64 owner, u64 offset) +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, +		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, +		      u64 owner, u64 offset, int for_cow)  {  	int ret; +	struct btrfs_fs_info *fs_info = root->fs_info;  	/*  	 * tree log blocks never actually go into the extent allocation @@ -5042,14 +5123,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,  		btrfs_pin_extent(root, 
bytenr, num_bytes, 1);  		ret = 0;  	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) { -		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, +		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, +					num_bytes,  					parent, root_objectid, (int)owner, -					BTRFS_DROP_DELAYED_REF, NULL); +					BTRFS_DROP_DELAYED_REF, NULL, for_cow);  		BUG_ON(ret);  	} else { -		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, -					parent, root_objectid, owner, -					offset, BTRFS_DROP_DELAYED_REF, NULL); +		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, +						num_bytes, +						parent, root_objectid, owner, +						offset, BTRFS_DROP_DELAYED_REF, +						NULL, for_cow);  		BUG_ON(ret);  	}  	return ret; @@ -5877,9 +5961,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,  	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); -	ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, -					 0, root_objectid, owner, offset, -					 BTRFS_ADD_DELAYED_EXTENT, NULL); +	ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, +					 ins->offset, 0, +					 root_objectid, owner, offset, +					 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);  	return ret;  } @@ -6049,7 +6134,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,  					struct btrfs_root *root, u32 blocksize,  					u64 parent, u64 root_objectid,  					struct btrfs_disk_key *key, int level, -					u64 hint, u64 empty_size) +					u64 hint, u64 empty_size, int for_cow)  {  	struct btrfs_key ins;  	struct btrfs_block_rsv *block_rsv; @@ -6093,10 +6178,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,  		extent_op->update_flags = 1;  		extent_op->is_data = 0; -		ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, +		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, +					ins.objectid,  					ins.offset, parent, root_objectid,  					level, BTRFS_ADD_DELAYED_EXTENT, -					extent_op); +					extent_op, 
for_cow);  		BUG_ON(ret);  	}  	return buf; @@ -6113,6 +6199,7 @@ struct walk_control {  	int keep_locks;  	int reada_slot;  	int reada_count; +	int for_reloc;  };  #define DROP_REFERENCE	1 @@ -6251,9 +6338,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,  	/* wc->stage == UPDATE_BACKREF */  	if (!(wc->flags[level] & flag)) {  		BUG_ON(!path->locks[level]); -		ret = btrfs_inc_ref(trans, root, eb, 1); +		ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);  		BUG_ON(ret); -		ret = btrfs_dec_ref(trans, root, eb, 0); +		ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);  		BUG_ON(ret);  		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,  						  eb->len, flag, 0); @@ -6397,7 +6484,7 @@ skip:  		}  		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, -					root->root_key.objectid, level - 1, 0); +				root->root_key.objectid, level - 1, 0, 0);  		BUG_ON(ret);  	}  	btrfs_tree_unlock(next); @@ -6471,9 +6558,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,  	if (wc->refs[level] == 1) {  		if (level == 0) {  			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) -				ret = btrfs_dec_ref(trans, root, eb, 1); +				ret = btrfs_dec_ref(trans, root, eb, 1, +						    wc->for_reloc);  			else -				ret = btrfs_dec_ref(trans, root, eb, 0); +				ret = btrfs_dec_ref(trans, root, eb, 0, +						    wc->for_reloc);  			BUG_ON(ret);  		}  		/* make block locked assertion in clean_tree_block happy */ @@ -6500,7 +6589,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,  			       btrfs_header_owner(path->nodes[level + 1]));  	} -	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); +	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);  out:  	wc->refs[level] = 0;  	wc->flags[level] = 0; @@ -6584,7 +6673,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,   * blocks are properly updated.   
*/  void btrfs_drop_snapshot(struct btrfs_root *root, -			 struct btrfs_block_rsv *block_rsv, int update_ref) +			 struct btrfs_block_rsv *block_rsv, int update_ref, +			 int for_reloc)  {  	struct btrfs_path *path;  	struct btrfs_trans_handle *trans; @@ -6672,6 +6762,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,  	wc->stage = DROP_REFERENCE;  	wc->update_ref = update_ref;  	wc->keep_locks = 0; +	wc->for_reloc = for_reloc;  	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);  	while (1) { @@ -6756,6 +6847,7 @@ out:   * drop subtree rooted at tree block 'node'.   *   * NOTE: this function will unlock and release tree block 'node' + * only used by relocation code   */  int btrfs_drop_subtree(struct btrfs_trans_handle *trans,  			struct btrfs_root *root, @@ -6800,6 +6892,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,  	wc->stage = DROP_REFERENCE;  	wc->update_ref = 0;  	wc->keep_locks = 1; +	wc->for_reloc = 1;  	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);  	while (1) {  |