diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
| -rw-r--r-- | fs/btrfs/ctree.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 12 | ||||
| -rw-r--r-- | fs/btrfs/delayed-ref.c | 585 | ||||
| -rw-r--r-- | fs/btrfs/delayed-ref.h | 182 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 29 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 1496 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 54 | ||||
| -rw-r--r-- | fs/btrfs/transaction.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/tree-defrag.c | 2 | 
11 files changed, 1234 insertions, 1140 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d2cf5a54a4b..9adf5e4f7e9 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \  	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \  	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \  	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ -	   compression.o +	   compression.o delayed-ref.o  else  # Normal Makefile diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 87c90387283..bebc9fd1666 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -922,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  		spin_unlock(&root->node_lock);  		ret = btrfs_update_extent_ref(trans, root, child->start, +					      child->len,  					      mid->start, child->start,  					      root->root_key.objectid,  					      trans->transid, level - 1); @@ -2075,7 +2076,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,  	spin_unlock(&root->node_lock);  	ret = btrfs_update_extent_ref(trans, root, lower->start, -				      lower->start, c->start, +				      lower->len, lower->start, c->start,  				      root->root_key.objectid,  				      trans->transid, level - 1);  	BUG_ON(ret); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3a37ba7a8d6..ced5fd85dc3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -688,8 +688,6 @@ struct btrfs_fs_info {  	struct rb_root block_group_cache_tree;  	struct extent_io_tree pinned_extents; -	struct extent_io_tree pending_del; -	struct extent_io_tree extent_ins;  	/* logical->physical extent mapping */  	struct btrfs_mapping_tree mapping_tree; @@ -717,7 +715,6 @@ struct btrfs_fs_info {  	struct mutex tree_log_mutex;  	struct mutex transaction_kthread_mutex;  	struct mutex cleaner_mutex; -	struct mutex extent_ins_mutex;  	struct mutex pinned_mutex;  	struct mutex chunk_mutex;  	struct mutex 
drop_mutex; @@ -1704,18 +1701,15 @@ static inline struct dentry *fdentry(struct file *file)  }  /* extent-tree.c */ +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, +			   struct btrfs_root *root, unsigned long count);  int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, -			    struct btrfs_root *root, u64 bytenr, -			    u64 num_bytes, u32 *refs);  int btrfs_update_pinned_extents(struct btrfs_root *root,  				u64 bytenr, u64 num, int pin);  int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,  			struct btrfs_root *root, struct extent_buffer *leaf);  int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,  			  struct btrfs_root *root, u64 objectid, u64 bytenr); -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, -			 struct btrfs_root *root);  int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);  struct btrfs_block_group_cache *btrfs_lookup_block_group(  						 struct btrfs_fs_info *info, @@ -1777,7 +1771,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  			 u64 root_objectid, u64 ref_generation,  			 u64 owner_objectid);  int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, -			    struct btrfs_root *root, u64 bytenr, +			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,  			    u64 orig_parent, u64 parent,  			    u64 root_objectid, u64 ref_generation,  			    u64 owner_objectid); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 00000000000..874565a1f63 --- /dev/null +++ b/fs/btrfs/delayed-ref.c @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2009 Oracle.  All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/sched.h> +#include <linux/sort.h> +#include <linux/ftrace.h> +#include "ctree.h" +#include "delayed-ref.h" +#include "transaction.h" + +/* + * delayed back reference update tracking.  For subvolume trees + * we queue up extent allocations and backref maintenance for + * delayed processing.   This avoids deep call chains where we + * add extents in the middle of btrfs_search_slot, and it allows + * us to buffer up frequently modified backrefs in an rb tree instead + * of hammering updates on the extent allocation tree. + * + * Right now this code is only used for reference counted trees, but + * the long term goal is to get rid of the similar code for delayed + * extent tree modifications. + */ + +/* + * entries in the rb tree are ordered by the byte number of the extent + * and by the byte number of the parent block. + */ +static int comp_entry(struct btrfs_delayed_ref_node *ref, +		      u64 bytenr, u64 parent) +{ +	if (bytenr < ref->bytenr) +		return -1; +	if (bytenr > ref->bytenr) +		return 1; +	if (parent < ref->parent) +		return -1; +	if (parent > ref->parent) +		return 1; +	return 0; +} + +/* + * insert a new ref into the rbtree.  This returns any existing refs + * for the same (bytenr,parent) tuple, or NULL if the new node was properly + * inserted. 
+ */ +static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, +						  u64 bytenr, u64 parent, +						  struct rb_node *node) +{ +	struct rb_node **p = &root->rb_node; +	struct rb_node *parent_node = NULL; +	struct btrfs_delayed_ref_node *entry; +	int cmp; + +	while (*p) { +		parent_node = *p; +		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, +				 rb_node); + +		cmp = comp_entry(entry, bytenr, parent); +		if (cmp < 0) +			p = &(*p)->rb_left; +		else if (cmp > 0) +			p = &(*p)->rb_right; +		else +			return entry; +	} + +	entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); +	rb_link_node(node, parent_node, p); +	rb_insert_color(node, root); +	return NULL; +} + +/* + * find an entry based on (bytenr,parent).  This returns the delayed + * ref if it was able to find one, or NULL if nothing was in that spot + */ +static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, +						  u64 bytenr, u64 parent) +{ +	struct rb_node *n = root->rb_node; +	struct btrfs_delayed_ref_node *entry; +	int cmp; + +	while (n) { +		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); +		WARN_ON(!entry->in_tree); + +		cmp = comp_entry(entry, bytenr, parent); +		if (cmp < 0) +			n = n->rb_left; +		else if (cmp > 0) +			n = n->rb_right; +		else +			return entry; +	} +	return NULL; +} + +/* + * Locking on delayed refs is done by taking a lock on the head node, + * which has the (impossible) parent id of (u64)-1.  Once a lock is held + * on the head node, you're allowed (and required) to process all the + * delayed refs for a given byte number in the tree. + * + * This will walk forward in the rbtree until it finds a head node it + * is able to lock.  It might not lock the delayed ref you asked for, + * and so it will return the one it did lock in next_ret and return 0. + * + * If no locks are taken, next_ret is set to null and 1 is returned.  This + * means there are no more unlocked head nodes in the rbtree. 
+ */ +int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans, +			   struct btrfs_delayed_ref_node *ref, +			   struct btrfs_delayed_ref_head **next_ret) +{ +	struct rb_node *node; +	struct btrfs_delayed_ref_head *head; +	int ret = 0; + +	while (1) { +		if (btrfs_delayed_ref_is_head(ref)) { +			head = btrfs_delayed_node_to_head(ref); +			if (mutex_trylock(&head->mutex)) { +				*next_ret = head; +				ret = 0; +				break; +			} +		} +		node = rb_next(&ref->rb_node); +		if (!node) { +			ret = 1; +			*next_ret = NULL; +			break; +		} +		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); +	} +	return ret; +} + +/* + * This checks to see if there are any delayed refs in the + * btree for a given bytenr.  It returns one if it finds any + * and zero otherwise. + * + * If it only finds a head node, it returns 0. + * + * The idea is to use this when deciding if you can safely delete an + * extent from the extent allocation tree.  There may be a pending + * ref in the rbtree that adds or removes references, so as long as this + * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent + * allocation tree. + */ +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) +{ +	struct btrfs_delayed_ref_node *ref; +	struct btrfs_delayed_ref_root *delayed_refs; +	struct rb_node *prev_node; +	int ret = 0; + +	delayed_refs = &trans->transaction->delayed_refs; +	spin_lock(&delayed_refs->lock); + +	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); +	if (ref) { +		prev_node = rb_prev(&ref->rb_node); +		if (!prev_node) +			goto out; +		ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, +			       rb_node); +		if (ref->bytenr == bytenr) +			ret = 1; +	} +out: +	spin_unlock(&delayed_refs->lock); +	return ret; +} + +/* + * helper function to lookup reference count + * + * the head node for delayed ref is used to store the sum of all the + * reference count modifications queued up in the rbtree.  
This way you + * can check to see what the reference count would be if all of the + * delayed refs are processed. + * + * Returns 0 with *refs set to the on-disk count plus the queued + * modifications, or a negative errno, in which case *refs is not touched. + */ +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, +			    struct btrfs_root *root, u64 bytenr, +			    u64 num_bytes, u32 *refs) +{ +	struct btrfs_delayed_ref_node *ref; +	struct btrfs_delayed_ref_head *head; +	struct btrfs_delayed_ref_root *delayed_refs; +	struct btrfs_path *path; +	struct extent_buffer *leaf; +	struct btrfs_extent_item *ei; +	struct btrfs_key key; +	u32 num_refs; +	int ret; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	key.objectid = bytenr; +	key.type = BTRFS_EXTENT_ITEM_KEY; +	key.offset = num_bytes; +	delayed_refs = &trans->transaction->delayed_refs; +again: +	ret = btrfs_search_slot(trans, root->fs_info->extent_root, +				&key, path, 0, 0); +	/* +	 * the delayed ref spin lock is not held yet, so a failed +	 * search must skip the unlock done at the out label +	 */ +	if (ret < 0) +		goto out_free; + +	if (ret == 0) { +		leaf = path->nodes[0]; +		ei = btrfs_item_ptr(leaf, path->slots[0], +				    struct btrfs_extent_item); +		num_refs = btrfs_extent_refs(leaf, ei); +	} else { +		/* no extent item in the tree, only delayed refs can count */ +		num_refs = 0; +		ret = 0; +	} + +	spin_lock(&delayed_refs->lock); +	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1); +	if (ref) { +		head = btrfs_delayed_node_to_head(ref); +		if (mutex_trylock(&head->mutex)) { +			num_refs += ref->ref_mod; +			mutex_unlock(&head->mutex); +			*refs = num_refs; +			goto out; +		} + +		/* +		 * the head mutex is busy.  Take a reference on the node so +		 * it can't be freed, drop the spin lock and the path, wait +		 * for the mutex holder to finish and then start over +		 */ +		atomic_inc(&ref->refs); +		spin_unlock(&delayed_refs->lock); + +		btrfs_release_path(root->fs_info->extent_root, path); + +		mutex_lock(&head->mutex); +		mutex_unlock(&head->mutex); +		btrfs_put_delayed_ref(ref); +		goto again; +	} else { +		*refs = num_refs; +	} +out: +	spin_unlock(&delayed_refs->lock); +out_free: +	btrfs_free_path(path); +	return ret; +} + +/* + * helper function to update an extent delayed ref in the + * rbtree.  existing and update must both have the same + * bytenr and parent + * + * This may free existing if the update cancels out whatever + * operation it was doing. 
+ */ +static noinline void +update_existing_ref(struct btrfs_trans_handle *trans, +		    struct btrfs_delayed_ref_root *delayed_refs, +		    struct btrfs_delayed_ref_node *existing, +		    struct btrfs_delayed_ref_node *update) +{ +	struct btrfs_delayed_ref *existing_ref; +	struct btrfs_delayed_ref *ref; + +	existing_ref = btrfs_delayed_node_to_ref(existing); +	ref = btrfs_delayed_node_to_ref(update); + +	if (ref->pin) +		existing_ref->pin = 1; + +	if (ref->action != existing_ref->action) { +		/* +		 * this is effectively undoing either an add or a +		 * drop.  We decrement the ref_mod, and if it goes +		 * down to zero we just delete the entry without +		 * ever changing the extent allocation tree. +		 */ +		existing->ref_mod--; +		if (existing->ref_mod == 0) { +			rb_erase(&existing->rb_node, +				 &delayed_refs->root); +			existing->in_tree = 0; +			btrfs_put_delayed_ref(existing); +			delayed_refs->num_entries--; +			if (trans->delayed_ref_updates) +				trans->delayed_ref_updates--; +		} +	} else { +		if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { +			/* if we're adding refs, make sure all the +			 * details match up.  The extent could +			 * have been totally freed and reallocated +			 * by a different owner before the delayed +			 * ref entries were removed. +			 */ +			existing_ref->owner_objectid = ref->owner_objectid; +			existing_ref->generation = ref->generation; +			existing_ref->root = ref->root; +			existing->num_bytes = update->num_bytes; +		} +		/* +		 * the action on the existing ref matches +		 * the action on the ref we're trying to add. 
+		 * Bump the ref_mod by the update's ref_mod so the backref +		 * that is eventually added/removed has the correct +		 * reference count +		 */ +		existing->ref_mod += update->ref_mod; +	} +} + +/* + * helper function to update the accounting in the head ref + * existing and update must have the same bytenr + * + * the ref_mod of update is folded into existing, so the head keeps + * the running sum of the modifications queued for this bytenr. + */ +static noinline void +update_existing_head_ref(struct btrfs_delayed_ref_node *existing, +			 struct btrfs_delayed_ref_node *update) +{ +	struct btrfs_delayed_ref_head *existing_ref; +	struct btrfs_delayed_ref_head *ref; + +	existing_ref = btrfs_delayed_node_to_head(existing); +	ref = btrfs_delayed_node_to_head(update); + +	if (ref->must_insert_reserved) { +		/* if the extent was freed and then +		 * reallocated before the delayed ref +		 * entries were processed, we can end up +		 * with an existing head ref without +		 * the must_insert_reserved flag set. +		 * Set it again here +		 */ +		existing_ref->must_insert_reserved = ref->must_insert_reserved; + +		/* +		 * update the num_bytes so we make sure the accounting +		 * is done correctly +		 */ +		existing->num_bytes = update->num_bytes; + +	} + +	/* +	 * update the reference mod on the head to reflect this new operation +	 */ +	existing->ref_mod += update->ref_mod; +} + +/* + * helper function to actually insert a delayed ref into the rbtree. + * this does all the dirty work in terms of maintaining the correct + * overall modification count in the head node and properly dealing + * with updating existing nodes as new modifications are queued. 
+ */ +static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, +			  struct btrfs_delayed_ref_node *ref, +			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, +			  u64 ref_generation, u64 owner_objectid, int action, +			  int pin) +{ +	struct btrfs_delayed_ref_node *existing; +	struct btrfs_delayed_ref *full_ref; +	struct btrfs_delayed_ref_head *head_ref; +	struct btrfs_delayed_ref_root *delayed_refs; +	int count_mod = 1; +	int must_insert_reserved = 0; + +	/* +	 * the head node stores the sum of all the mods, so dropping a ref +	 * should drop the sum in the head node by one. +	 */ +	if (parent == (u64)-1 && action == BTRFS_DROP_DELAYED_REF) +		count_mod = -1; + +	/* +	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update +	 * the reserved accounting when the extent is finally added, or +	 * if a later modification deletes the delayed ref without ever +	 * inserting the extent into the extent allocation tree. +	 * ref->must_insert_reserved is the flag used to record +	 * that accounting mods are required. +	 * +	 * Once we record must_insert_reserved, switch the action to +	 * BTRFS_ADD_DELAYED_REF because other special casing is not required. 
+	 */ +	if (action == BTRFS_ADD_DELAYED_EXTENT) { +		must_insert_reserved = 1; +		action = BTRFS_ADD_DELAYED_REF; +	} else { +		must_insert_reserved = 0; +	} + + +	delayed_refs = &trans->transaction->delayed_refs; + +	/* first set the basic ref node struct up */ +	atomic_set(&ref->refs, 1); +	ref->bytenr = bytenr; +	ref->parent = parent; +	ref->ref_mod = count_mod; +	ref->in_tree = 1; +	ref->num_bytes = num_bytes; + +	if (btrfs_delayed_ref_is_head(ref)) { +		head_ref = btrfs_delayed_node_to_head(ref); +		head_ref->must_insert_reserved = must_insert_reserved; +		mutex_init(&head_ref->mutex); +	} else { +		full_ref = btrfs_delayed_node_to_ref(ref); +		full_ref->root = ref_root; +		full_ref->generation = ref_generation; +		full_ref->owner_objectid = owner_objectid; +		full_ref->pin = pin; +		full_ref->action = action; +	} + +	existing = tree_insert(&delayed_refs->root, bytenr, +			       parent, &ref->rb_node); + +	if (existing) { +		if (btrfs_delayed_ref_is_head(ref)) +			update_existing_head_ref(existing, ref); +		else +			update_existing_ref(trans, delayed_refs, existing, ref); + +		/* +		 * we've updated the existing ref, free the newly +		 * allocated ref +		 */ +		kfree(ref); +	} else { +		delayed_refs->num_entries++; +		trans->delayed_ref_updates++; +	} +	return 0; +} + +/* + * add a delayed ref to the tree.  This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + */ +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, +			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, +			  u64 ref_generation, u64 owner_objectid, int action, +			  int pin) +{ +	struct btrfs_delayed_ref *ref; +	struct btrfs_delayed_ref_head *head_ref; +	struct btrfs_delayed_ref_root *delayed_refs; +	int ret; + +	ref = kmalloc(sizeof(*ref), GFP_NOFS); +	if (!ref) +		return -ENOMEM; + +	/* +	 * the parent = 0 case comes from cases where we don't actually +	 * know the parent yet.  
It will get updated later via a add/drop +	 * pair. +	 */ +	if (parent == 0) +		parent = bytenr; + +	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); +	if (!head_ref) { +		kfree(ref); +		return -ENOMEM; +	} +	delayed_refs = &trans->transaction->delayed_refs; +	spin_lock(&delayed_refs->lock); + +	/* +	 * insert both the head node and the new ref without dropping +	 * the spin lock +	 */ +	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, +				      (u64)-1, 0, 0, 0, action, pin); +	BUG_ON(ret); + +	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, +				      parent, ref_root, ref_generation, +				      owner_objectid, action, pin); +	BUG_ON(ret); +	spin_unlock(&delayed_refs->lock); +	return 0; +} + +/* + * add a delayed ref to the tree.  This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + * + * The main point of this call is to add and remove a backreference in a single + * shot, taking the lock only once, and only searching for the head node once. + * + * It is the same as doing a ref add and delete in two separate calls. + */ +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, +			  u64 bytenr, u64 num_bytes, u64 orig_parent, +			  u64 parent, u64 orig_ref_root, u64 ref_root, +			  u64 orig_ref_generation, u64 ref_generation, +			  u64 owner_objectid, int pin) +{ +	struct btrfs_delayed_ref *ref; +	struct btrfs_delayed_ref *old_ref; +	struct btrfs_delayed_ref_head *head_ref; +	struct btrfs_delayed_ref_root *delayed_refs; +	int ret; + +	ref = kmalloc(sizeof(*ref), GFP_NOFS); +	if (!ref) +		return -ENOMEM; + +	old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); +	if (!old_ref) { +		kfree(ref); +		return -ENOMEM; +	} + +	/* +	 * the parent = 0 case comes from cases where we don't actually +	 * know the parent yet.  It will get updated later via a add/drop +	 * pair. 
+	 */ +	if (parent == 0) +		parent = bytenr; +	if (orig_parent == 0) +		orig_parent = bytenr; + +	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); +	if (!head_ref) { +		kfree(ref); +		kfree(old_ref); +		return -ENOMEM; +	} +	delayed_refs = &trans->transaction->delayed_refs; +	spin_lock(&delayed_refs->lock); + +	/* +	 * insert both the head node and the new ref without dropping +	 * the spin lock +	 */ +	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, +				      (u64)-1, 0, 0, 0, +				      BTRFS_ADD_DELAYED_REF, 0); +	BUG_ON(ret); + +	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, +				      parent, ref_root, ref_generation, +				      owner_objectid, BTRFS_ADD_DELAYED_REF, 0); +	BUG_ON(ret); + +	ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, +				      orig_parent, orig_ref_root, +				      orig_ref_generation, owner_objectid, +				      BTRFS_DROP_DELAYED_REF, pin); +	BUG_ON(ret); +	spin_unlock(&delayed_refs->lock); +	return 0; +} diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 00000000000..37919e5c007 --- /dev/null +++ b/fs/btrfs/delayed-ref.h @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2008 Oracle.  All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#ifndef __DELAYED_REF__ +#define __DELAYED_REF__ + +/* these are the possible values of struct btrfs_delayed_ref->action */ +#define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */ +#define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */ +#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ + +struct btrfs_delayed_ref_node { +	struct rb_node rb_node; + +	/* the starting bytenr of the extent */ +	u64 bytenr; + +	/* the parent our backref will point to */ +	u64 parent; + +	/* the size of the extent */ +	u64 num_bytes; + +	/* ref count on this data structure */ +	atomic_t refs; + +	/* +	 * how many refs is this entry adding or deleting.  For +	 * head refs, this may be a negative number because it is keeping +	 * track of the total mods done to the reference count. +	 * For individual refs, this will always be a positive number +	 * +	 * It may be more than one, since it is possible for a single +	 * parent to have more than one ref on an extent +	 */ +	int ref_mod; + +	/* is this node still in the rbtree? */ +	unsigned int in_tree:1; +}; + +/* + * the head refs are used to hold a lock on a given extent, which allows us + * to make sure that only one process is running the delayed refs + * at a time for a single extent.  They also store the sum of all the + * reference count modifications we've queued up. + */ +struct btrfs_delayed_ref_head { +	struct btrfs_delayed_ref_node node; + +	/* +	 * the mutex is held while running the refs, and it is also +	 * held when checking the sum of reference modifications. +	 */ +	struct mutex mutex; + +	/* +	 * when a new extent is allocated, it is just reserved in memory +	 * The actual extent isn't inserted into the extent allocation tree +	 * until the delayed ref is processed.  must_insert_reserved is +	 * used to flag a delayed ref so the accounting can be updated +	 * when a full insert is done. 
+	 * +	 * It is possible the extent will be freed before it is ever +	 * inserted into the extent allocation tree.  In this case +	 * we need to update the in ram accounting to properly reflect +	 * the free has happened. +	 */ +	unsigned int must_insert_reserved:1; +}; + +struct btrfs_delayed_ref { +	struct btrfs_delayed_ref_node node; + +	/* the root objectid our ref will point to */ +	u64 root; + +	/* the generation for the backref */ +	u64 generation; + +	/* owner_objectid of the backref  */ +	u64 owner_objectid; + +	/* operation done by this entry in the rbtree */ +	u8 action; + +	/* if pin == 1, when the extent is freed it will be pinned until +	 * transaction commit +	 */ +	unsigned int pin:1; +}; + +struct btrfs_delayed_ref_root { +	struct rb_root root; + +	/* this spin lock protects the rbtree and the entries inside */ +	spinlock_t lock; + +	/* how many delayed ref updates we've queued, used by the +	 * throttling code +	 */ +	unsigned long num_entries; + +	/* +	 * set when the tree is flushing before a transaction commit, +	 * used by the throttling code to decide if new updates need +	 * to be run right away +	 */ +	int flushing; +}; + +static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) +{ +	WARN_ON(atomic_read(&ref->refs) == 0); +	if (atomic_dec_and_test(&ref->refs)) { +		WARN_ON(ref->in_tree); +		kfree(ref); +	} +} + +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, +			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, +			  u64 ref_generation, u64 owner_objectid, int action, +			  int pin); + +struct btrfs_delayed_ref * +btrfs_find_delayed_ref(struct btrfs_trans_handle *trans, u64 bytenr, +		       u64 parent); +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); +int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans, +			   struct btrfs_delayed_ref_node *ref, +			   struct btrfs_delayed_ref_head **next_ret); +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, +			    
struct btrfs_root *root, u64 bytenr, +			    u64 num_bytes, u32 *refs); +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, +			  u64 bytenr, u64 num_bytes, u64 orig_parent, +			  u64 parent, u64 orig_ref_root, u64 ref_root, +			  u64 orig_ref_generation, u64 ref_generation, +			  u64 owner_objectid, int pin); +/* + * a node might live in a head or a regular ref, this lets you + * test for the proper type to use.  Head nodes are the ones whose + * parent is (u64)-1.  Returns nonzero for a head node, zero otherwise. + */ +static inline int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) +{ +	return node->parent == (u64)-1; +} + +/* + * helper functions to cast a node into its container + */ +static inline struct btrfs_delayed_ref * +btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) +{ +	WARN_ON(btrfs_delayed_ref_is_head(node)); +	return container_of(node, struct btrfs_delayed_ref, node); + +} + +static inline struct btrfs_delayed_ref_head * +btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) +{ +	WARN_ON(!btrfs_delayed_ref_is_head(node)); +	return container_of(node, struct btrfs_delayed_ref_head, node); + +} +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e18175248e..4f43e227a29 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1458,6 +1458,7 @@ static int transaction_kthread(void *arg)  	struct btrfs_root *root = arg;  	struct btrfs_trans_handle *trans;  	struct btrfs_transaction *cur; +	struct btrfs_fs_info *info = root->fs_info;  	unsigned long now;  	unsigned long delay;  	int ret; @@ -1471,12 +1472,6 @@ static int transaction_kthread(void *arg)  		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);  		mutex_lock(&root->fs_info->transaction_kthread_mutex); -		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { -			printk(KERN_INFO "btrfs: total reference cache " -			       "size %llu\n", -			       root->fs_info->total_ref_cache_size); -		} -  		mutex_lock(&root->fs_info->trans_mutex);  		cur = root->fs_info->running_transaction;  		if (!cur) { @@ -1486,13 +1481,30 @@ static 
int transaction_kthread(void *arg)  		now = get_seconds();  		if (now < cur->start_time || now - cur->start_time < 30) { +			unsigned long num_delayed; +			num_delayed = cur->delayed_refs.num_entries;  			mutex_unlock(&root->fs_info->trans_mutex);  			delay = HZ * 5; + +			/* +			 * we may have been woken up early to start +			 * processing the delayed extent ref updates +			 * If so, run some of them and then loop around again +			 * to see if we need to force a commit +			 */ +			if (num_delayed > 64) { +				mutex_unlock(&info->transaction_kthread_mutex); +				trans = btrfs_start_transaction(root, 1); +				btrfs_run_delayed_refs(trans, root, 256); +				btrfs_end_transaction(trans, root); +				continue; +			}  			goto sleep;  		}  		mutex_unlock(&root->fs_info->trans_mutex);  		trans = btrfs_start_transaction(root, 1);  		ret = btrfs_commit_transaction(trans, root); +  sleep:  		wake_up_process(root->fs_info->cleaner_kthread);  		mutex_unlock(&root->fs_info->transaction_kthread_mutex); @@ -1611,10 +1623,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	extent_io_tree_init(&fs_info->pinned_extents,  			     fs_info->btree_inode->i_mapping, GFP_NOFS); -	extent_io_tree_init(&fs_info->pending_del, -			     fs_info->btree_inode->i_mapping, GFP_NOFS); -	extent_io_tree_init(&fs_info->extent_ins, -			     fs_info->btree_inode->i_mapping, GFP_NOFS);  	fs_info->do_barriers = 1;  	INIT_LIST_HEAD(&fs_info->dead_reloc_roots); @@ -1629,7 +1637,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	mutex_init(&fs_info->trans_mutex);  	mutex_init(&fs_info->tree_log_mutex);  	mutex_init(&fs_info->drop_mutex); -	mutex_init(&fs_info->extent_ins_mutex);  	mutex_init(&fs_info->pinned_mutex);  	mutex_init(&fs_info->chunk_mutex);  	mutex_init(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fefe83ad205..9b5da2b013e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,10 +49,13 @@ struct 
pending_extent_op {  	int del;  }; -static int finish_current_insert(struct btrfs_trans_handle *trans, -				 struct btrfs_root *extent_root, int all); -static int del_pending_extents(struct btrfs_trans_handle *trans, -			       struct btrfs_root *extent_root, int all); +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, +					 struct btrfs_root *root, u64 parent, +					 u64 root_objectid, u64 ref_generation, +					 u64 owner, struct btrfs_key *ins, +					 int ref_mod); +static int update_reserved_extents(struct btrfs_root *root, +				   u64 bytenr, u64 num, int reserve);  static int pin_down_bytes(struct btrfs_trans_handle *trans,  			  struct btrfs_root *root,  			  u64 bytenr, u64 num_bytes, int is_data); @@ -60,6 +63,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,  			      struct btrfs_root *root,  			      u64 bytenr, u64 num_bytes, int alloc,  			      int mark_free); +static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, +					struct btrfs_root *root, +					u64 bytenr, u64 num_bytes, u64 parent, +					u64 root_objectid, u64 ref_generation, +					u64 owner_objectid, int pin, +					int ref_to_drop);  static int do_chunk_alloc(struct btrfs_trans_handle *trans,  			  struct btrfs_root *extent_root, u64 alloc_bytes, @@ -554,262 +563,13 @@ out:  	return ret;  } -/* - * updates all the backrefs that are pending on update_list for the - * extent_root - */ -static noinline int update_backrefs(struct btrfs_trans_handle *trans, -				    struct btrfs_root *extent_root, -				    struct btrfs_path *path, -				    struct list_head *update_list) -{ -	struct btrfs_key key; -	struct btrfs_extent_ref *ref; -	struct btrfs_fs_info *info = extent_root->fs_info; -	struct pending_extent_op *op; -	struct extent_buffer *leaf; -	int ret = 0; -	struct list_head *cur = update_list->next; -	u64 ref_objectid; -	u64 ref_root = extent_root->root_key.objectid; - -	op = list_entry(cur, struct pending_extent_op, list); - 
-search: -	key.objectid = op->bytenr; -	key.type = BTRFS_EXTENT_REF_KEY; -	key.offset = op->orig_parent; - -	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); -	BUG_ON(ret); - -	leaf = path->nodes[0]; - -loop: -	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - -	ref_objectid = btrfs_ref_objectid(leaf, ref); - -	if (btrfs_ref_root(leaf, ref) != ref_root || -	    btrfs_ref_generation(leaf, ref) != op->orig_generation || -	    (ref_objectid != op->level && -	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { -		printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " -		       "root %llu, owner %u\n", -		       (unsigned long long)op->bytenr, -		       (unsigned long long)op->orig_parent, -		       (unsigned long long)ref_root, op->level); -		btrfs_print_leaf(extent_root, leaf); -		BUG(); -	} - -	key.objectid = op->bytenr; -	key.offset = op->parent; -	key.type = BTRFS_EXTENT_REF_KEY; -	ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); -	BUG_ON(ret); -	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); -	btrfs_set_ref_generation(leaf, ref, op->generation); - -	cur = cur->next; - -	list_del_init(&op->list); -	unlock_extent(&info->extent_ins, op->bytenr, -		      op->bytenr + op->num_bytes - 1, GFP_NOFS); -	kfree(op); - -	if (cur == update_list) { -		btrfs_mark_buffer_dirty(path->nodes[0]); -		btrfs_release_path(extent_root, path); -		goto out; -	} - -	op = list_entry(cur, struct pending_extent_op, list); - -	path->slots[0]++; -	while (path->slots[0] < btrfs_header_nritems(leaf)) { -		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -		if (key.objectid == op->bytenr && -		    key.type == BTRFS_EXTENT_REF_KEY) -			goto loop; -		path->slots[0]++; -	} - -	btrfs_mark_buffer_dirty(path->nodes[0]); -	btrfs_release_path(extent_root, path); -	goto search; - -out: -	return 0; -} - -static noinline int insert_extents(struct btrfs_trans_handle *trans, -				   struct btrfs_root *extent_root, -				   struct 
btrfs_path *path, -				   struct list_head *insert_list, int nr) -{ -	struct btrfs_key *keys; -	u32 *data_size; -	struct pending_extent_op *op; -	struct extent_buffer *leaf; -	struct list_head *cur = insert_list->next; -	struct btrfs_fs_info *info = extent_root->fs_info; -	u64 ref_root = extent_root->root_key.objectid; -	int i = 0, last = 0, ret; -	int total = nr * 2; - -	if (!nr) -		return 0; - -	keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); -	if (!keys) -		return -ENOMEM; - -	data_size = kzalloc(total * sizeof(u32), GFP_NOFS); -	if (!data_size) { -		kfree(keys); -		return -ENOMEM; -	} - -	list_for_each_entry(op, insert_list, list) { -		keys[i].objectid = op->bytenr; -		keys[i].offset = op->num_bytes; -		keys[i].type = BTRFS_EXTENT_ITEM_KEY; -		data_size[i] = sizeof(struct btrfs_extent_item); -		i++; - -		keys[i].objectid = op->bytenr; -		keys[i].offset = op->parent; -		keys[i].type = BTRFS_EXTENT_REF_KEY; -		data_size[i] = sizeof(struct btrfs_extent_ref); -		i++; -	} - -	op = list_entry(cur, struct pending_extent_op, list); -	i = 0; -	while (i < total) { -		int c; -		ret = btrfs_insert_some_items(trans, extent_root, path, -					      keys+i, data_size+i, total-i); -		BUG_ON(ret < 0); - -		if (last && ret > 1) -			BUG(); - -		leaf = path->nodes[0]; -		for (c = 0; c < ret; c++) { -			int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; - -			/* -			 * if the first item we inserted was a backref, then -			 * the EXTENT_ITEM will be the odd c's, else it will -			 * be the even c's -			 */ -			if ((ref_first && (c % 2)) || -			    (!ref_first && !(c % 2))) { -				struct btrfs_extent_item *itm; - -				itm = btrfs_item_ptr(leaf, path->slots[0] + c, -						     struct btrfs_extent_item); -				btrfs_set_extent_refs(path->nodes[0], itm, 1); -				op->del++; -			} else { -				struct btrfs_extent_ref *ref; - -				ref = btrfs_item_ptr(leaf, path->slots[0] + c, -						     struct btrfs_extent_ref); -				btrfs_set_ref_root(leaf, ref, ref_root); -				
btrfs_set_ref_generation(leaf, ref, -							 op->generation); -				btrfs_set_ref_objectid(leaf, ref, op->level); -				btrfs_set_ref_num_refs(leaf, ref, 1); -				op->del++; -			} - -			/* -			 * using del to see when its ok to free up the -			 * pending_extent_op.  In the case where we insert the -			 * last item on the list in order to help do batching -			 * we need to not free the extent op until we actually -			 * insert the extent_item -			 */ -			if (op->del == 2) { -				unlock_extent(&info->extent_ins, op->bytenr, -					      op->bytenr + op->num_bytes - 1, -					      GFP_NOFS); -				cur = cur->next; -				list_del_init(&op->list); -				kfree(op); -				if (cur != insert_list) -					op = list_entry(cur, -						struct pending_extent_op, -						list); -			} -		} -		btrfs_mark_buffer_dirty(leaf); -		btrfs_release_path(extent_root, path); - -		/* -		 * Ok backref's and items usually go right next to eachother, -		 * but if we could only insert 1 item that means that we -		 * inserted on the end of a leaf, and we have no idea what may -		 * be on the next leaf so we just play it safe.  In order to -		 * try and help this case we insert the last thing on our -		 * insert list so hopefully it will end up being the last -		 * thing on the leaf and everything else will be before it, -		 * which will let us insert a whole bunch of items at the same -		 * time. 
-		 */ -		if (ret == 1 && !last && (i + ret < total)) { -			/* -			 * last: where we will pick up the next time around -			 * i: our current key to insert, will be total - 1 -			 * cur: the current op we are screwing with -			 * op: duh -			 */ -			last = i + ret; -			i = total - 1; -			cur = insert_list->prev; -			op = list_entry(cur, struct pending_extent_op, list); -		} else if (last) { -			/* -			 * ok we successfully inserted the last item on the -			 * list, lets reset everything -			 * -			 * i: our current key to insert, so where we left off -			 *    last time -			 * last: done with this -			 * cur: the op we are messing with -			 * op: duh -			 * total: since we inserted the last key, we need to -			 *        decrement total so we dont overflow -			 */ -			i = last; -			last = 0; -			total--; -			if (i < total) { -				cur = insert_list->next; -				op = list_entry(cur, struct pending_extent_op, -						list); -			} -		} else { -			i += ret; -		} - -		cond_resched(); -	} -	ret = 0; -	kfree(keys); -	kfree(data_size); -	return ret; -} -  static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,  					  struct btrfs_root *root,  					  struct btrfs_path *path,  					  u64 bytenr, u64 parent,  					  u64 ref_root, u64 ref_generation, -					  u64 owner_objectid) +					  u64 owner_objectid, +					  int refs_to_add)  {  	struct btrfs_key key;  	struct extent_buffer *leaf; @@ -829,9 +589,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,  		btrfs_set_ref_root(leaf, ref, ref_root);  		btrfs_set_ref_generation(leaf, ref, ref_generation);  		btrfs_set_ref_objectid(leaf, ref, owner_objectid); -		btrfs_set_ref_num_refs(leaf, ref, 1); +		btrfs_set_ref_num_refs(leaf, ref, refs_to_add);  	} else if (ret == -EEXIST) {  		u64 existing_owner; +  		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);  		leaf = path->nodes[0];  		ref = btrfs_item_ptr(leaf, path->slots[0], @@ -845,7 +606,7 @@ static noinline int 
insert_extent_backref(struct btrfs_trans_handle *trans,  		num_refs = btrfs_ref_num_refs(leaf, ref);  		BUG_ON(num_refs == 0); -		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); +		btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);  		existing_owner = btrfs_ref_objectid(leaf, ref);  		if (existing_owner != owner_objectid && @@ -865,7 +626,8 @@ out:  static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,  					  struct btrfs_root *root, -					  struct btrfs_path *path) +					  struct btrfs_path *path, +					  int refs_to_drop)  {  	struct extent_buffer *leaf;  	struct btrfs_extent_ref *ref; @@ -875,8 +637,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,  	leaf = path->nodes[0];  	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);  	num_refs = btrfs_ref_num_refs(leaf, ref); -	BUG_ON(num_refs == 0); -	num_refs -= 1; +	BUG_ON(num_refs < refs_to_drop); +	num_refs -= refs_to_drop;  	if (num_refs == 0) {  		ret = btrfs_del_item(trans, root, path);  	} else { @@ -927,332 +689,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,  #endif  } -static noinline int free_extents(struct btrfs_trans_handle *trans, -				 struct btrfs_root *extent_root, -				 struct list_head *del_list) -{ -	struct btrfs_fs_info *info = extent_root->fs_info; -	struct btrfs_path *path; -	struct btrfs_key key, found_key; -	struct extent_buffer *leaf; -	struct list_head *cur; -	struct pending_extent_op *op; -	struct btrfs_extent_item *ei; -	int ret, num_to_del, extent_slot = 0, found_extent = 0; -	u32 refs; -	u64 bytes_freed = 0; - -	path = btrfs_alloc_path(); -	if (!path) -		return -ENOMEM; -	path->reada = 1; - -search: -	/* search for the backref for the current ref we want to delete */ -	cur = del_list->next; -	op = list_entry(cur, struct pending_extent_op, list); -	ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, -				    op->orig_parent, -				    extent_root->root_key.objectid, -			
	    op->orig_generation, op->level, 1); -	if (ret) { -		printk(KERN_ERR "btrfs unable to find backref byte nr %llu " -		       "root %llu gen %llu owner %u\n", -		       (unsigned long long)op->bytenr, -		       (unsigned long long)extent_root->root_key.objectid, -		       (unsigned long long)op->orig_generation, op->level); -		btrfs_print_leaf(extent_root, path->nodes[0]); -		WARN_ON(1); -		goto out; -	} - -	extent_slot = path->slots[0]; -	num_to_del = 1; -	found_extent = 0; - -	/* -	 * if we aren't the first item on the leaf we can move back one and see -	 * if our ref is right next to our extent item -	 */ -	if (likely(extent_slot)) { -		extent_slot--; -		btrfs_item_key_to_cpu(path->nodes[0], &found_key, -				      extent_slot); -		if (found_key.objectid == op->bytenr && -		    found_key.type == BTRFS_EXTENT_ITEM_KEY && -		    found_key.offset == op->num_bytes) { -			num_to_del++; -			found_extent = 1; -		} -	} - -	/* -	 * if we didn't find the extent we need to delete the backref and then -	 * search for the extent item key so we can update its ref count -	 */ -	if (!found_extent) { -		key.objectid = op->bytenr; -		key.type = BTRFS_EXTENT_ITEM_KEY; -		key.offset = op->num_bytes; - -		ret = remove_extent_backref(trans, extent_root, path); -		BUG_ON(ret); -		btrfs_release_path(extent_root, path); -		ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); -		BUG_ON(ret); -		extent_slot = path->slots[0]; -	} - -	/* this is where we update the ref count for the extent */ -	leaf = path->nodes[0]; -	ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); -	refs = btrfs_extent_refs(leaf, ei); -	BUG_ON(refs == 0); -	refs--; -	btrfs_set_extent_refs(leaf, ei, refs); - -	btrfs_mark_buffer_dirty(leaf); - -	/* -	 * This extent needs deleting.  
The reason cur_slot is extent_slot + -	 * num_to_del is because extent_slot points to the slot where the extent -	 * is, and if the backref was not right next to the extent we will be -	 * deleting at least 1 item, and will want to start searching at the -	 * slot directly next to extent_slot.  However if we did find the -	 * backref next to the extent item them we will be deleting at least 2 -	 * items and will want to start searching directly after the ref slot -	 */ -	if (!refs) { -		struct list_head *pos, *n, *end; -		int cur_slot = extent_slot+num_to_del; -		u64 super_used; -		u64 root_used; - -		path->slots[0] = extent_slot; -		bytes_freed = op->num_bytes; - -		mutex_lock(&info->pinned_mutex); -		ret = pin_down_bytes(trans, extent_root, op->bytenr, -				     op->num_bytes, op->level >= -				     BTRFS_FIRST_FREE_OBJECTID); -		mutex_unlock(&info->pinned_mutex); -		BUG_ON(ret < 0); -		op->del = ret; - -		/* -		 * we need to see if we can delete multiple things at once, so -		 * start looping through the list of extents we are wanting to -		 * delete and see if their extent/backref's are right next to -		 * eachother and the extents only have 1 ref -		 */ -		for (pos = cur->next; pos != del_list; pos = pos->next) { -			struct pending_extent_op *tmp; - -			tmp = list_entry(pos, struct pending_extent_op, list); - -			/* we only want to delete extent+ref at this stage */ -			if (cur_slot >= btrfs_header_nritems(leaf) - 1) -				break; - -			btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); -			if (found_key.objectid != tmp->bytenr || -			    found_key.type != BTRFS_EXTENT_ITEM_KEY || -			    found_key.offset != tmp->num_bytes) -				break; - -			/* check to make sure this extent only has one ref */ -			ei = btrfs_item_ptr(leaf, cur_slot, -					    struct btrfs_extent_item); -			if (btrfs_extent_refs(leaf, ei) != 1) -				break; - -			btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); -			if (found_key.objectid != tmp->bytenr || -			    found_key.type != 
BTRFS_EXTENT_REF_KEY || -			    found_key.offset != tmp->orig_parent) -				break; - -			/* -			 * the ref is right next to the extent, we can set the -			 * ref count to 0 since we will delete them both now -			 */ -			btrfs_set_extent_refs(leaf, ei, 0); - -			/* pin down the bytes for this extent */ -			mutex_lock(&info->pinned_mutex); -			ret = pin_down_bytes(trans, extent_root, tmp->bytenr, -					     tmp->num_bytes, tmp->level >= -					     BTRFS_FIRST_FREE_OBJECTID); -			mutex_unlock(&info->pinned_mutex); -			BUG_ON(ret < 0); - -			/* -			 * use the del field to tell if we need to go ahead and -			 * free up the extent when we delete the item or not. -			 */ -			tmp->del = ret; -			bytes_freed += tmp->num_bytes; - -			num_to_del += 2; -			cur_slot += 2; -		} -		end = pos; - -		/* update the free space counters */ -		spin_lock(&info->delalloc_lock); -		super_used = btrfs_super_bytes_used(&info->super_copy); -		btrfs_set_super_bytes_used(&info->super_copy, -					   super_used - bytes_freed); - -		root_used = btrfs_root_used(&extent_root->root_item); -		btrfs_set_root_used(&extent_root->root_item, -				    root_used - bytes_freed); -		spin_unlock(&info->delalloc_lock); - -		/* delete the items */ -		ret = btrfs_del_items(trans, extent_root, path, -				      path->slots[0], num_to_del); -		BUG_ON(ret); - -		/* -		 * loop through the extents we deleted and do the cleanup work -		 * on them -		 */ -		for (pos = cur, n = pos->next; pos != end; -		     pos = n, n = pos->next) { -			struct pending_extent_op *tmp; -			tmp = list_entry(pos, struct pending_extent_op, list); - -			/* -			 * remember tmp->del tells us wether or not we pinned -			 * down the extent -			 */ -			ret = update_block_group(trans, extent_root, -						 tmp->bytenr, tmp->num_bytes, 0, -						 tmp->del); -			BUG_ON(ret); - -			list_del_init(&tmp->list); -			unlock_extent(&info->extent_ins, tmp->bytenr, -				      tmp->bytenr + tmp->num_bytes - 1, -				      GFP_NOFS); -			kfree(tmp); -		} -	} else if 
(refs && found_extent) { -		/* -		 * the ref and extent were right next to eachother, but the -		 * extent still has a ref, so just free the backref and keep -		 * going -		 */ -		ret = remove_extent_backref(trans, extent_root, path); -		BUG_ON(ret); - -		list_del_init(&op->list); -		unlock_extent(&info->extent_ins, op->bytenr, -			      op->bytenr + op->num_bytes - 1, GFP_NOFS); -		kfree(op); -	} else { -		/* -		 * the extent has multiple refs and the backref we were looking -		 * for was not right next to it, so just unlock and go next, -		 * we're good to go -		 */ -		list_del_init(&op->list); -		unlock_extent(&info->extent_ins, op->bytenr, -			      op->bytenr + op->num_bytes - 1, GFP_NOFS); -		kfree(op); -	} - -	btrfs_release_path(extent_root, path); -	if (!list_empty(del_list)) -		goto search; - -out: -	btrfs_free_path(path); -	return ret; -} -  static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,  				     struct btrfs_root *root, u64 bytenr, +				     u64 num_bytes,  				     u64 orig_parent, u64 parent,  				     u64 orig_root, u64 ref_root,  				     u64 orig_generation, u64 ref_generation,  				     u64 owner_objectid)  {  	int ret; -	struct btrfs_root *extent_root = root->fs_info->extent_root; -	struct btrfs_path *path; - -	if (root == root->fs_info->extent_root) { -		struct pending_extent_op *extent_op; -		u64 num_bytes; +	int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; -		BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); -		num_bytes = btrfs_level_size(root, (int)owner_objectid); -		mutex_lock(&root->fs_info->extent_ins_mutex); -		if (test_range_bit(&root->fs_info->extent_ins, bytenr, -				bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { -			u64 priv; -			ret = get_state_private(&root->fs_info->extent_ins, -						bytenr, &priv); -			BUG_ON(ret); -			extent_op = (struct pending_extent_op *) -							(unsigned long)priv; -			BUG_ON(extent_op->parent != orig_parent); -			BUG_ON(extent_op->generation != orig_generation); - -			
extent_op->parent = parent; -			extent_op->generation = ref_generation; -		} else { -			extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); -			BUG_ON(!extent_op); - -			extent_op->type = PENDING_BACKREF_UPDATE; -			extent_op->bytenr = bytenr; -			extent_op->num_bytes = num_bytes; -			extent_op->parent = parent; -			extent_op->orig_parent = orig_parent; -			extent_op->generation = ref_generation; -			extent_op->orig_generation = orig_generation; -			extent_op->level = (int)owner_objectid; -			INIT_LIST_HEAD(&extent_op->list); -			extent_op->del = 0; - -			set_extent_bits(&root->fs_info->extent_ins, -					bytenr, bytenr + num_bytes - 1, -					EXTENT_WRITEBACK, GFP_NOFS); -			set_state_private(&root->fs_info->extent_ins, -					  bytenr, (unsigned long)extent_op); -		} -		mutex_unlock(&root->fs_info->extent_ins_mutex); -		return 0; -	} - -	path = btrfs_alloc_path(); -	if (!path) -		return -ENOMEM; -	ret = lookup_extent_backref(trans, extent_root, path, -				    bytenr, orig_parent, orig_root, -				    orig_generation, owner_objectid, 1); -	if (ret) -		goto out; -	ret = remove_extent_backref(trans, extent_root, path); -	if (ret) -		goto out; -	ret = insert_extent_backref(trans, extent_root, path, bytenr, -				    parent, ref_root, ref_generation, -				    owner_objectid); +	ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, +				       orig_parent, parent, orig_root, +				       ref_root, orig_generation, +				       ref_generation, owner_objectid, pin);  	BUG_ON(ret); -	finish_current_insert(trans, extent_root, 0); -	del_pending_extents(trans, extent_root, 0); -out: -	btrfs_free_path(path);  	return ret;  }  int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,  			    struct btrfs_root *root, u64 bytenr, -			    u64 orig_parent, u64 parent, +			    u64 num_bytes, u64 orig_parent, u64 parent,  			    u64 ref_root, u64 ref_generation,  			    u64 owner_objectid)  { @@ -1260,20 +718,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,  	if 
(ref_root == BTRFS_TREE_LOG_OBJECTID &&  	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)  		return 0; -	ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, -					parent, ref_root, ref_root, -					ref_generation, ref_generation, -					owner_objectid); + +	ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, +					orig_parent, parent, ref_root, +					ref_root, ref_generation, +					ref_generation, owner_objectid);  	return ret;  } -  static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  				  struct btrfs_root *root, u64 bytenr, +				  u64 num_bytes,  				  u64 orig_parent, u64 parent,  				  u64 orig_root, u64 ref_root,  				  u64 orig_generation, u64 ref_generation,  				  u64 owner_objectid)  { +	int ret; + +	ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, +				    ref_generation, owner_objectid, +				    BTRFS_ADD_DELAYED_REF, 0); +	BUG_ON(ret); +	return ret; +} + +static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, +			  struct btrfs_root *root, u64 bytenr, +			  u64 num_bytes, u64 parent, u64 ref_root, +			  u64 ref_generation, u64 owner_objectid, +			  int refs_to_add) +{  	struct btrfs_path *path;  	int ret;  	struct btrfs_key key; @@ -1288,15 +762,19 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  	path->reada = 1;  	key.objectid = bytenr;  	key.type = BTRFS_EXTENT_ITEM_KEY; -	key.offset = (u64)-1; +	key.offset = num_bytes; -	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, -				0, 1); +	/* first find the extent item and update its reference count */ +	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, +				path, 0, 1);  	if (ret < 0)  		return ret; -	BUG_ON(ret == 0 || path->slots[0] == 0); -	path->slots[0]--; +	if (ret > 0) { +		WARN_ON(1); +		btrfs_free_path(path); +		return -EIO; +	}  	l = path->nodes[0];  	btrfs_item_key_to_cpu(l, &key, path->slots[0]); @@ -1310,21 +788,20 @@ static int 
__btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  	BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);  	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); +  	refs = btrfs_extent_refs(l, item); -	btrfs_set_extent_refs(l, item, refs + 1); +	btrfs_set_extent_refs(l, item, refs + refs_to_add);  	btrfs_mark_buffer_dirty(path->nodes[0]);  	btrfs_release_path(root->fs_info->extent_root, path);  	path->reada = 1; +	/* now insert the actual backref */  	ret = insert_extent_backref(trans, root->fs_info->extent_root,  				    path, bytenr, parent,  				    ref_root, ref_generation, -				    owner_objectid); +				    owner_objectid, refs_to_add);  	BUG_ON(ret); -	finish_current_insert(trans, root->fs_info->extent_root, 0); -	del_pending_extents(trans, root->fs_info->extent_root, 0); -  	btrfs_free_path(path);  	return 0;  } @@ -1339,68 +816,245 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,  	if (ref_root == BTRFS_TREE_LOG_OBJECTID &&  	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID)  		return 0; -	ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, + +	ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,  				     0, ref_root, 0, ref_generation,  				     owner_objectid);  	return ret;  } -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, -			 struct btrfs_root *root) +static int drop_delayed_ref(struct btrfs_trans_handle *trans, +					struct btrfs_root *root, +					struct btrfs_delayed_ref_node *node) +{ +	int ret = 0; +	struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); + +	BUG_ON(node->ref_mod == 0); +	ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, +				  node->parent, ref->root, ref->generation, +				  ref->owner_objectid, ref->pin, node->ref_mod); + +	return ret; +} + +/* helper function to actually process a single delayed ref entry */ +static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, +					struct btrfs_root *root, +					struct 
btrfs_delayed_ref_node *node, +					int insert_reserved)  { -	u64 start; -	u64 end;  	int ret; +	struct btrfs_delayed_ref *ref; + +	if (node->parent == (u64)-1) { +		struct btrfs_delayed_ref_head *head; +		/* +		 * we've hit the end of the chain and we were supposed +		 * to insert this extent into the tree.  But, it got +		 * deleted before we ever needed to insert it, so all +		 * we have to do is clean up the accounting +		 */ +		if (insert_reserved) { +			update_reserved_extents(root, node->bytenr, +						node->num_bytes, 0); +		} +		head = btrfs_delayed_node_to_head(node); +		mutex_unlock(&head->mutex); +		return 0; +	} -	while(1) { -		finish_current_insert(trans, root->fs_info->extent_root, 1); -		del_pending_extents(trans, root->fs_info->extent_root, 1); +	ref = btrfs_delayed_node_to_ref(node); +	if (ref->action == BTRFS_ADD_DELAYED_REF) { +		if (insert_reserved) { +			struct btrfs_key ins; -		/* is there more work to do? */ -		ret = find_first_extent_bit(&root->fs_info->pending_del, -					    0, &start, &end, EXTENT_WRITEBACK); -		if (!ret) -			continue; -		ret = find_first_extent_bit(&root->fs_info->extent_ins, -					    0, &start, &end, EXTENT_WRITEBACK); -		if (!ret) -			continue; -		break; +			ins.objectid = node->bytenr; +			ins.offset = node->num_bytes; +			ins.type = BTRFS_EXTENT_ITEM_KEY; + +			/* record the full extent allocation */ +			ret = __btrfs_alloc_reserved_extent(trans, root, +					node->parent, ref->root, +					ref->generation, ref->owner_objectid, +					&ins, node->ref_mod); +			update_reserved_extents(root, node->bytenr, +						node->num_bytes, 0); +		} else { +			/* just add one backref */ +			ret = add_extent_ref(trans, root, node->bytenr, +				     node->num_bytes, +				     node->parent, ref->root, ref->generation, +				     ref->owner_objectid, node->ref_mod); +		} +		BUG_ON(ret); +	} else if (ref->action == BTRFS_DROP_DELAYED_REF) { +		WARN_ON(insert_reserved); +		ret = drop_delayed_ref(trans, root, node);  	}  	return 0;  } -int 
btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, -			    struct btrfs_root *root, u64 bytenr, -			    u64 num_bytes, u32 *refs) +static noinline struct btrfs_delayed_ref_node * +select_delayed_ref(struct btrfs_delayed_ref_head *head)  { -	struct btrfs_path *path; +	struct rb_node *node; +	struct btrfs_delayed_ref_node *ref; +	int action = BTRFS_ADD_DELAYED_REF; +again: +	/* +	 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. +	 * this prevents ref count from going down to zero when +	 * there still are pending delayed ref. +	 */ +	node = rb_prev(&head->node.rb_node); +	while (1) { +		if (!node) +			break; +		ref = rb_entry(node, struct btrfs_delayed_ref_node, +				rb_node); +		if (ref->bytenr != head->node.bytenr) +			break; +		if (btrfs_delayed_node_to_ref(ref)->action == action) +			return ref; +		node = rb_prev(node); +	} +	if (action == BTRFS_ADD_DELAYED_REF) { +		action = BTRFS_DROP_DELAYED_REF; +		goto again; +	} +	return NULL; +} + +/* + * this starts processing the delayed reference count updates and + * extent insertions we have queued up so far.  count can be + * 0, which means to process everything in the tree at the start + * of the run (but not newly added entries), or it can be some target + * number you'd like to process. 
+ */ +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, +			   struct btrfs_root *root, unsigned long count) +{ +	struct rb_node *node; +	struct btrfs_delayed_ref_root *delayed_refs; +	struct btrfs_delayed_ref_node *ref; +	struct btrfs_delayed_ref_head *locked_ref = NULL;  	int ret; -	struct btrfs_key key; -	struct extent_buffer *l; -	struct btrfs_extent_item *item; +	int must_insert_reserved = 0; +	int run_all = count == (unsigned long)-1; -	WARN_ON(num_bytes < root->sectorsize); -	path = btrfs_alloc_path(); -	path->reada = 1; -	key.objectid = bytenr; -	key.offset = num_bytes; -	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); -	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, -				0, 0); -	if (ret < 0) -		goto out; -	if (ret != 0) { -		btrfs_print_leaf(root, path->nodes[0]); -		printk(KERN_INFO "btrfs failed to find block number %llu\n", -		       (unsigned long long)bytenr); -		BUG(); +	if (root == root->fs_info->extent_root) +		root = root->fs_info->tree_root; + +	delayed_refs = &trans->transaction->delayed_refs; +again: +	spin_lock(&delayed_refs->lock); +	if (count == 0) +		count = delayed_refs->num_entries; +	while (1) { +		if (!locked_ref) { +			/* +			 * no locked ref, go find something we can +			 * process in the rbtree.  We start at +			 * the beginning of the tree, there may be less +			 * lock contention if we do something smarter here. +			 */ +			node = rb_first(&delayed_refs->root); +			if (!node) { +				spin_unlock(&delayed_refs->lock); +				break; +			} + +			ref = rb_entry(node, struct btrfs_delayed_ref_node, +				       rb_node); +			ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref); +			if (ret) { +				spin_unlock(&delayed_refs->lock); +				break; +			} +		} + +		/* +		 * record the must insert reserved flag before we +		 * drop the spin lock. 
+		 */ +		must_insert_reserved = locked_ref->must_insert_reserved; +		locked_ref->must_insert_reserved = 0; + +		/* +		 * locked_ref is the head node, so we have to go one +		 * node back for any delayed ref updates +		 */ + +		ref = select_delayed_ref(locked_ref); +		if (!ref) { +			/* All delayed refs have been processed, Go ahead +			 * and send the head node to run_one_delayed_ref, +			 * so that any accounting fixes can happen +			 */ +			ref = &locked_ref->node; +			locked_ref = NULL; +		} + +		ref->in_tree = 0; +		rb_erase(&ref->rb_node, &delayed_refs->root); +		delayed_refs->num_entries--; +		spin_unlock(&delayed_refs->lock); + +		ret = run_one_delayed_ref(trans, root, ref, +					  must_insert_reserved); +		BUG_ON(ret); +		btrfs_put_delayed_ref(ref); + +		/* once we lock the head ref, we have to process all the +		 * entries for it.  So, we might end up doing more entries +		 * that count was asking us to do. +		 */ +		if (count > 0) +			count--; + +		/* +		 * we set locked_ref to null above if we're all done +		 * with this bytenr +		 */ +		if (!locked_ref && count == 0) +			break; + +		spin_lock(&delayed_refs->lock); +	} +	if (run_all) { +		spin_lock(&delayed_refs->lock); +		node = rb_first(&delayed_refs->root); +		if (!node) { +			spin_unlock(&delayed_refs->lock); +			goto out; +		} + +		while (node) { +			ref = rb_entry(node, struct btrfs_delayed_ref_node, +				       rb_node); +			if (btrfs_delayed_ref_is_head(ref)) { +				struct btrfs_delayed_ref_head *head; + +				head = btrfs_delayed_node_to_head(ref); +				atomic_inc(&ref->refs); + +				spin_unlock(&delayed_refs->lock); +				mutex_lock(&head->mutex); +				mutex_unlock(&head->mutex); + +				btrfs_put_delayed_ref(ref); +				goto again; +			} +			node = rb_next(node); +		} +		spin_unlock(&delayed_refs->lock); +		count = (unsigned long)-1; +		schedule_timeout(1); +		goto again;  	} -	l = path->nodes[0]; -	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); -	*refs = btrfs_extent_refs(l, 
item);  out: -	btrfs_free_path(path);  	return 0;  } @@ -1624,7 +1278,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,  	int refi = 0;  	int slot;  	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, -			    u64, u64, u64, u64, u64, u64, u64, u64); +			    u64, u64, u64, u64, u64, u64, u64, u64, u64);  	ref_root = btrfs_header_owner(buf);  	ref_generation = btrfs_header_generation(buf); @@ -1696,12 +1350,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,  		if (level == 0) {  			btrfs_item_key_to_cpu(buf, &key, slot); +			fi = btrfs_item_ptr(buf, slot, +					    struct btrfs_file_extent_item); + +			bytenr = btrfs_file_extent_disk_bytenr(buf, fi); +			if (bytenr == 0) +				continue;  			ret = process_func(trans, root, bytenr, -					   orig_buf->start, buf->start, -					   orig_root, ref_root, -					   orig_generation, ref_generation, -					   key.objectid); +				   btrfs_file_extent_disk_num_bytes(buf, fi), +				   orig_buf->start, buf->start, +				   orig_root, ref_root, +				   orig_generation, ref_generation, +				   key.objectid);  			if (ret) {  				faili = slot; @@ -1709,7 +1370,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,  				goto fail;  			}  		} else { -			ret = process_func(trans, root, bytenr, +			ret = process_func(trans, root, bytenr, buf->len,  					   orig_buf->start, buf->start,  					   orig_root, ref_root,  					   orig_generation, ref_generation, @@ -1786,17 +1447,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,  			if (bytenr == 0)  				continue;  			ret = __btrfs_update_extent_ref(trans, root, bytenr, -					    orig_buf->start, buf->start, -					    orig_root, ref_root, -					    orig_generation, ref_generation, -					    key.objectid); +				    btrfs_file_extent_disk_num_bytes(buf, fi), +				    orig_buf->start, buf->start, +				    orig_root, ref_root, orig_generation, +				    ref_generation, key.objectid);  			if (ret)  				goto fail;  		} else {  			
bytenr = btrfs_node_blockptr(buf, slot);  			ret = __btrfs_update_extent_ref(trans, root, bytenr, -					    orig_buf->start, buf->start, -					    orig_root, ref_root, +					    buf->len, orig_buf->start, +					    buf->start, orig_root, ref_root,  					    orig_generation, ref_generation,  					    level - 1);  			if (ret) @@ -1815,7 +1476,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,  				 struct btrfs_block_group_cache *cache)  {  	int ret; -	int pending_ret;  	struct btrfs_root *extent_root = root->fs_info->extent_root;  	unsigned long bi;  	struct extent_buffer *leaf; @@ -1831,12 +1491,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,  	btrfs_mark_buffer_dirty(leaf);  	btrfs_release_path(extent_root, path);  fail: -	finish_current_insert(trans, extent_root, 0); -	pending_ret = del_pending_extents(trans, extent_root, 0);  	if (ret)  		return ret; -	if (pending_ret) -		return pending_ret;  	return 0;  } @@ -2474,193 +2130,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,  	return ret;  } -static int finish_current_insert(struct btrfs_trans_handle *trans, -				 struct btrfs_root *extent_root, int all) -{ -	u64 start; -	u64 end; -	u64 priv; -	u64 search = 0; -	struct btrfs_fs_info *info = extent_root->fs_info; -	struct btrfs_path *path; -	struct pending_extent_op *extent_op, *tmp; -	struct list_head insert_list, update_list; -	int ret; -	int num_inserts = 0, max_inserts, restart = 0; - -	path = btrfs_alloc_path(); -	INIT_LIST_HEAD(&insert_list); -	INIT_LIST_HEAD(&update_list); - -	max_inserts = extent_root->leafsize / -		(2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + -		 sizeof(struct btrfs_extent_ref) + -		 sizeof(struct btrfs_extent_item)); -again: -	mutex_lock(&info->extent_ins_mutex); -	while (1) { -		ret = find_first_extent_bit(&info->extent_ins, search, &start, -					    &end, EXTENT_WRITEBACK); -		if (ret) { -			if (restart && !num_inserts && -			    
list_empty(&update_list)) { -				restart = 0; -				search = 0; -				continue; -			} -			break; -		} - -		ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); -		if (!ret) { -			if (all) -				restart = 1; -			search = end + 1; -			if (need_resched()) { -				mutex_unlock(&info->extent_ins_mutex); -				cond_resched(); -				mutex_lock(&info->extent_ins_mutex); -			} -			continue; -		} - -		ret = get_state_private(&info->extent_ins, start, &priv); -		BUG_ON(ret); -		extent_op = (struct pending_extent_op *)(unsigned long) priv; - -		if (extent_op->type == PENDING_EXTENT_INSERT) { -			num_inserts++; -			list_add_tail(&extent_op->list, &insert_list); -			search = end + 1; -			if (num_inserts == max_inserts) { -				restart = 1; -				break; -			} -		} else if (extent_op->type == PENDING_BACKREF_UPDATE) { -			list_add_tail(&extent_op->list, &update_list); -			search = end + 1; -		} else { -			BUG(); -		} -	} - -	/* -	 * process the update list, clear the writeback bit for it, and if -	 * somebody marked this thing for deletion then just unlock it and be -	 * done, the free_extents will handle it -	 */ -	list_for_each_entry_safe(extent_op, tmp, &update_list, list) { -		clear_extent_bits(&info->extent_ins, extent_op->bytenr, -				  extent_op->bytenr + extent_op->num_bytes - 1, -				  EXTENT_WRITEBACK, GFP_NOFS); -		if (extent_op->del) { -			list_del_init(&extent_op->list); -			unlock_extent(&info->extent_ins, extent_op->bytenr, -				      extent_op->bytenr + extent_op->num_bytes -				      - 1, GFP_NOFS); -			kfree(extent_op); -		} -	} -	mutex_unlock(&info->extent_ins_mutex); - -	/* -	 * still have things left on the update list, go ahead an update -	 * everything -	 */ -	if (!list_empty(&update_list)) { -		ret = update_backrefs(trans, extent_root, path, &update_list); -		BUG_ON(ret); - -		/* we may have COW'ed new blocks, so lets start over */ -		if (all) -			restart = 1; -	} - -	/* -	 * if no inserts need to be done, but we skipped some extents and we -	 * need to 
make sure everything is cleaned then reset everything and -	 * go back to the beginning -	 */ -	if (!num_inserts && restart) { -		search = 0; -		restart = 0; -		INIT_LIST_HEAD(&update_list); -		INIT_LIST_HEAD(&insert_list); -		goto again; -	} else if (!num_inserts) { -		goto out; -	} - -	/* -	 * process the insert extents list.  Again if we are deleting this -	 * extent, then just unlock it, pin down the bytes if need be, and be -	 * done with it.  Saves us from having to actually insert the extent -	 * into the tree and then subsequently come along and delete it -	 */ -	mutex_lock(&info->extent_ins_mutex); -	list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { -		clear_extent_bits(&info->extent_ins, extent_op->bytenr, -				  extent_op->bytenr + extent_op->num_bytes - 1, -				  EXTENT_WRITEBACK, GFP_NOFS); -		if (extent_op->del) { -			u64 used; -			list_del_init(&extent_op->list); -			unlock_extent(&info->extent_ins, extent_op->bytenr, -				      extent_op->bytenr + extent_op->num_bytes -				      - 1, GFP_NOFS); - -			mutex_lock(&extent_root->fs_info->pinned_mutex); -			ret = pin_down_bytes(trans, extent_root, -					     extent_op->bytenr, -					     extent_op->num_bytes, 0); -			mutex_unlock(&extent_root->fs_info->pinned_mutex); - -			spin_lock(&info->delalloc_lock); -			used = btrfs_super_bytes_used(&info->super_copy); -			btrfs_set_super_bytes_used(&info->super_copy, -					used - extent_op->num_bytes); -			used = btrfs_root_used(&extent_root->root_item); -			btrfs_set_root_used(&extent_root->root_item, -					used - extent_op->num_bytes); -			spin_unlock(&info->delalloc_lock); - -			ret = update_block_group(trans, extent_root, -						 extent_op->bytenr, -						 extent_op->num_bytes, -						 0, ret > 0); -			BUG_ON(ret); -			kfree(extent_op); -			num_inserts--; -		} -	} -	mutex_unlock(&info->extent_ins_mutex); - -	ret = insert_extents(trans, extent_root, path, &insert_list, -			     num_inserts); -	BUG_ON(ret); - -	/* -	 * if restart is set for whatever 
reason we need to go back and start -	 * searching through the pending list again. -	 * -	 * We just inserted some extents, which could have resulted in new -	 * blocks being allocated, which would result in new blocks needing -	 * updates, so if all is set we _must_ restart to get the updated -	 * blocks. -	 */ -	if (restart || all) { -		INIT_LIST_HEAD(&insert_list); -		INIT_LIST_HEAD(&update_list); -		search = 0; -		restart = 0; -		num_inserts = 0; -		goto again; -	} -out: -	btrfs_free_path(path); -	return 0; -} -  static int pin_down_bytes(struct btrfs_trans_handle *trans,  			  struct btrfs_root *root,  			  u64 bytenr, u64 num_bytes, int is_data) @@ -2686,6 +2155,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,  		u64 header_transid = btrfs_header_generation(buf);  		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&  		    header_owner != BTRFS_TREE_RELOC_OBJECTID && +		    header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&  		    header_transid == trans->transid &&  		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {  			clean_tree_block(NULL, root, buf); @@ -2710,7 +2180,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,  			 struct btrfs_root *root,  			 u64 bytenr, u64 num_bytes, u64 parent,  			 u64 root_objectid, u64 ref_generation, -			 u64 owner_objectid, int pin, int mark_free) +			 u64 owner_objectid, int pin, int mark_free, +			 int refs_to_drop)  {  	struct btrfs_path *path;  	struct btrfs_key key; @@ -2753,7 +2224,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,  				break;  		}  		if (!found_extent) { -			ret = remove_extent_backref(trans, extent_root, path); +			ret = remove_extent_backref(trans, extent_root, path, +						    refs_to_drop);  			BUG_ON(ret);  			btrfs_release_path(extent_root, path);  			ret = btrfs_search_slot(trans, extent_root, @@ -2771,8 +2243,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,  		btrfs_print_leaf(extent_root, path->nodes[0]);  		WARN_ON(1);  		
printk(KERN_ERR "btrfs unable to find ref byte nr %llu " -		       "root %llu gen %llu owner %llu\n", +		       "parent %llu root %llu gen %llu owner %llu\n",  		       (unsigned long long)bytenr, +		       (unsigned long long)parent,  		       (unsigned long long)root_objectid,  		       (unsigned long long)ref_generation,  		       (unsigned long long)owner_objectid); @@ -2782,17 +2255,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,  	ei = btrfs_item_ptr(leaf, extent_slot,  			    struct btrfs_extent_item);  	refs = btrfs_extent_refs(leaf, ei); -	BUG_ON(refs == 0); -	refs -= 1; -	btrfs_set_extent_refs(leaf, ei, refs); +	/* +	 * we're not allowed to delete the extent item if there +	 * are other delayed ref updates pending +	 */ + +	BUG_ON(refs < refs_to_drop); +	refs -= refs_to_drop; +	btrfs_set_extent_refs(leaf, ei, refs);  	btrfs_mark_buffer_dirty(leaf); -	if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { +	if (refs == 0 && found_extent && +	    path->slots[0] == extent_slot + 1) {  		struct btrfs_extent_ref *ref;  		ref = btrfs_item_ptr(leaf, path->slots[0],  				     struct btrfs_extent_ref); -		BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); +		BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);  		/* if the back ref and the extent are next to each other  		 * they get deleted below in one shot  		 */ @@ -2800,7 +2279,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,  		num_to_del = 2;  	} else if (found_extent) {  		/* otherwise delete the extent back ref */ -		ret = remove_extent_backref(trans, extent_root, path); +		ret = remove_extent_backref(trans, extent_root, path, +					    refs_to_drop);  		BUG_ON(ret);  		/* if refs are 0, we need to setup the path for deletion */  		if (refs == 0) { @@ -2850,218 +2330,35 @@ static int __free_extent(struct btrfs_trans_handle *trans,  		BUG_ON(ret);  	}  	btrfs_free_path(path); -	finish_current_insert(trans, extent_root, 0);  	return ret;  }  /* - * find all the 
blocks marked as pending in the radix tree and remove - * them from the extent map - */ -static int del_pending_extents(struct btrfs_trans_handle *trans, -			       struct btrfs_root *extent_root, int all) -{ -	int ret; -	int err = 0; -	u64 start; -	u64 end; -	u64 priv; -	u64 search = 0; -	int nr = 0, skipped = 0; -	struct extent_io_tree *pending_del; -	struct extent_io_tree *extent_ins; -	struct pending_extent_op *extent_op; -	struct btrfs_fs_info *info = extent_root->fs_info; -	struct list_head delete_list; - -	INIT_LIST_HEAD(&delete_list); -	extent_ins = &extent_root->fs_info->extent_ins; -	pending_del = &extent_root->fs_info->pending_del; - -again: -	mutex_lock(&info->extent_ins_mutex); -	while (1) { -		ret = find_first_extent_bit(pending_del, search, &start, &end, -					    EXTENT_WRITEBACK); -		if (ret) { -			if (all && skipped && !nr) { -				search = 0; -				skipped = 0; -				continue; -			} -			mutex_unlock(&info->extent_ins_mutex); -			break; -		} - -		ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); -		if (!ret) { -			search = end+1; -			skipped = 1; - -			if (need_resched()) { -				mutex_unlock(&info->extent_ins_mutex); -				cond_resched(); -				mutex_lock(&info->extent_ins_mutex); -			} - -			continue; -		} -		BUG_ON(ret < 0); - -		ret = get_state_private(pending_del, start, &priv); -		BUG_ON(ret); -		extent_op = (struct pending_extent_op *)(unsigned long)priv; - -		clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, -				  GFP_NOFS); -		if (!test_range_bit(extent_ins, start, end, -				    EXTENT_WRITEBACK, 0)) { -			list_add_tail(&extent_op->list, &delete_list); -			nr++; -		} else { -			kfree(extent_op); - -			ret = get_state_private(&info->extent_ins, start, -						&priv); -			BUG_ON(ret); -			extent_op = (struct pending_extent_op *) -						(unsigned long)priv; - -			clear_extent_bits(&info->extent_ins, start, end, -					  EXTENT_WRITEBACK, GFP_NOFS); - -			if (extent_op->type == PENDING_BACKREF_UPDATE) { -				
list_add_tail(&extent_op->list, &delete_list); -				search = end + 1; -				nr++; -				continue; -			} - -			mutex_lock(&extent_root->fs_info->pinned_mutex); -			ret = pin_down_bytes(trans, extent_root, start, -					     end + 1 - start, 0); -			mutex_unlock(&extent_root->fs_info->pinned_mutex); - -			ret = update_block_group(trans, extent_root, start, -						end + 1 - start, 0, ret > 0); - -			unlock_extent(extent_ins, start, end, GFP_NOFS); -			BUG_ON(ret); -			kfree(extent_op); -		} -		if (ret) -			err = ret; - -		search = end + 1; - -		if (need_resched()) { -			mutex_unlock(&info->extent_ins_mutex); -			cond_resched(); -			mutex_lock(&info->extent_ins_mutex); -		} -	} - -	if (nr) { -		ret = free_extents(trans, extent_root, &delete_list); -		BUG_ON(ret); -	} - -	if (all && skipped) { -		INIT_LIST_HEAD(&delete_list); -		search = 0; -		nr = 0; -		goto again; -	} - -	if (!err) -		finish_current_insert(trans, extent_root, 0); -	return err; -} - -/*   * remove an extent from the root, returns 0 on success   */  static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -			       struct btrfs_root *root, -			       u64 bytenr, u64 num_bytes, u64 parent, -			       u64 root_objectid, u64 ref_generation, -			       u64 owner_objectid, int pin) +					struct btrfs_root *root, +					u64 bytenr, u64 num_bytes, u64 parent, +					u64 root_objectid, u64 ref_generation, +					u64 owner_objectid, int pin, +					int refs_to_drop)  { -	struct btrfs_root *extent_root = root->fs_info->extent_root; -	int pending_ret; -	int ret; -  	WARN_ON(num_bytes < root->sectorsize); -	if (root == extent_root) { -		struct pending_extent_op *extent_op = NULL; - -		mutex_lock(&root->fs_info->extent_ins_mutex); -		if (test_range_bit(&root->fs_info->extent_ins, bytenr, -				bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { -			u64 priv; -			ret = get_state_private(&root->fs_info->extent_ins, -						bytenr, &priv); -			BUG_ON(ret); -			extent_op = (struct pending_extent_op *) -						(unsigned 
long)priv; - -			extent_op->del = 1; -			if (extent_op->type == PENDING_EXTENT_INSERT) { -				mutex_unlock(&root->fs_info->extent_ins_mutex); -				return 0; -			} -		} - -		if (extent_op) { -			ref_generation = extent_op->orig_generation; -			parent = extent_op->orig_parent; -		} -		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); -		BUG_ON(!extent_op); - -		extent_op->type = PENDING_EXTENT_DELETE; -		extent_op->bytenr = bytenr; -		extent_op->num_bytes = num_bytes; -		extent_op->parent = parent; -		extent_op->orig_parent = parent; -		extent_op->generation = ref_generation; -		extent_op->orig_generation = ref_generation; -		extent_op->level = (int)owner_objectid; -		INIT_LIST_HEAD(&extent_op->list); -		extent_op->del = 0; - -		set_extent_bits(&root->fs_info->pending_del, -				bytenr, bytenr + num_bytes - 1, -				EXTENT_WRITEBACK, GFP_NOFS); -		set_state_private(&root->fs_info->pending_del, -				  bytenr, (unsigned long)extent_op); -		mutex_unlock(&root->fs_info->extent_ins_mutex); -		return 0; -	} -	/* if metadata always pin */ -	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { -		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { -			mutex_lock(&root->fs_info->pinned_mutex); -			btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); -			mutex_unlock(&root->fs_info->pinned_mutex); -			update_reserved_extents(root, bytenr, num_bytes, 0); -			return 0; -		} +	/* +	 * if metadata always pin +	 * if data pin when any transaction has committed this +	 */ +	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || +	    ref_generation != trans->transid)  		pin = 1; -	} -	/* if data pin when any transaction has committed this */  	if (ref_generation != trans->transid)  		pin = 1; -	ret = __free_extent(trans, root, bytenr, num_bytes, parent, +	return __free_extent(trans, root, bytenr, num_bytes, parent,  			    root_objectid, ref_generation, -			    owner_objectid, pin, pin == 0); - -	finish_current_insert(trans, root->fs_info->extent_root, 0); -	pending_ret = 
del_pending_extents(trans, root->fs_info->extent_root, 0); -	return ret ? ret : pending_ret; +			    owner_objectid, pin, pin == 0, refs_to_drop);  }  int btrfs_free_extent(struct btrfs_trans_handle *trans, @@ -3072,9 +2369,26 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,  {  	int ret; -	ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, -				  root_objectid, ref_generation, -				  owner_objectid, pin); +	/* +	 * tree log blocks never actually go into the extent allocation +	 * tree, just update pinning info and exit early. +	 * +	 * data extents referenced by the tree log do need to have +	 * their reference counts bumped. +	 */ +	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && +	    owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { +		mutex_lock(&root->fs_info->pinned_mutex); +		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); +		mutex_unlock(&root->fs_info->pinned_mutex); +		update_reserved_extents(root, bytenr, num_bytes, 0); +		ret = 0; +	} else { +		ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, +				       root_objectid, ref_generation, +				       owner_objectid, +				       BTRFS_DROP_DELAYED_REF, 1); +	}  	return ret;  } @@ -3475,10 +2789,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,  static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,  					 struct btrfs_root *root, u64 parent,  					 u64 root_objectid, u64 ref_generation, -					 u64 owner, struct btrfs_key *ins) +					 u64 owner, struct btrfs_key *ins, +					 int ref_mod)  {  	int ret; -	int pending_ret;  	u64 super_used;  	u64 root_used;  	u64 num_bytes = ins->offset; @@ -3503,33 +2817,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,  	btrfs_set_root_used(&root->root_item, root_used + num_bytes);  	spin_unlock(&info->delalloc_lock); -	if (root == extent_root) { -		struct pending_extent_op *extent_op; - -		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); -		
BUG_ON(!extent_op); - -		extent_op->type = PENDING_EXTENT_INSERT; -		extent_op->bytenr = ins->objectid; -		extent_op->num_bytes = ins->offset; -		extent_op->parent = parent; -		extent_op->orig_parent = 0; -		extent_op->generation = ref_generation; -		extent_op->orig_generation = 0; -		extent_op->level = (int)owner; -		INIT_LIST_HEAD(&extent_op->list); -		extent_op->del = 0; - -		mutex_lock(&root->fs_info->extent_ins_mutex); -		set_extent_bits(&root->fs_info->extent_ins, ins->objectid, -				ins->objectid + ins->offset - 1, -				EXTENT_WRITEBACK, GFP_NOFS); -		set_state_private(&root->fs_info->extent_ins, -				  ins->objectid, (unsigned long)extent_op); -		mutex_unlock(&root->fs_info->extent_ins_mutex); -		goto update_block; -	} -  	memcpy(&keys[0], ins, sizeof(*ins));  	keys[1].objectid = ins->objectid;  	keys[1].type = BTRFS_EXTENT_REF_KEY; @@ -3546,31 +2833,24 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,  	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],  				     struct btrfs_extent_item); -	btrfs_set_extent_refs(path->nodes[0], extent_item, 1); +	btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);  	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,  			     struct btrfs_extent_ref);  	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);  	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);  	btrfs_set_ref_objectid(path->nodes[0], ref, owner); -	btrfs_set_ref_num_refs(path->nodes[0], ref, 1); +	btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);  	btrfs_mark_buffer_dirty(path->nodes[0]);  	trans->alloc_exclude_start = 0;  	trans->alloc_exclude_nr = 0;  	btrfs_free_path(path); -	finish_current_insert(trans, extent_root, 0); -	pending_ret = del_pending_extents(trans, extent_root, 0);  	if (ret)  		goto out; -	if (pending_ret) { -		ret = pending_ret; -		goto out; -	} -update_block:  	ret = update_block_group(trans, root, ins->objectid,  				 ins->offset, 1, 0);  	if (ret) { @@ -3592,9 
+2872,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,  	if (root_objectid == BTRFS_TREE_LOG_OBJECTID)  		return 0; -	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, -					    ref_generation, owner, ins); -	update_reserved_extents(root, ins->objectid, ins->offset, 0); + +	ret = btrfs_add_delayed_ref(trans, ins->objectid, +				    ins->offset, parent, root_objectid, +				    ref_generation, owner, +				    BTRFS_ADD_DELAYED_EXTENT, 0); +	BUG_ON(ret);  	return ret;  } @@ -3621,7 +2904,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,  	BUG_ON(ret);  	put_block_group(block_group);  	ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, -					    ref_generation, owner, ins); +					    ref_generation, owner, ins, 1);  	return ret;  } @@ -3640,20 +2923,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,  		       u64 search_end, struct btrfs_key *ins, u64 data)  {  	int ret; -  	ret = __btrfs_reserve_extent(trans, root, num_bytes,  				     min_alloc_size, empty_size, hint_byte,  				     search_end, ins, data);  	BUG_ON(ret);  	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { -		ret = __btrfs_alloc_reserved_extent(trans, root, parent, -					root_objectid, ref_generation, -					owner_objectid, ins); +		ret = btrfs_add_delayed_ref(trans, ins->objectid, +					    ins->offset, parent, root_objectid, +					    ref_generation, owner_objectid, +					    BTRFS_ADD_DELAYED_EXTENT, 0);  		BUG_ON(ret); - -	} else { -		update_reserved_extents(root, ins->objectid, ins->offset, 1);  	} +	update_reserved_extents(root, ins->objectid, ins->offset, 1);  	return ret;  } @@ -3789,7 +3070,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,  		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); -		ret = __btrfs_free_extent(trans, root, disk_bytenr, +		ret = btrfs_free_extent(trans, root, disk_bytenr,  				btrfs_file_extent_disk_num_bytes(leaf, fi),  				leaf->start, 
leaf_owner, leaf_generation,  				key.objectid, 0); @@ -3829,7 +3110,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,  	 */  	for (i = 0; i < ref->nritems; i++) {  		info = ref->extents + sorted[i].slot; -		ret = __btrfs_free_extent(trans, root, info->bytenr, +		ret = btrfs_free_extent(trans, root, info->bytenr,  					  info->num_bytes, ref->bytenr,  					  ref->owner, ref->generation,  					  info->objectid, 0); @@ -3846,12 +3127,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,  	return 0;  } -static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, +static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, +				     struct btrfs_root *root, u64 start,  				     u64 len, u32 *refs)  {  	int ret; -	ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); +	ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);  	BUG_ON(ret);  #if 0 /* some debugging code in case we see problems here */ @@ -3959,7 +3241,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,  		 * we just decrement it below and don't update any  		 * of the refs the leaf points to.  		 
*/ -		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); +		ret = drop_snap_lookup_refcount(trans, root, bytenr, +						blocksize, &refs);  		BUG_ON(ret);  		if (refs != 1)  			continue; @@ -4010,7 +3293,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,  	 */  	for (i = 0; i < refi; i++) {  		bytenr = sorted[i].bytenr; -		ret = __btrfs_free_extent(trans, root, bytenr, +		ret = btrfs_free_extent(trans, root, bytenr,  					blocksize, eb->start,  					root_owner, root_gen, 0, 1);  		BUG_ON(ret); @@ -4053,7 +3336,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,  	WARN_ON(*level < 0);  	WARN_ON(*level >= BTRFS_MAX_LEVEL); -	ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, +	ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,  				path->nodes[*level]->len, &refs);  	BUG_ON(ret);  	if (refs > 1) @@ -4104,7 +3387,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,  		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);  		blocksize = btrfs_level_size(root, *level - 1); -		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); +		ret = drop_snap_lookup_refcount(trans, root, bytenr, +						blocksize, &refs);  		BUG_ON(ret);  		/* @@ -4119,7 +3403,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,  			root_gen = btrfs_header_generation(parent);  			path->slots[*level]++; -			ret = __btrfs_free_extent(trans, root, bytenr, +			ret = btrfs_free_extent(trans, root, bytenr,  						blocksize, parent->start,  						root_owner, root_gen,  						*level - 1, 1); @@ -4165,7 +3449,7 @@ out:  	 * cleanup and free the reference on the last node  	 * we processed  	 */ -	ret = __btrfs_free_extent(trans, root, bytenr, blocksize, +	ret = btrfs_free_extent(trans, root, bytenr, blocksize,  				  parent->start, root_owner, root_gen,  				  *level, 1);  	free_extent_buffer(path->nodes[*level]); @@ -5457,6 +4741,7 @@ static 
noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,  					root->root_key.objectid,  					trans->transid, key.objectid);  		BUG_ON(ret); +  		ret = btrfs_free_extent(trans, root,  					bytenr, num_bytes, leaf->start,  					btrfs_header_owner(leaf), @@ -5768,9 +5053,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,  				ref_path, NULL, NULL);  	BUG_ON(ret); -	if (root == root->fs_info->extent_root) -		btrfs_extent_post_op(trans, root); -  	return 0;  } @@ -6208,6 +5490,9 @@ again:  	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);  	mutex_unlock(&root->fs_info->cleaner_mutex); +	trans = btrfs_start_transaction(info->tree_root, 1); +	btrfs_commit_transaction(trans, info->tree_root); +  	while (1) {  		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);  		if (ret < 0) @@ -6500,9 +5785,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,  				sizeof(cache->item));  	BUG_ON(ret); -	finish_current_insert(trans, extent_root, 0); -	ret = del_pending_extents(trans, extent_root, 0); -	BUG_ON(ret);  	set_avail_alloc_bits(extent_root->fs_info, type);  	return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dc78954861b..c8007549764 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -643,7 +643,9 @@ next_slot:  			if (disk_bytenr != 0) {  				ret = btrfs_update_extent_ref(trans, root, -						disk_bytenr, orig_parent, +						disk_bytenr, +						le64_to_cpu(old.disk_num_bytes), +						orig_parent,  						leaf->start,  						root->root_key.objectid,  						trans->transid, ins.objectid); @@ -912,7 +914,7 @@ again:  	btrfs_set_file_extent_other_encoding(leaf, fi, 0);  	if (orig_parent != leaf->start) { -		ret = btrfs_update_extent_ref(trans, root, bytenr, +		ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,  					      orig_parent, leaf->start,  					      root->root_key.objectid,  					      trans->transid, inode->i_ino); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 
d638c54d39e..f94c2ad8996 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -65,6 +65,12 @@ static noinline int join_transaction(struct btrfs_root *root)  		cur_trans->use_count = 1;  		cur_trans->commit_done = 0;  		cur_trans->start_time = get_seconds(); + +		cur_trans->delayed_refs.root.rb_node = NULL; +		cur_trans->delayed_refs.num_entries = 0; +		cur_trans->delayed_refs.flushing = 0; +		spin_lock_init(&cur_trans->delayed_refs.lock); +  		INIT_LIST_HEAD(&cur_trans->pending_snapshots);  		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);  		extent_io_tree_init(&cur_trans->dirty_pages, @@ -182,6 +188,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,  	h->block_group = 0;  	h->alloc_exclude_nr = 0;  	h->alloc_exclude_start = 0; +	h->delayed_ref_updates = 0;  	root->fs_info->running_transaction->use_count++;  	mutex_unlock(&root->fs_info->trans_mutex);  	return h; @@ -281,6 +288,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	struct btrfs_transaction *cur_trans;  	struct btrfs_fs_info *info = root->fs_info; +	if (trans->delayed_ref_updates && +	    (trans->transaction->delayed_refs.flushing || +	    trans->transaction->delayed_refs.num_entries > 16384)) { +		btrfs_run_delayed_refs(trans, root, trans->delayed_ref_updates); +	} else if (trans->transaction->delayed_refs.num_entries > 64) { +		wake_up_process(root->fs_info->transaction_kthread); +	} +  	mutex_lock(&info->trans_mutex);  	cur_trans = info->running_transaction;  	WARN_ON(cur_trans != trans->transaction); @@ -424,9 +439,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,  	u64 old_root_bytenr;  	struct btrfs_root *tree_root = root->fs_info->tree_root; -	btrfs_extent_post_op(trans, root);  	btrfs_write_dirty_block_groups(trans, root); -	btrfs_extent_post_op(trans, root); + +	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +	BUG_ON(ret);  	while (1) {  		old_root_bytenr = 
btrfs_root_bytenr(&root->root_item); @@ -438,14 +454,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,  				     btrfs_header_level(root->node));  		btrfs_set_root_generation(&root->root_item, trans->transid); -		btrfs_extent_post_op(trans, root); -  		ret = btrfs_update_root(trans, tree_root,  					&root->root_key,  					&root->root_item);  		BUG_ON(ret);  		btrfs_write_dirty_block_groups(trans, root); -		btrfs_extent_post_op(trans, root); + +		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +		BUG_ON(ret);  	}  	return 0;  } @@ -459,15 +475,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,  	struct btrfs_fs_info *fs_info = root->fs_info;  	struct list_head *next;  	struct extent_buffer *eb; +	int ret; -	btrfs_extent_post_op(trans, fs_info->tree_root); +	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +	BUG_ON(ret);  	eb = btrfs_lock_root_node(fs_info->tree_root);  	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);  	btrfs_tree_unlock(eb);  	free_extent_buffer(eb); -	btrfs_extent_post_op(trans, fs_info->tree_root); +	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +	BUG_ON(ret);  	while (!list_empty(&fs_info->dirty_cowonly_roots)) {  		next = fs_info->dirty_cowonly_roots.next; @@ -475,6 +494,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,  		root = list_entry(next, struct btrfs_root, dirty_list);  		update_cowonly_root(trans, root); + +		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +		BUG_ON(ret);  	}  	return 0;  } @@ -895,6 +917,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	DEFINE_WAIT(wait);  	int ret; +	/* make a pass through all the delayed refs we have so far +	 * any runnings procs may add more while we are here +	 */ +	ret = btrfs_run_delayed_refs(trans, root, 0); +	BUG_ON(ret); + +	/* +	 * set the flushing flag so procs in this transaction have to +	 * start sending their work down. 
+	 */ +	trans->transaction->delayed_refs.flushing = 1; + +	ret = btrfs_run_delayed_refs(trans, root, (u64)-1); +	BUG_ON(ret); +  	INIT_LIST_HEAD(&dirty_fs_roots);  	mutex_lock(&root->fs_info->trans_mutex);  	if (trans->transaction->in_commit) { @@ -969,6 +1006,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	ret = create_pending_snapshots(trans, root->fs_info);  	BUG_ON(ret); +	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); +	BUG_ON(ret); +  	WARN_ON(cur_trans != trans->transaction);  	/* btrfs_commit_tree_roots is responsible for getting the diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ea292117f88..94876709217 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,6 +19,7 @@  #ifndef __BTRFS_TRANSACTION__  #define __BTRFS_TRANSACTION__  #include "btrfs_inode.h" +#include "delayed-ref.h"  struct btrfs_transaction {  	u64 transid; @@ -34,6 +35,7 @@ struct btrfs_transaction {  	wait_queue_head_t writer_wait;  	wait_queue_head_t commit_wait;  	struct list_head pending_snapshots; +	struct btrfs_delayed_ref_root delayed_refs;  };  struct btrfs_trans_handle { @@ -44,6 +46,7 @@ struct btrfs_trans_handle {  	u64 block_group;  	u64 alloc_exclude_start;  	u64 alloc_exclude_nr; +	unsigned long delayed_ref_updates;  };  struct btrfs_pending_snapshot { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 98d25fa4570..b10eacdb162 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,  	}  	btrfs_release_path(root, path); -	if (is_extent) -		btrfs_extent_post_op(trans, root);  out:  	if (path)  		btrfs_free_path(path);  |