diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/ctree.c | 30 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.c | 151 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 30 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 89 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 33 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/file-item.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 31 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/locking.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/qgroup.c | 13 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 74 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/send.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 76 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 33 | 
21 files changed, 435 insertions, 185 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ecd25a1b4e5..ca9d8f1a3bb 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,  	if (tree_mod_dont_log(fs_info, NULL))  		return 0; +	__tree_mod_log_free_eb(fs_info, old_root); +  	ret = tree_mod_alloc(fs_info, flags, &tm);  	if (ret < 0)  		goto out; @@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)  static noinline void  tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,  		     struct extent_buffer *src, unsigned long dst_offset, -		     unsigned long src_offset, int nr_items) +		     unsigned long src_offset, int nr_items, int log_removal)  {  	int ret;  	int i; @@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,  	}  	for (i = 0; i < nr_items; i++) { -		ret = tree_mod_log_insert_key_locked(fs_info, src, -						     i + src_offset, -						     MOD_LOG_KEY_REMOVE); -		BUG_ON(ret < 0); +		if (log_removal) { +			ret = tree_mod_log_insert_key_locked(fs_info, src, +							i + src_offset, +							MOD_LOG_KEY_REMOVE); +			BUG_ON(ret < 0); +		}  		ret = tree_mod_log_insert_key_locked(fs_info, dst,  						     i + dst_offset,  						     MOD_LOG_KEY_ADD); @@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,  			ret = btrfs_dec_ref(trans, root, buf, 1, 1);  			BUG_ON(ret); /* -ENOMEM */  		} -		tree_mod_log_free_eb(root->fs_info, buf);  		clean_tree_block(trans, root, buf);  		*last_ref = 1;  	} @@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,  		btrfs_set_node_ptr_generation(parent, parent_slot,  					      trans->transid);  		btrfs_mark_buffer_dirty(parent); +		tree_mod_log_free_eb(root->fs_info, buf);  		btrfs_free_tree_block(trans, root, buf, parent_start,  				      last_ref);  	} @@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  			goto enospc;  		} -		tree_mod_log_free_eb(root->fs_info, root->node);  		tree_mod_log_set_root_pointer(root, child);  		rcu_assign_pointer(root->node, child); @@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,  		push_items = min(src_nritems - 8, push_items);  	tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, -			     push_items); +			     push_items, 1);  	copy_extent_buffer(dst, src,  			   btrfs_node_key_ptr_offset(dst_nritems),  			   btrfs_node_key_ptr_offset(0), @@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,  				      sizeof(struct btrfs_key_ptr));  	tree_mod_log_eb_copy(root->fs_info, dst, src, 0, -			     src_nritems - push_items, push_items); +			     src_nritems - push_items, push_items, 1);  	copy_extent_buffer(dst, src,  			   btrfs_node_key_ptr_offset(0),  			   btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,  	int mid;  	int ret;  	u32 c_nritems; +	int tree_mod_log_removal = 1;  	c = path->nodes[level];  	WARN_ON(btrfs_header_generation(c) != trans->transid);  	if (c == root->node) {  		/* trying to split the root, lets make a new one */  		ret = insert_new_root(trans, root, path, level + 1); +		/* +		 * removal of root nodes has been logged by +		 * tree_mod_log_set_root_pointer due to locking +		 */ +		tree_mod_log_removal = 0;  		if (ret)  			return ret;  	} else { @@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,  			    (unsigned long)btrfs_header_chunk_tree_uuid(split),  			    BTRFS_UUID_SIZE); -	tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); +	tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, +			     tree_mod_log_removal);  	copy_extent_buffer(split, c,  			   btrfs_node_key_ptr_offset(0),  			   btrfs_node_key_ptr_offset(mid), diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0b278b117cb..14fce27b478 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -22,8 +22,9 @@  #include "disk-io.h"  #include "transaction.h" -#define BTRFS_DELAYED_WRITEBACK		400 -#define BTRFS_DELAYED_BACKGROUND	100 +#define BTRFS_DELAYED_WRITEBACK		512 +#define BTRFS_DELAYED_BACKGROUND	128 +#define BTRFS_DELAYED_BATCH		16  static struct kmem_cache *delayed_node_cache; @@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,  					BTRFS_DELAYED_DELETION_ITEM);  } +static void finish_one_item(struct btrfs_delayed_root *delayed_root) +{ +	int seq = atomic_inc_return(&delayed_root->items_seq); +	if ((atomic_dec_return(&delayed_root->items) < +	    BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) && +	    waitqueue_active(&delayed_root->wait)) +		wake_up(&delayed_root->wait); +} +  static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)  {  	struct rb_root *root; @@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)  	rb_erase(&delayed_item->rb_node, root);  	delayed_item->delayed_node->count--; -	if (atomic_dec_return(&delayed_root->items) < -	    BTRFS_DELAYED_BACKGROUND && -	    waitqueue_active(&delayed_root->wait)) -		wake_up(&delayed_root->wait); + +	finish_one_item(delayed_root);  }  static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) @@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)  		delayed_node->count--;  		delayed_root = delayed_node->root->fs_info->delayed_root; -		if (atomic_dec_return(&delayed_root->items) < -		    BTRFS_DELAYED_BACKGROUND && -		    waitqueue_active(&delayed_root->wait)) -			wake_up(&delayed_root->wait); +		finish_one_item(delayed_root);  	}  } @@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)  	btrfs_release_delayed_node(delayed_node);  } -struct btrfs_async_delayed_node { -	struct btrfs_root *root; -	struct btrfs_delayed_node *delayed_node; +struct btrfs_async_delayed_work { +	struct btrfs_delayed_root *delayed_root; +	int nr;  	struct btrfs_work work;  }; -static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) +static void btrfs_async_run_delayed_root(struct btrfs_work *work)  { -	struct btrfs_async_delayed_node *async_node; +	struct btrfs_async_delayed_work *async_work; +	struct btrfs_delayed_root *delayed_root;  	struct btrfs_trans_handle *trans;  	struct btrfs_path *path;  	struct btrfs_delayed_node *delayed_node = NULL;  	struct btrfs_root *root;  	struct btrfs_block_rsv *block_rsv; -	int need_requeue = 0; +	int total_done = 0; -	async_node = container_of(work, struct btrfs_async_delayed_node, work); +	async_work = container_of(work, struct btrfs_async_delayed_work, work); +	delayed_root = async_work->delayed_root;  	path = btrfs_alloc_path();  	if (!path)  		goto out; -	path->leave_spinning = 1; -	delayed_node = async_node->delayed_node; +again: +	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2) +		goto free_path; + +	delayed_node = btrfs_first_prepared_delayed_node(delayed_root); +	if (!delayed_node) +		goto free_path; + +	path->leave_spinning = 1;  	root = delayed_node->root;  	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) -		goto free_path; +		goto release_path;  	block_rsv = trans->block_rsv;  	trans->block_rsv = &root->fs_info->delayed_block_rsv; @@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)  	 * Task1 will sleep until the transaction is commited.  	 */  	mutex_lock(&delayed_node->mutex); -	if (delayed_node->count) -		need_requeue = 1; -	else -		btrfs_dequeue_delayed_node(root->fs_info->delayed_root, -					   delayed_node); +	btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);  	mutex_unlock(&delayed_node->mutex);  	trans->block_rsv = block_rsv;  	btrfs_end_transaction_dmeta(trans, root);  	btrfs_btree_balance_dirty_nodelay(root); + +release_path: +	btrfs_release_path(path); +	total_done++; + +	btrfs_release_prepared_delayed_node(delayed_node); +	if (async_work->nr == 0 || total_done < async_work->nr) +		goto again; +  free_path:  	btrfs_free_path(path);  out: -	if (need_requeue) -		btrfs_requeue_work(&async_node->work); -	else { -		btrfs_release_prepared_delayed_node(delayed_node); -		kfree(async_node); -	} +	wake_up(&delayed_root->wait); +	kfree(async_work);  } +  static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, -				     struct btrfs_root *root, int all) +				     struct btrfs_root *root, int nr)  { -	struct btrfs_async_delayed_node *async_node; -	struct btrfs_delayed_node *curr; -	int count = 0; +	struct btrfs_async_delayed_work *async_work; -again: -	curr = btrfs_first_prepared_delayed_node(delayed_root); -	if (!curr) +	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)  		return 0; -	async_node = kmalloc(sizeof(*async_node), GFP_NOFS); -	if (!async_node) { -		btrfs_release_prepared_delayed_node(curr); +	async_work = kmalloc(sizeof(*async_work), GFP_NOFS); +	if (!async_work)  		return -ENOMEM; -	} -	async_node->root = root; -	async_node->delayed_node = curr; - -	async_node->work.func = btrfs_async_run_delayed_node_done; -	async_node->work.flags = 0; - -	btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); -	count++; - -	if (all || count < 4) -		goto again; +	async_work->delayed_root = delayed_root; +	async_work->work.func = btrfs_async_run_delayed_root; +	async_work->work.flags = 0; +	async_work->nr = nr; +	btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);  	return 0;  } @@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)  	WARN_ON(btrfs_first_delayed_node(delayed_root));  } +static int refs_newer(struct btrfs_delayed_root *delayed_root, +		      int seq, int count) +{ +	int val = atomic_read(&delayed_root->items_seq); + +	if (val < seq || val >= seq + count) +		return 1; +	return 0; +} +  void btrfs_balance_delayed_items(struct btrfs_root *root)  {  	struct btrfs_delayed_root *delayed_root; +	int seq;  	delayed_root = btrfs_get_delayed_root(root);  	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)  		return; +	seq = atomic_read(&delayed_root->items_seq); +  	if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {  		int ret; -		ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); +		DEFINE_WAIT(__wait); + +		ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);  		if (ret)  			return; -		wait_event_interruptible_timeout( -				delayed_root->wait, -				(atomic_read(&delayed_root->items) < -				 BTRFS_DELAYED_BACKGROUND), -				HZ); -		return; +		while (1) { +			prepare_to_wait(&delayed_root->wait, &__wait, +					TASK_INTERRUPTIBLE); + +			if (refs_newer(delayed_root, seq, +				       BTRFS_DELAYED_BATCH) || +			    atomic_read(&delayed_root->items) < +			    BTRFS_DELAYED_BACKGROUND) { +				break; +			} +			if (!signal_pending(current)) +				schedule(); +			else +				break; +		} +		finish_wait(&delayed_root->wait, &__wait);  	} -	btrfs_wq_run_delayed_node(delayed_root, root, 0); +	btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);  }  /* Will return 0 or -ENOMEM */ diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 78b6ad0fc66..1d5c5f7abe3 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -43,6 +43,7 @@ struct btrfs_delayed_root {  	 */  	struct list_head prepare_list;  	atomic_t items;		/* for delayed items */ +	atomic_t items_seq;	/* for delayed items */  	int nodes;		/* for delayed nodes */  	wait_queue_head_t wait;  }; @@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(  				struct btrfs_delayed_root *delayed_root)  {  	atomic_set(&delayed_root->items, 0); +	atomic_set(&delayed_root->items_seq, 0);  	delayed_root->nodes = 0;  	spin_lock_init(&delayed_root->lock);  	init_waitqueue_head(&delayed_root->wait); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 02369a3c162..6d19a0a554a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,  static void btrfs_destroy_ordered_extents(struct btrfs_root *root);  static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  				      struct btrfs_root *root); -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);  static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);  static int btrfs_destroy_marked_extents(struct btrfs_root *root,  					struct extent_io_tree *dirty_pages, @@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,  				      0, objectid, NULL, 0, 0, 0);  	if (IS_ERR(leaf)) {  		ret = PTR_ERR(leaf); +		leaf = NULL;  		goto fail;  	} @@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,  	btrfs_tree_unlock(leaf); +	return root; +  fail: -	if (ret) -		return ERR_PTR(ret); +	if (leaf) { +		btrfs_tree_unlock(leaf); +		free_extent_buffer(leaf); +	} +	kfree(root); -	return root; +	return ERR_PTR(ret);  }  static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, @@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)  	if (btrfs_root_refs(&root->root_item) == 0)  		synchronize_srcu(&fs_info->subvol_srcu); -	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { +	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {  		btrfs_free_log(NULL, root);  		btrfs_free_log_root_tree(NULL, fs_info);  	} @@ -3687,7 +3693,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  	return ret;  } -static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)  {  	struct btrfs_pending_snapshot *snapshot;  	struct list_head splice; @@ -3700,10 +3706,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)  		snapshot = list_entry(splice.next,  				      struct btrfs_pending_snapshot,  				      list); - +		snapshot->error = -ECANCELED;  		list_del_init(&snapshot->list); - -		kfree(snapshot);  	}  } @@ -3840,6 +3844,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,  	cur_trans->blocked = 1;  	wake_up(&root->fs_info->transaction_blocked_wait); +	btrfs_evict_pending_snapshots(cur_trans); +  	cur_trans->blocked = 0;  	wake_up(&root->fs_info->transaction_wait); @@ -3849,8 +3855,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,  	btrfs_destroy_delayed_inodes(root);  	btrfs_assert_delayed_root_empty(root); -	btrfs_destroy_pending_snapshots(cur_trans); -  	btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,  				     EXTENT_DIRTY);  	btrfs_destroy_pinned_extent(root, @@ -3894,6 +3898,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)  		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))  			wake_up(&root->fs_info->transaction_blocked_wait); +		btrfs_evict_pending_snapshots(t); +  		t->blocked = 0;  		smp_mb();  		if (waitqueue_active(&root->fs_info->transaction_wait)) @@ -3907,8 +3913,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)  		btrfs_destroy_delayed_inodes(root);  		btrfs_assert_delayed_root_empty(root); -		btrfs_destroy_pending_snapshots(t); -  		btrfs_destroy_delalloc_inodes(root);  		spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e074dab2d5..3d551231cab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,  		cache->bytes_super += stripe_len;  		ret = add_excluded_extent(root, cache->key.objectid,  					  stripe_len); -		BUG_ON(ret); /* -ENOMEM */ +		if (ret) +			return ret;  	}  	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,  		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,  				       cache->key.objectid, bytenr,  				       0, &logical, &nr, &stripe_len); -		BUG_ON(ret); /* -ENOMEM */ +		if (ret) +			return ret;  		while (nr--) {  			cache->bytes_super += stripe_len;  			ret = add_excluded_extent(root, logical[nr],  						  stripe_len); -			BUG_ON(ret); /* -ENOMEM */ +			if (ret) { +				kfree(logical); +				return ret; +			}  		}  		kfree(logical); @@ -1467,8 +1472,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,  	if (ret && !insert) {  		err = -ENOENT;  		goto out; +	} else if (ret) { +		err = -EIO; +		WARN_ON(1); +		goto out;  	} -	BUG_ON(ret); /* Corruption */  	leaf = path->nodes[0];  	item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -4435,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)  	spin_lock(&sinfo->lock);  	spin_lock(&block_rsv->lock); -	block_rsv->size = num_bytes; +	block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);  	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +  		    sinfo->bytes_reserved + sinfo->bytes_readonly + @@ -4790,14 +4798,49 @@ out_fail:  	 * If the inodes csum_bytes is the same as the original  	 * csum_bytes then we know we haven't raced with any free()ers  	 * so we can just reduce our inodes csum bytes and carry on. -	 * Otherwise we have to do the normal free thing to account for -	 * the case that the free side didn't free up its reserve -	 * because of this outstanding reservation.  	 */ -	if (BTRFS_I(inode)->csum_bytes == csum_bytes) +	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {  		calc_csum_metadata_size(inode, num_bytes, 0); -	else -		to_free = calc_csum_metadata_size(inode, num_bytes, 0); +	} else { +		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; +		u64 bytes; + +		/* +		 * This is tricky, but first we need to figure out how much we +		 * free'd from any free-ers that occured during this +		 * reservation, so we reset ->csum_bytes to the csum_bytes +		 * before we dropped our lock, and then call the free for the +		 * number of bytes that were freed while we were trying our +		 * reservation. +		 */ +		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; +		BTRFS_I(inode)->csum_bytes = csum_bytes; +		to_free = calc_csum_metadata_size(inode, bytes, 0); + + +		/* +		 * Now we need to see how much we would have freed had we not +		 * been making this reservation and our ->csum_bytes were not +		 * artificially inflated. +		 */ +		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; +		bytes = csum_bytes - orig_csum_bytes; +		bytes = calc_csum_metadata_size(inode, bytes, 0); + +		/* +		 * Now reset ->csum_bytes to what it should be.  If bytes is +		 * more than to_free then we would have free'd more space had we +		 * not had an artificially high ->csum_bytes, so we need to free +		 * the remainder.  If bytes is the same or less then we don't +		 * need to do anything, the other free-ers did the correct +		 * thing. +		 */ +		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; +		if (bytes > to_free) +			to_free = bytes - to_free; +		else +			to_free = 0; +	}  	spin_unlock(&BTRFS_I(inode)->lock);  	if (dropped)  		to_free += btrfs_calc_trans_metadata_size(root, dropped); @@ -7944,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)  		 * info has super bytes accounted for, otherwise we'll think  		 * we have more space than we actually do.  		 */ -		exclude_super_stripes(root, cache); +		ret = exclude_super_stripes(root, cache); +		if (ret) { +			/* +			 * We may have excluded something, so call this just in +			 * case. +			 */ +			free_excluded_extents(root, cache); +			kfree(cache->free_space_ctl); +			kfree(cache); +			goto error; +		}  		/*  		 * check for two cases, either we are full, and therefore @@ -8086,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,  	cache->last_byte_to_unpin = (u64)-1;  	cache->cached = BTRFS_CACHE_FINISHED; -	exclude_super_stripes(root, cache); +	ret = exclude_super_stripes(root, cache); +	if (ret) { +		/* +		 * We may have excluded something, so call this just in +		 * case. +		 */ +		free_excluded_extents(root, cache); +		kfree(cache->free_space_ctl); +		kfree(cache); +		return ret; +	}  	add_new_free_space(cache, root->fs_info, chunk_offset,  			   chunk_offset + size); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af646..cdee391fc7b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)  				GFP_NOFS);  } +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) +{ +	unsigned long index = start >> PAGE_CACHE_SHIFT; +	unsigned long end_index = end >> PAGE_CACHE_SHIFT; +	struct page *page; + +	while (index <= end_index) { +		page = find_get_page(inode->i_mapping, index); +		BUG_ON(!page); /* Pages should be in the extent_io_tree */ +		clear_page_dirty_for_io(page); +		page_cache_release(page); +		index++; +	} +	return 0; +} + +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +{ +	unsigned long index = start >> PAGE_CACHE_SHIFT; +	unsigned long end_index = end >> PAGE_CACHE_SHIFT; +	struct page *page; + +	while (index <= end_index) { +		page = find_get_page(inode->i_mapping, index); +		BUG_ON(!page); /* Pages should be in the extent_io_tree */ +		account_page_redirty(page); +		__set_page_dirty_nobuffers(page); +		page_cache_release(page); +		index++; +	} +	return 0; +} +  /*   * helper function to set both pages and extents in the tree writeback   */ diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6068a198556..258c9215685 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,  		      unsigned long *map_len);  int extent_range_uptodate(struct extent_io_tree *tree,  			  u64 start, u64 end); +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);  int extent_clear_unlock_delalloc(struct inode *inode,  				struct extent_io_tree *tree,  				u64 start, u64 end, struct page *locked_page, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ec160202be3..c4628a201cb 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,  		csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);  		csums_in_item /= csum_size; -		if (csum_offset >= csums_in_item) { +		if (csum_offset == csums_in_item) {  			ret = -EFBIG;  			goto fail; +		} else if (csum_offset > csums_in_item) { +			goto fail;  		}  	}  	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); @@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,  		return -ENOMEM;  	sector_sum = sums->sums; -	trans->adding_csums = 1;  again:  	next_offset = (u64)-1;  	found_next = 0; @@ -899,7 +900,6 @@ next_sector:  		goto again;  	}  out: -	trans->adding_csums = 0;  	btrfs_free_path(path);  	return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index af1d0605a5c..ade03e6f7bd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,  		}  		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);  		clear_bit(EXTENT_FLAG_PINNED, &em->flags); +		clear_bit(EXTENT_FLAG_LOGGING, &flags);  		remove_extent_mapping(em_tree, em);  		if (no_splits)  			goto next; @@ -2141,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,  {  	struct inode *inode = file_inode(file);  	struct extent_state *cached_state = NULL; +	struct btrfs_root *root = BTRFS_I(inode)->root;  	u64 cur_offset;  	u64 last_byte;  	u64 alloc_start; @@ -2168,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,  	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);  	if (ret)  		return ret; +	if (root->fs_info->quota_enabled) { +		ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); +		if (ret) +			goto out_reserve_fail; +	}  	/*  	 * wait for ordered IO before we have any locks.  We'll loop again @@ -2271,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,  			     &cached_state, GFP_NOFS);  out:  	mutex_unlock(&inode->i_mutex); +	if (root->fs_info->quota_enabled) +		btrfs_qgroup_free(root, alloc_end - alloc_start); +out_reserve_fail:  	/* Let go of our reservation. */  	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);  	return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c226daefd65..09c58a35b42 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,  	int i;  	int will_compress;  	int compress_type = root->fs_info->compress_type; +	int redirty = 0;  	/* if this is a small write inside eof, kick off a defrag */  	if ((end - start + 1) < 16 * 1024 && @@ -415,6 +416,17 @@ again:  		if (BTRFS_I(inode)->force_compress)  			compress_type = BTRFS_I(inode)->force_compress; +		/* +		 * we need to call clear_page_dirty_for_io on each +		 * page in the range.  Otherwise applications with the file +		 * mmap'd can wander in and change the page contents while +		 * we are compressing them. +		 * +		 * If the compression fails for any reason, we set the pages +		 * dirty again later on. +		 */ +		extent_range_clear_dirty_for_io(inode, start, end); +		redirty = 1;  		ret = btrfs_compress_pages(compress_type,  					   inode->i_mapping, start,  					   total_compressed, pages, @@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:  			__set_page_dirty_nobuffers(locked_page);  			/* unlocked later on in the async handlers */  		} +		if (redirty) +			extent_range_redirty_for_io(inode, start, end);  		add_async_extent(async_cow, start, end - start + 1,  				 0, NULL, 0, BTRFS_COMPRESS_NONE);  		*num_added += 1; @@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,  	struct btrfs_ordered_sum *sum;  	list_for_each_entry(sum, list, list) { +		trans->adding_csums = 1;  		btrfs_csum_file_blocks(trans,  		       BTRFS_I(inode)->root->fs_info->csum_root, sum); +		trans->adding_csums = 0;  	}  	return 0;  } @@ -2312,6 +2328,7 @@ again:  	key.type = BTRFS_EXTENT_DATA_KEY;  	key.offset = start; +	path->leave_spinning = 1;  	if (merge) {  		struct btrfs_file_extent_item *fi;  		u64 extent_len; @@ -2368,6 +2385,7 @@ again:  	btrfs_mark_buffer_dirty(leaf);  	inode_add_bytes(inode, len); +	btrfs_release_path(path);  	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,  			new->disk_len, 0, @@ -2381,6 +2399,7 @@ again:  	ret = 1;  out_free_path:  	btrfs_release_path(path); +	path->leave_spinning = 0;  	btrfs_end_transaction(trans, root);  out_unlock:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, @@ -3676,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  	 * 1 for the dir item  	 * 1 for the dir index  	 * 1 for the inode ref -	 * 1 for the inode ref in the tree log -	 * 2 for the dir entries in the log  	 * 1 for the inode  	 */ -	trans = btrfs_start_transaction(root, 8); +	trans = btrfs_start_transaction(root, 5);  	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)  		return trans; @@ -8124,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	 * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items  	 * should cover the worst case number of items we'll modify.  	 */ -	trans = btrfs_start_transaction(root, 20); +	trans = btrfs_start_transaction(root, 11);  	if (IS_ERR(trans)) {                  ret = PTR_ERR(trans);                  goto out_notrans; @@ -8502,6 +8519,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,  	struct btrfs_key ins;  	u64 cur_offset = start;  	u64 i_size; +	u64 cur_bytes;  	int ret = 0;  	bool own_trans = true; @@ -8516,8 +8534,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,  			}  		} -		ret = btrfs_reserve_extent(trans, root, -					   min(num_bytes, 256ULL * 1024 * 1024), +		cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); +		cur_bytes = max(cur_bytes, min_size); +		ret = btrfs_reserve_extent(trans, root, cur_bytes,  					   min_size, 0, *alloc_hint, &ins, 1);  		if (ret) {  			if (own_trans) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c83086fdda0..2c02310ff2d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -527,6 +527,8 @@ fail:  	if (async_transid) {  		*async_transid = trans->transid;  		err = btrfs_commit_transaction_async(trans, root, 1); +		if (err) +			err = btrfs_commit_transaction(trans, root);  	} else {  		err = btrfs_commit_transaction(trans, root);  	} @@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  		*async_transid = trans->transid;  		ret = btrfs_commit_transaction_async(trans,  				     root->fs_info->extent_root, 1); +		if (ret) +			ret = btrfs_commit_transaction(trans, root);  	} else {  		ret = btrfs_commit_transaction(trans,  					       root->fs_info->extent_root);  	} -	if (ret) { -		/* cleanup_transaction has freed this for us */ -		if (trans->aborted) -			pending_snapshot = NULL; +	if (ret)  		goto fail; -	}  	ret = pending_snapshot->error;  	if (ret) @@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)  	if (ret)  		return ret; -	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, -			1)) { -		pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); -		mnt_drop_write_file(file); -		return -EINVAL; -	} -  	if (btrfs_root_readonly(root)) {  		ret = -EROFS;  		goto out; @@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)  		ret = -EINVAL;  	}  out: -	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);  	mnt_drop_write_file(file);  	return ret;  } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index ca52681e5f4..b81e0e9a489 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -26,7 +26,6 @@  void btrfs_tree_lock(struct extent_buffer *eb);  void btrfs_tree_unlock(struct extent_buffer *eb); -int btrfs_try_spin_lock(struct extent_buffer *eb);  void btrfs_tree_read_lock(struct extent_buffer *eb);  void btrfs_tree_read_unlock(struct extent_buffer *eb); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dc08d77b717..005c45db699 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)  	INIT_LIST_HEAD(&splice);  	INIT_LIST_HEAD(&works); +	mutex_lock(&root->fs_info->ordered_operations_mutex);  	spin_lock(&root->fs_info->ordered_extent_lock);  	list_splice_init(&root->fs_info->ordered_extents, &splice);  	while (!list_empty(&splice)) { @@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)  		cond_resched();  	} +	mutex_unlock(&root->fs_info->ordered_operations_mutex);  }  /* diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index aee4b1cc3d9..b44124dd237 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,  	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,  				   sgn > 0 ? node->seq - 1 : node->seq, &roots);  	if (ret < 0) -		goto out; +		return ret;  	spin_lock(&fs_info->qgroup_lock);  	quota_root = fs_info->quota_root; @@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,  	ret = 0;  unlock:  	spin_unlock(&fs_info->qgroup_lock); -out:  	ulist_free(roots);  	ulist_free(tmp); @@ -1525,21 +1524,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)  		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&  		    qg->reserved + qg->rfer + num_bytes > -		    qg->max_rfer) +		    qg->max_rfer) {  			ret = -EDQUOT; +			goto out; +		}  		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&  		    qg->reserved + qg->excl + num_bytes > -		    qg->max_excl) +		    qg->max_excl) {  			ret = -EDQUOT; +			goto out; +		}  		list_for_each_entry(glist, &qg->groups, next_group) {  			ulist_add(ulist, glist->group->qgroupid,  				  (uintptr_t)glist->group, GFP_ATOMIC);  		}  	} -	if (ret) -		goto out;  	/*  	 * no limits exceeded, now record the reservation into all qgroups diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 50695dc5e2a..b67171e6d68 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)  	}  	spin_unlock(&rc->reloc_root_tree.lock); +	if (!node) +		return 0;  	BUG_ON((struct btrfs_root *)node->data != root);  	if (!del) { @@ -2238,13 +2240,28 @@ again:  }  static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ +	struct btrfs_root *reloc_root; + +	while (!list_empty(list)) { +		reloc_root = list_entry(list->next, struct btrfs_root, +					root_list); +		__update_reloc_root(reloc_root, 1); +		free_extent_buffer(reloc_root->node); +		free_extent_buffer(reloc_root->commit_root); +		kfree(reloc_root); +	} +} + +static noinline_for_stack  int merge_reloc_roots(struct reloc_control *rc)  {  	struct btrfs_root *root;  	struct btrfs_root *reloc_root;  	LIST_HEAD(reloc_roots);  	int found = 0; -	int ret; +	int ret = 0;  again:  	root = rc->extent_root; @@ -2270,20 +2287,33 @@ again:  			BUG_ON(root->reloc_root != reloc_root);  			ret = merge_reloc_root(rc, root); -			BUG_ON(ret); +			if (ret) +				goto out;  		} else {  			list_del_init(&reloc_root->root_list);  		}  		ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); -		BUG_ON(ret < 0); +		if (ret < 0) { +			if (list_empty(&reloc_root->root_list)) +				list_add_tail(&reloc_root->root_list, +					      &reloc_roots); +			goto out; +		}  	}  	if (found) {  		found = 0;  		goto again;  	} +out: +	if (ret) { +		btrfs_std_error(root->fs_info, ret); +		if (!list_empty(&reloc_roots)) +			free_reloc_roots(&reloc_roots); +	} +  	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); -	return 0; +	return ret;  }  static void free_block_list(struct rb_root *blocks) @@ -2818,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,  	int err = 0;  	path = btrfs_alloc_path(); -	if (!path) -		return -ENOMEM; +	if (!path) { +		err = -ENOMEM; +		goto out_path; +	}  	rb_node = rb_first(blocks);  	while (rb_node) { @@ -2858,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,  		rb_node = rb_next(rb_node);  	}  out: -	free_block_list(blocks);  	err = finish_pending_nodes(trans, rc, path, err);  	btrfs_free_path(path); +out_path: +	free_block_list(blocks);  	return err;  } @@ -3698,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc)  	set_reloc_control(rc);  	trans = btrfs_join_transaction(rc->extent_root); -	BUG_ON(IS_ERR(trans)); +	if (IS_ERR(trans)) { +		unset_reloc_control(rc); +		/* +		 * extent tree is not a ref_cow tree and has no reloc_root to +		 * cleanup.  And callers are responsible to free the above +		 * block rsv. +		 */ +		return PTR_ERR(trans); +	}  	btrfs_commit_transaction(trans, rc->extent_root);  	return 0;  } @@ -3730,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  	while (1) {  		progress++;  		trans = btrfs_start_transaction(rc->extent_root, 0); -		BUG_ON(IS_ERR(trans)); +		if (IS_ERR(trans)) { +			err = PTR_ERR(trans); +			trans = NULL; +			break; +		}  restart:  		if (update_backref_cache(trans, &rc->backref_cache)) {  			btrfs_end_transaction(trans, rc->extent_root); @@ -4264,14 +4309,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)  out_free:  	kfree(rc);  out: -	while (!list_empty(&reloc_roots)) { -		reloc_root = list_entry(reloc_roots.next, -					struct btrfs_root, root_list); -		list_del(&reloc_root->root_list); -		free_extent_buffer(reloc_root->node); -		free_extent_buffer(reloc_root->commit_root); -		kfree(reloc_root); -	} +	if (!list_empty(&reloc_roots)) +		free_reloc_roots(&reloc_roots); +  	btrfs_free_path(path);  	if (err == 0) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 53c3501fa4c..85e072b956d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)  	eb = path->nodes[0];  	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);  	item_size = btrfs_item_size_nr(eb, path->slots[0]); -	btrfs_release_path(path);  	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {  		do { @@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)  				ret < 0 ? -1 : ref_level,  				ret < 0 ? -1 : ref_root);  		} while (ret != 1); +		btrfs_release_path(path);  	} else { +		btrfs_release_path(path);  		swarn.path = path;  		swarn.dev = dev;  		iterate_extent_inodes(fs_info, found_key.objectid, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f7a8b861058..c85e7c6b459 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,  		    found_key.type != key.type) {  			key.offset += right_len;  			break; -		} else { -			if (found_key.offset != key.offset + right_len) { -				/* Should really not happen */ -				ret = -EIO; -				goto out; -			} +		} +		if (found_key.offset != key.offset + right_len) { +			ret = 0; +			goto out;  		}  		key = found_key;  	} diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68a29a1ea06..f6b88595f85 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = {  	.kill_sb	= btrfs_kill_super,  	.fs_flags	= FS_REQUIRES_DEV,  }; +MODULE_ALIAS_FS("btrfs");  /*   * used by btrfsctl to scan devices when no FS is mounted diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e52da6fb116..50767bbaad6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	btrfs_trans_release_metadata(trans, root);  	trans->block_rsv = NULL; -	/* -	 * the same root has to be passed to start_transaction and -	 * end_transaction. Subvolume quota depends on this. -	 */ -	WARN_ON(trans->root != root);  	if (trans->qgroup_reserved) { -		btrfs_qgroup_free(root, trans->qgroup_reserved); +		/* +		 * the same root has to be passed here between start_transaction +		 * and end_transaction. Subvolume quota depends on this. +		 */ +		btrfs_qgroup_free(trans->root, trans->qgroup_reserved);  		trans->qgroup_reserved = 0;  	} @@ -1052,7 +1051,12 @@ int btrfs_defrag_root(struct btrfs_root *root)  /*   * new snapshots need to be created at a very specific time in the - * transaction commit.  This does the actual creation + * transaction commit.  This does the actual creation. + * + * Note: + * If the error which may affect the commitment of the current transaction + * happens, we should return the error number. If the error which just affect + * the creation of the pending snapshots, just return 0.   */  static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  				   struct btrfs_fs_info *fs_info, @@ -1071,7 +1075,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	struct extent_buffer *tmp;  	struct extent_buffer *old;  	struct timespec cur_time = CURRENT_TIME; -	int ret; +	int ret = 0;  	u64 to_reserve = 0;  	u64 index = 0;  	u64 objectid; @@ -1080,40 +1084,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	path = btrfs_alloc_path();  	if (!path) { -		ret = pending->error = -ENOMEM; -		return ret; +		pending->error = -ENOMEM; +		return 0;  	}  	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);  	if (!new_root_item) { -		ret = pending->error = -ENOMEM; +		pending->error = -ENOMEM;  		goto root_item_alloc_fail;  	} -	ret = btrfs_find_free_objectid(tree_root, &objectid); -	if (ret) { -		pending->error = ret; +	pending->error = btrfs_find_free_objectid(tree_root, &objectid); +	if (pending->error)  		goto no_free_objectid; -	}  	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);  	if (to_reserve > 0) { -		ret = btrfs_block_rsv_add(root, &pending->block_rsv, -					  to_reserve, -					  BTRFS_RESERVE_NO_FLUSH); -		if (ret) { -			pending->error = ret; +		pending->error = btrfs_block_rsv_add(root, +						     &pending->block_rsv, +						     to_reserve, +						     BTRFS_RESERVE_NO_FLUSH); +		if (pending->error)  			goto no_free_objectid; -		}  	} -	ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, -				   objectid, pending->inherit); -	if (ret) { -		pending->error = ret; +	pending->error = btrfs_qgroup_inherit(trans, fs_info, +					      root->root_key.objectid, +					      objectid, pending->inherit); +	if (pending->error)  		goto no_free_objectid; -	}  	key.objectid = objectid;  	key.offset = (u64)-1; @@ -1141,7 +1141,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  					 dentry->d_name.len, 0);  	if (dir_item != NULL && !IS_ERR(dir_item)) {  		pending->error = -EEXIST; -		goto fail; +		goto dir_item_existed;  	} else if (IS_ERR(dir_item)) {  		ret = PTR_ERR(dir_item);  		btrfs_abort_transaction(trans, root, ret); @@ -1272,6 +1272,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	if (ret)  		btrfs_abort_transaction(trans, root, ret);  fail: +	pending->error = ret; +dir_item_existed:  	trans->block_rsv = rsv;  	trans->bytes_reserved = 0;  no_free_objectid: @@ -1287,12 +1289,17 @@ root_item_alloc_fail:  static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,  					     struct btrfs_fs_info *fs_info)  { -	struct btrfs_pending_snapshot *pending; +	struct btrfs_pending_snapshot *pending, *next;  	struct list_head *head = &trans->transaction->pending_snapshots; +	int ret = 0; -	list_for_each_entry(pending, head, list) -		create_pending_snapshot(trans, fs_info, pending); -	return 0; +	list_for_each_entry_safe(pending, next, head, list) { +		list_del(&pending->list); +		ret = create_pending_snapshot(trans, fs_info, pending); +		if (ret) +			break; +	} +	return ret;  }  static void update_super_roots(struct btrfs_root *root) @@ -1448,6 +1455,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,  	btrfs_abort_transaction(trans, root, err);  	spin_lock(&root->fs_info->trans_lock); + +	if (list_empty(&cur_trans->list)) { +		spin_unlock(&root->fs_info->trans_lock); +		btrfs_end_transaction(trans, root); +		return; +	} +  	list_del_init(&cur_trans->list);  	if (cur_trans == root->fs_info->running_transaction) {  		root->fs_info->trans_no_join = 1; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7ef569eb22..451fad96ecd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,  	btrfs_release_path(path);  	if (ret == 0) { -		btrfs_inc_nlink(inode); +		if (!inode->i_nlink) +			set_nlink(inode, 1); +		else +			btrfs_inc_nlink(inode);  		ret = btrfs_update_inode(trans, root, inode);  	} else if (ret == -EEXIST) {  		ret = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 35bb2d4ed29..2854c824ab6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)  		__btrfs_close_devices(fs_devices);  		free_fs_devices(fs_devices);  	} +	/* +	 * Wait for rcu kworkers under __btrfs_close_devices +	 * to finish all blkdev_puts so device is really +	 * free when umount is done. +	 */ +	rcu_barrier();  	return ret;  } @@ -2379,7 +2385,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,  		return ret;  	trans = btrfs_start_transaction(root, 0); -	BUG_ON(IS_ERR(trans)); +	if (IS_ERR(trans)) { +		ret = PTR_ERR(trans); +		btrfs_std_error(root->fs_info, ret); +		return ret; +	}  	lock_chunks(root); @@ -3050,7 +3060,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)  	unset_balance_control(fs_info);  	ret = del_balance_item(fs_info->tree_root); -	BUG_ON(ret); +	if (ret) +		btrfs_std_error(fs_info, ret);  	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);  } @@ -3230,6 +3241,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,  		update_ioctl_balance_args(fs_info, 0, bargs);  	} +	if ((ret && ret != -ECANCELED && ret != -ENOSPC) || +	    balance_need_close(fs_info)) { +		__cancel_balance(fs_info); +	} +  	wake_up(&fs_info->balance_wait_q);  	return ret; @@ -4919,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,  	em = lookup_extent_mapping(em_tree, chunk_start, 1);  	read_unlock(&em_tree->lock); -	BUG_ON(!em || em->start != chunk_start); +	if (!em) { +		printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", +		       chunk_start); +		return -EIO; +	} + +	if (em->start != chunk_start) { +		printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", +		       em->start, chunk_start); +		free_extent_map(em); +		return -EIO; +	}  	map = (struct map_lookup *)em->bdev;  	length = em->len;  |