diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/ctree.c | 28 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 12 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 36 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 100 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 27 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 258 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 19 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 34 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 298 | ||||
| -rw-r--r-- | fs/btrfs/transaction.h | 29 | ||||
| -rw-r--r-- | fs/btrfs/xattr.c | 2 | 
15 files changed, 473 insertions, 384 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 93b1aa93201..52d7eca8c7b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -121,9 +121,6 @@ struct btrfs_inode {  	 */  	u64 index_cnt; -	/* the start of block group preferred for allocations. */ -	u64 block_group; -  	/* the fsync log has some corner cases that mean we have to check  	 * directories to see if any unlinks have been done before  	 * the directory was logged.  See tree-log.c for all the diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b0e18d986e0..d84089349c8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)  {  	struct btrfs_path *path;  	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); -	if (path) -		path->reada = 1;  	return path;  } @@ -1224,6 +1222,7 @@ static void reada_for_search(struct btrfs_root *root,  	u64 search;  	u64 target;  	u64 nread = 0; +	u64 gen;  	int direction = path->reada;  	struct extent_buffer *eb;  	u32 nr; @@ -1251,6 +1250,15 @@ static void reada_for_search(struct btrfs_root *root,  	nritems = btrfs_header_nritems(node);  	nr = slot;  	while (1) { +		if (!node->map_token) { +			unsigned long offset = btrfs_node_key_ptr_offset(nr); +			map_private_extent_buffer(node, offset, +						  sizeof(struct btrfs_key_ptr), +						  &node->map_token, +						  &node->kaddr, +						  &node->map_start, +						  &node->map_len, KM_USER1); +		}  		if (direction < 0) {  			if (nr == 0)  				break; @@ -1268,14 +1276,23 @@ static void reada_for_search(struct btrfs_root *root,  		search = btrfs_node_blockptr(node, nr);  		if ((search <= target && target - search <= 65536) ||  		    (search > target && search - target <= 65536)) { -			readahead_tree_block(root, search, blocksize, -				     btrfs_node_ptr_generation(node, nr)); +			gen = btrfs_node_ptr_generation(node, nr); +			if (node->map_token) { +				unmap_extent_buffer(node, node->map_token, +						    KM_USER1); +				
node->map_token = NULL; +			} +			readahead_tree_block(root, search, blocksize, gen);  			nread += blocksize;  		}  		nscan++;  		if ((nread > 65536 || nscan > 32))  			break;  	} +	if (node->map_token) { +		unmap_extent_buffer(node, node->map_token, KM_USER1); +		node->map_token = NULL; +	}  }  /* @@ -1648,9 +1665,6 @@ again:  		}  cow_done:  		BUG_ON(!cow && ins_len); -		if (level != btrfs_header_level(b)) -			WARN_ON(1); -		level = btrfs_header_level(b);  		p->nodes[level] = b;  		if (!p->skip_locking) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 332323e19dd..8f98c200571 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -930,7 +930,6 @@ struct btrfs_fs_info {  	 * is required instead of the faster short fsync log commits  	 */  	u64 last_trans_log_full_commit; -	u64 open_ioctl_trans;  	unsigned long mount_opt:20;  	unsigned long compress_type:4;  	u64 max_inline; @@ -947,7 +946,6 @@ struct btrfs_fs_info {  	struct super_block *sb;  	struct inode *btree_inode;  	struct backing_dev_info bdi; -	struct mutex trans_mutex;  	struct mutex tree_log_mutex;  	struct mutex transaction_kthread_mutex;  	struct mutex cleaner_mutex; @@ -968,6 +966,7 @@ struct btrfs_fs_info {  	struct rw_semaphore subvol_sem;  	struct srcu_struct subvol_srcu; +	spinlock_t trans_lock;  	struct list_head trans_list;  	struct list_head hashers;  	struct list_head dead_roots; @@ -980,6 +979,7 @@ struct btrfs_fs_info {  	atomic_t async_submit_draining;  	atomic_t nr_async_bios;  	atomic_t async_delalloc_pages; +	atomic_t open_ioctl_trans;  	/*  	 * this is used by the balancing code to wait for all the pending @@ -1044,6 +1044,7 @@ struct btrfs_fs_info {  	int closing;  	int log_root_recovering;  	int enospc_unlink; +	int trans_no_join;  	u64 total_pinned; @@ -1065,7 +1066,6 @@ struct btrfs_fs_info {  	struct reloc_control *reloc_ctl;  	spinlock_t delalloc_lock; -	spinlock_t new_trans_lock;  	u64 delalloc_bytes;  	/* data_alloc_cluster is only used in ssd mode */ @@ -2238,6 +2238,9 
@@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,  void btrfs_block_rsv_release(struct btrfs_root *root,  			     struct btrfs_block_rsv *block_rsv,  			     u64 num_bytes); +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, +				    struct btrfs_root *root, +				    struct btrfs_block_rsv *rsv);  int btrfs_set_block_group_ro(struct btrfs_root *root,  			     struct btrfs_block_group_cache *cache);  int btrfs_set_block_group_rw(struct btrfs_root *root, @@ -2512,8 +2515,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,  int btrfs_writepages(struct address_space *mapping,  		     struct writeback_control *wbc);  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, -			     struct btrfs_root *new_root, -			     u64 new_dirid, u64 alloc_hint); +			     struct btrfs_root *new_root, u64 new_dirid);  int btrfs_merge_bio_hook(struct page *page, unsigned long offset,  			 size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 01e29503a54..b46d94d1dea 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1129,7 +1129,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)  	delayed_node = async_node->delayed_node;  	root = delayed_node->root; -	trans = btrfs_join_transaction(root, 0); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		goto free_path; @@ -1572,8 +1572,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,  	btrfs_set_stack_inode_transid(inode_item, trans->transid);  	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);  	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); -	btrfs_set_stack_inode_block_group(inode_item, -					  BTRFS_I(inode)->block_group); +	btrfs_set_stack_inode_block_group(inode_item, 0);  	btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),  				     inode->i_atime.tv_sec); diff --git a/fs/btrfs/disk-io.c 
b/fs/btrfs/disk-io.c index 98b6a71decb..a203d363184 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg)  		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);  		mutex_lock(&root->fs_info->transaction_kthread_mutex); -		spin_lock(&root->fs_info->new_trans_lock); +		spin_lock(&root->fs_info->trans_lock);  		cur = root->fs_info->running_transaction;  		if (!cur) { -			spin_unlock(&root->fs_info->new_trans_lock); +			spin_unlock(&root->fs_info->trans_lock);  			goto sleep;  		}  		now = get_seconds();  		if (!cur->blocked &&  		    (now < cur->start_time || now - cur->start_time < 30)) { -			spin_unlock(&root->fs_info->new_trans_lock); +			spin_unlock(&root->fs_info->trans_lock);  			delay = HZ * 5;  			goto sleep;  		}  		transid = cur->transid; -		spin_unlock(&root->fs_info->new_trans_lock); +		spin_unlock(&root->fs_info->trans_lock); -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans));  		if (transid == trans->transid) {  			ret = btrfs_commit_transaction(trans, root); @@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	INIT_LIST_HEAD(&fs_info->ordered_operations);  	INIT_LIST_HEAD(&fs_info->caching_block_groups);  	spin_lock_init(&fs_info->delalloc_lock); -	spin_lock_init(&fs_info->new_trans_lock); +	spin_lock_init(&fs_info->trans_lock);  	spin_lock_init(&fs_info->ref_cache_lock);  	spin_lock_init(&fs_info->fs_roots_radix_lock);  	spin_lock_init(&fs_info->delayed_iput_lock); @@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	fs_info->max_inline = 8192 * 1024;  	fs_info->metadata_ratio = 0;  	fs_info->defrag_inodes = RB_ROOT; +	fs_info->trans_no_join = 0;  	fs_info->thread_pool_size = min_t(unsigned long,  					  num_online_cpus() + 2, 8); @@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,  	fs_info->do_barriers = 1; -	mutex_init(&fs_info->trans_mutex);  	
mutex_init(&fs_info->ordered_operations_mutex);  	mutex_init(&fs_info->tree_log_mutex);  	mutex_init(&fs_info->chunk_mutex); @@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root)  	down_write(&root->fs_info->cleanup_work_sem);  	up_write(&root->fs_info->cleanup_work_sem); -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		return PTR_ERR(trans);  	ret = btrfs_commit_transaction(trans, root);  	BUG_ON(ret);  	/* run commit again to drop the original snapshot */ -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		return PTR_ERR(trans);  	btrfs_commit_transaction(trans, root); @@ -3024,10 +3024,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)  	WARN_ON(1); -	mutex_lock(&root->fs_info->trans_mutex);  	mutex_lock(&root->fs_info->transaction_kthread_mutex); +	spin_lock(&root->fs_info->trans_lock);  	list_splice_init(&root->fs_info->trans_list, &list); +	root->fs_info->trans_no_join = 1; +	spin_unlock(&root->fs_info->trans_lock); +  	while (!list_empty(&list)) {  		t = list_entry(list.next, struct btrfs_transaction, list);  		if (!t) @@ -3052,23 +3055,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)  		t->blocked = 0;  		if (waitqueue_active(&root->fs_info->transaction_wait))  			wake_up(&root->fs_info->transaction_wait); -		mutex_unlock(&root->fs_info->trans_mutex); -		mutex_lock(&root->fs_info->trans_mutex);  		t->commit_done = 1;  		if (waitqueue_active(&t->commit_wait))  			wake_up(&t->commit_wait); -		mutex_unlock(&root->fs_info->trans_mutex); - -		mutex_lock(&root->fs_info->trans_mutex);  		btrfs_destroy_pending_snapshots(t);  		btrfs_destroy_delalloc_inodes(root); -		spin_lock(&root->fs_info->new_trans_lock); +		spin_lock(&root->fs_info->trans_lock);  		root->fs_info->running_transaction = NULL; -		spin_unlock(&root->fs_info->new_trans_lock); +		spin_unlock(&root->fs_info->trans_lock);  		
btrfs_destroy_marked_extents(root, &t->dirty_pages,  					     EXTENT_DIRTY); @@ -3082,8 +3080,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)  		kmem_cache_free(btrfs_transaction_cachep, t);  	} +	spin_lock(&root->fs_info->trans_lock); +	root->fs_info->trans_no_join = 0; +	spin_unlock(&root->fs_info->trans_lock);  	mutex_unlock(&root->fs_info->transaction_kthread_mutex); -	mutex_unlock(&root->fs_info->trans_mutex);  	return 0;  } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 169bd62ce77..c9173a7827b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -348,7 +348,7 @@ static int caching_kthread(void *data)  	 */  	path->skip_locking = 1;  	path->search_commit_root = 1; -	path->reada = 2; +	path->reada = 1;  	key.objectid = last;  	key.offset = 0; @@ -379,15 +379,18 @@ again:  			if (ret)  				break; -			caching_ctl->progress = last; -			btrfs_release_path(path); -			up_read(&fs_info->extent_commit_sem); -			mutex_unlock(&caching_ctl->mutex); -			if (btrfs_transaction_in_commit(fs_info)) -				schedule_timeout(1); -			else +			if (need_resched() || +			    btrfs_next_leaf(extent_root, path)) { +				caching_ctl->progress = last; +				btrfs_release_path(path); +				up_read(&fs_info->extent_commit_sem); +				mutex_unlock(&caching_ctl->mutex);  				cond_resched(); -			goto again; +				goto again; +			} +			leaf = path->nodes[0]; +			nritems = btrfs_header_nritems(leaf); +			continue;  		}  		if (key.objectid < block_group->key.objectid) { @@ -3065,7 +3068,7 @@ again:  			spin_unlock(&data_sinfo->lock);  alloc:  			alloc_target = btrfs_get_alloc_profile(root, 1); -			trans = btrfs_join_transaction(root, 1); +			trans = btrfs_join_transaction(root);  			if (IS_ERR(trans))  				return PTR_ERR(trans); @@ -3091,9 +3094,10 @@ alloc:  		/* commit the current transaction and try again */  commit_trans: -		if (!committed && !root->fs_info->open_ioctl_trans) { +		if (!committed && +		    
!atomic_read(&root->fs_info->open_ioctl_trans)) {  			committed = 1; -			trans = btrfs_join_transaction(root, 1); +			trans = btrfs_join_transaction(root);  			if (IS_ERR(trans))  				return PTR_ERR(trans);  			ret = btrfs_commit_transaction(trans, root); @@ -3472,7 +3476,7 @@ again:  		goto out;  	ret = -ENOSPC; -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		goto out;  	ret = btrfs_commit_transaction(trans, root); @@ -3699,7 +3703,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,  		if (trans)  			return -EAGAIN; -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans));  		ret = btrfs_commit_transaction(trans, root);  		return 0; @@ -3837,6 +3841,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)  	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);  } +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, +				    struct btrfs_root *root, +				    struct btrfs_block_rsv *rsv) +{ +	struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; +	u64 num_bytes; +	int ret; + +	/* +	 * Truncate should be freeing data, but give us 2 items just in case it +	 * needs to use some space.  We may want to be smarter about this in the +	 * future. +	 */ +	num_bytes = btrfs_calc_trans_metadata_size(root, 2); + +	/* We already have enough bytes, just return */ +	if (rsv->reserved >= num_bytes) +		return 0; + +	num_bytes -= rsv->reserved; + +	/* +	 * You should have reserved enough space before hand to do this, so this +	 * should not fail. 
+	 */ +	ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); +	BUG_ON(ret); + +	return 0; +} +  int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,  				 struct btrfs_root *root,  				 int num_items) @@ -3877,23 +3912,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,  	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;  	/* -	 * one for deleting orphan item, one for updating inode and -	 * two for calling btrfs_truncate_inode_items. -	 * -	 * btrfs_truncate_inode_items is a delete operation, it frees -	 * more space than it uses in most cases. So two units of -	 * metadata space should be enough for calling it many times. -	 * If all of the metadata space is used, we can commit -	 * transaction and use space it freed. +	 * We need to hold space in order to delete our orphan item once we've +	 * added it, so this takes the reservation so we can release it later +	 * when we are truly done with the orphan item.  	 */ -	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); +	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);  	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);  }  void btrfs_orphan_release_metadata(struct inode *inode)  {  	struct btrfs_root *root = BTRFS_I(inode)->root; -	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); +	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);  	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);  } @@ -4987,6 +5017,15 @@ have_block_group:  		if (unlikely(block_group->ro))  			goto loop; +		spin_lock(&block_group->free_space_ctl->tree_lock); +		if (cached && +		    block_group->free_space_ctl->free_space < +		    num_bytes + empty_size) { +			spin_unlock(&block_group->free_space_ctl->tree_lock); +			goto loop; +		} +		spin_unlock(&block_group->free_space_ctl->tree_lock); +  		/*  		 * Ok we want to try and use the cluster allocator, so lets look  		 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will @@ -5150,6 
+5189,7 @@ checks:  			btrfs_add_free_space(block_group, offset,  					     search_start - offset);  		BUG_ON(offset > search_start); +		btrfs_put_block_group(block_group);  		break;  loop:  		failed_cluster_refill = false; @@ -5242,14 +5282,7 @@ loop:  		ret = -ENOSPC;  	} else if (!ins->objectid) {  		ret = -ENOSPC; -	} - -	/* we found what we needed */ -	if (ins->objectid) { -		if (!(data & BTRFS_BLOCK_GROUP_DATA)) -			trans->block_group = block_group->key.objectid; - -		btrfs_put_block_group(block_group); +	} else if (ins->objectid) {  		ret = 0;  	} @@ -6526,7 +6559,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,  	BUG_ON(cache->ro); -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans));  	alloc_flags = update_block_group_flags(root, cache->flags); @@ -6882,6 +6915,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);  	if (cache_gen != 0 && diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0e0fe0f6ec7..b181a94a717 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1474,7 +1474,7 @@ u64 count_range_bits(struct extent_io_tree *tree,  			if (total_bytes >= max_bytes)  				break;  			if (!found) { -				*start = state->start; +				*start = max(cur_start, state->start);  				found = 1;  			}  			last = state->end; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c6a22d783c3..e3a1b0c2394 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)  	 * the current transaction, we can bail out now without any  	 * syncing  	 */ -	mutex_lock(&root->fs_info->trans_mutex); +	smp_mb();  	if (BTRFS_I(inode)->last_trans <=  	    root->fs_info->last_trans_committed) {  		BTRFS_I(inode)->last_trans = 0; -		mutex_unlock(&root->fs_info->trans_mutex);  		goto out;  	
} -	mutex_unlock(&root->fs_info->trans_mutex);  	/*  	 * ok we haven't committed the transaction yet, lets do a commit diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 70d45795d75..dd38d4c3a59 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,  				spin_lock(&ctl->tree_lock);  				ret = link_free_space(ctl, e);  				spin_unlock(&ctl->tree_lock); -				BUG_ON(ret); +				if (ret) { +					printk(KERN_ERR "Duplicate entries in " +					       "free space cache, dumping\n"); +					kunmap(page); +					unlock_page(page); +					page_cache_release(page); +					goto free_cache; +				}  			} else {  				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);  				if (!e->bitmap) { @@ -419,6 +426,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,  				ctl->op->recalc_thresholds(ctl);  				spin_unlock(&ctl->tree_lock);  				list_add_tail(&e->list, &bitmaps); +				if (ret) { +					printk(KERN_ERR "Duplicate entries in " +					       "free space cache, dumping\n"); +					kunmap(page); +					unlock_page(page); +					page_cache_release(page); +					goto free_cache; +				}  			}  			num_entries--; @@ -963,10 +978,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,  			 * logically.  			 
*/  			if (bitmap) { -				WARN_ON(info->bitmap); +				if (info->bitmap) { +					WARN_ON_ONCE(1); +					return -EEXIST; +				}  				p = &(*p)->rb_right;  			} else { -				WARN_ON(!info->bitmap); +				if (!info->bitmap) { +					WARN_ON_ONCE(1); +					return -EEXIST; +				}  				p = &(*p)->rb_left;  			}  		} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb51bb1fa44..a83e44bf320 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,  		return -ENOMEM;  	path->leave_spinning = 1; -	btrfs_set_trans_block_group(trans, inode);  	key.objectid = btrfs_ino(inode);  	key.offset = start; @@ -426,9 +425,8 @@ again:  		}  	}  	if (start == 0) { -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans)); -		btrfs_set_trans_block_group(trans, inode);  		trans->block_rsv = &root->fs_info->delalloc_block_rsv;  		/* lets try to make an inline extent */ @@ -623,8 +621,9 @@ retry:  			    async_extent->start + async_extent->ram_size - 1,  			    GFP_NOFS); -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans)); +		trans->block_rsv = &root->fs_info->delalloc_block_rsv;  		ret = btrfs_reserve_extent(trans, root,  					   async_extent->compressed_size,  					   async_extent->compressed_size, @@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,  	int ret = 0;  	BUG_ON(is_free_space_inode(root, inode)); -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,  	nolock = is_free_space_inode(root, inode);  	if (nolock) -		trans = btrfs_join_transaction_nolock(root, 1); +		
trans = btrfs_join_transaction_nolock(root);  	else -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root); +  	BUG_ON(IS_ERR(trans)); +	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	cow_start = (u64)-1;  	cur_offset = start; @@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,  {  	struct btrfs_ordered_sum *sum; -	btrfs_set_trans_block_group(trans, inode); -  	list_for_each_entry(sum, list, list) {  		btrfs_csum_file_blocks(trans,  		       BTRFS_I(inode)->root->fs_info->csum_root, sum); @@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);  		if (!ret) {  			if (nolock) -				trans = btrfs_join_transaction_nolock(root, 1); +				trans = btrfs_join_transaction_nolock(root);  			else -				trans = btrfs_join_transaction(root, 1); +				trans = btrfs_join_transaction(root);  			BUG_ON(IS_ERR(trans)); -			btrfs_set_trans_block_group(trans, inode);  			trans->block_rsv = &root->fs_info->delalloc_block_rsv;  			ret = btrfs_update_inode(trans, root, inode);  			BUG_ON(ret); @@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  			 0, &cached_state, GFP_NOFS);  	if (nolock) -		trans = btrfs_join_transaction_nolock(root, 1); +		trans = btrfs_join_transaction_nolock(root);  	else -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)  					(u64)-1);  	if (root->orphan_block_rsv || root->orphan_item_inserted) { -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		if (!IS_ERR(trans))  			btrfs_end_transaction(trans, 
root);  	} @@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_key location;  	int maybe_acls; -	u64 alloc_group_block;  	u32 rdev;  	int ret;  	path = btrfs_alloc_path();  	BUG_ON(!path); +	path->leave_spinning = 1;  	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));  	ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)  	leaf = path->nodes[0];  	inode_item = btrfs_item_ptr(leaf, path->slots[0],  				    struct btrfs_inode_item); +	if (!leaf->map_token) +		map_private_extent_buffer(leaf, (unsigned long)inode_item, +					  sizeof(struct btrfs_inode_item), +					  &leaf->map_token, &leaf->kaddr, +					  &leaf->map_start, &leaf->map_len, +					  KM_USER1);  	inode->i_mode = btrfs_inode_mode(leaf, inode_item);  	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); @@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)  	BTRFS_I(inode)->index_cnt = (u64)-1;  	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); -	alloc_group_block = btrfs_inode_block_group(leaf, inode_item); -  	/*  	 * try to precache a NULL acl entry for files that don't have  	 * any xattrs or acls @@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)  	if (!maybe_acls)  		cache_no_acl(inode); -	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, -						alloc_group_block, 0); +	if (leaf->map_token) { +		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); +		leaf->map_token = NULL; +	} +  	btrfs_free_path(path);  	inode_item = NULL; @@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,  	btrfs_set_inode_transid(leaf, item, trans->transid);  	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);  	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); -	btrfs_set_inode_block_group(leaf, item, 
BTRFS_I(inode)->block_group); +	btrfs_set_inode_block_group(leaf, item, 0);  	if (leaf->map_token) {  		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);  	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, @@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {  		err = btrfs_unlink_subvol(trans, root, dir,  					  BTRFS_I(inode)->location.objectid, @@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  				err = PTR_ERR(trans);  				break;  			} -			btrfs_set_trans_block_group(trans, inode);  			err = btrfs_drop_extents(trans, inode, cur_offset,  						 cur_offset + hole_size, @@ -3650,7 +3648,6 @@ void btrfs_evict_inode(struct inode *inode)  	while (1) {  		trans = btrfs_start_transaction(root, 0);  		BUG_ON(IS_ERR(trans)); -		btrfs_set_trans_block_group(trans, inode);  		trans->block_rsv = root->orphan_block_rsv;  		ret = btrfs_block_rsv_check(trans, root, @@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; -	path->reada = 2; + +	path->reada = 1;  	if (key_type == BTRFS_DIR_INDEX_KEY) {  		INIT_LIST_HEAD(&ins_list); @@ -4274,12 +4272,11 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)  	if (wbc->sync_mode == WB_SYNC_ALL) {  		if (nolock) -			trans = btrfs_join_transaction_nolock(root, 1); +			trans = btrfs_join_transaction_nolock(root);  		else -			trans = btrfs_join_transaction(root, 1); +			trans = btrfs_join_transaction(root);  		if (IS_ERR(trans))  			return 
PTR_ERR(trans); -		btrfs_set_trans_block_group(trans, inode);  		if (nolock)  			ret = btrfs_end_transaction_nolock(trans, root);  		else @@ -4303,9 +4300,8 @@ void btrfs_dirty_inode(struct inode *inode)  	if (BTRFS_I(inode)->dummy_inode)  		return; -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	ret = btrfs_update_inode(trans, root, inode);  	if (ret && ret == -ENOSPC) { @@ -4319,7 +4315,6 @@ void btrfs_dirty_inode(struct inode *inode)  				       PTR_ERR(trans));  			return;  		} -		btrfs_set_trans_block_group(trans, inode);  		ret = btrfs_update_inode(trans, root, inode);  		if (ret) { @@ -4418,8 +4413,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  				     struct btrfs_root *root,  				     struct inode *dir,  				     const char *name, int name_len, -				     u64 ref_objectid, u64 objectid, -				     u64 alloc_hint, int mode, u64 *index) +				     u64 ref_objectid, u64 objectid, int mode, +				     u64 *index)  {  	struct inode *inode;  	struct btrfs_inode_item *inode_item; @@ -4472,8 +4467,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  		owner = 0;  	else  		owner = 1; -	BTRFS_I(inode)->block_group = -			btrfs_find_block_group(root, 0, alloc_hint, owner);  	key[0].objectid = objectid;  	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4629,15 +4622,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	err = btrfs_find_free_ino(root, &objectid);  	if (err)  		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,  				dentry->d_name.len, btrfs_ino(dir), objectid, -				BTRFS_I(dir)->block_group, mode, &index); +				mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -4649,7 +4640,6 @@ static int btrfs_mknod(struct inode 
*dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -4658,8 +4648,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  		init_special_inode(inode, inode->i_mode, rdev);  		btrfs_update_inode(trans, root, inode);  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_unlock:  	nr = trans->blocks_used;  	btrfs_end_transaction_throttle(trans, root); @@ -4692,15 +4680,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	err = btrfs_find_free_ino(root, &objectid);  	if (err)  		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,  				dentry->d_name.len, btrfs_ino(dir), objectid, -				BTRFS_I(dir)->block_group, mode, &index); +				mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -4712,7 +4698,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -4723,8 +4708,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  		inode->i_op = &btrfs_file_inode_operations;  		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_unlock:  	nr = trans->blocks_used;  	btrfs_end_transaction_throttle(trans, root); @@ -4771,8 +4754,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  	btrfs_inc_nlink(inode);  	inode->i_ctime = CURRENT_TIME; - -	btrfs_set_trans_block_group(trans, dir);  	ihold(inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); @@ -4781,7 +4762,6 @@ static int 
btrfs_link(struct dentry *old_dentry, struct inode *dir,  		drop_inode = 1;  	} else {  		struct dentry *parent = dget_parent(dentry); -		btrfs_update_inode_block_group(trans, dir);  		err = btrfs_update_inode(trans, root, inode);  		BUG_ON(err);  		btrfs_log_new_name(trans, inode, NULL, parent); @@ -4818,7 +4798,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	trans = btrfs_start_transaction(root, 5);  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir);  	err = btrfs_find_free_ino(root, &objectid);  	if (err) @@ -4826,8 +4805,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,  				dentry->d_name.len, btrfs_ino(dir), objectid, -				BTRFS_I(dir)->block_group, S_IFDIR | mode, -				&index); +				S_IFDIR | mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_fail; @@ -4841,7 +4819,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	inode->i_op = &btrfs_dir_inode_operations;  	inode->i_fop = &btrfs_dir_file_operations; -	btrfs_set_trans_block_group(trans, inode);  	btrfs_i_size_write(inode, 0);  	err = btrfs_update_inode(trans, root, inode); @@ -4855,8 +4832,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	d_instantiate(dentry, inode);  	drop_on_err = 0; -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_fail:  	nr = trans->blocks_used; @@ -4989,7 +4964,15 @@ again:  	if (!path) {  		path = btrfs_alloc_path(); -		BUG_ON(!path); +		if (!path) { +			err = -ENOMEM; +			goto out; +		} +		/* +		 * Chances are we'll be called again, so go ahead and do +		 * readahead +		 */ +		path->reada = 1;  	}  	ret = btrfs_lookup_file_extent(trans, root, path, @@ -5130,8 +5113,10 @@ again:  				kunmap(page);  				free_extent_map(em);  				em = NULL; +  				btrfs_release_path(path); -				trans = 
btrfs_join_transaction(root, 1); +				trans = btrfs_join_transaction(root); +  				if (IS_ERR(trans))  					return ERR_CAST(trans);  				goto again; @@ -5375,7 +5360,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,  		btrfs_drop_extent_cache(inode, start, start + len - 1, 0);  	} -	trans = btrfs_join_transaction(root, 0); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		return ERR_CAST(trans); @@ -5611,7 +5596,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  		 * to make sure the current transaction stays open  		 * while we look for nocow cross refs  		 */ -		trans = btrfs_join_transaction(root, 0); +		trans = btrfs_join_transaction(root);  		if (IS_ERR(trans))  			goto must_cow; @@ -5750,7 +5735,7 @@ again:  	BUG_ON(!ordered); -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) {  		err = -ENOMEM;  		goto out; @@ -6500,6 +6485,7 @@ out:  static int btrfs_truncate(struct inode *inode)  {  	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct btrfs_block_rsv *rsv;  	int ret;  	int err = 0;  	struct btrfs_trans_handle *trans; @@ -6513,28 +6499,80 @@ static int btrfs_truncate(struct inode *inode)  	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);  	btrfs_ordered_update_i_size(inode, inode->i_size, NULL); -	trans = btrfs_start_transaction(root, 5); -	if (IS_ERR(trans)) -		return PTR_ERR(trans); +	/* +	 * Yes ladies and gentelment, this is indeed ugly.  The fact is we have +	 * 3 things going on here +	 * +	 * 1) We need to reserve space for our orphan item and the space to +	 * delete our orphan item.  Lord knows we don't want to have a dangling +	 * orphan item because we didn't reserve space to remove it. +	 * +	 * 2) We need to reserve space to update our inode. 
+	 * +	 * 3) We need to have something to cache all the space that is going to +	 * be freed up by the truncate operation, but also have some slack +	 * space reserved in case it uses space during the truncate (thank you +	 * very much snapshotting). +	 * +	 * And we need these to all be separate.  The fact is we can use a lot of +	 * space doing the truncate, and we have no earthly idea how much space +	 * we will use, so we need the truncate reservation to be separate so it +	 * doesn't end up using space reserved for updating the inode or +	 * removing the orphan item.  We also need to be able to stop the +	 * transaction and start a new one, which means we need to be able to +	 * update the inode several times, and we have no way of knowing how +	 * many times that will be, so we can't just reserve 1 item for the +	 * entirety of the operation, so that has to be done separately as well. +	 * Then there is the orphan item, which does indeed need to be held on +	 * to for the whole operation, and we need nobody to touch this reserved +	 * space except the orphan code. +	 * +	 * So that leaves us with +	 * +	 * 1) root->orphan_block_rsv - for the orphan deletion. +	 * 2) rsv - for the truncate reservation, which we will steal from the +	 * transaction reservation. +	 * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for +	 * updating the inode. +	 */ +	rsv = btrfs_alloc_block_rsv(root); +	if (!rsv) +		return -ENOMEM; +	btrfs_add_durable_block_rsv(root->fs_info, rsv); -	btrfs_set_trans_block_group(trans, inode); +	trans = btrfs_start_transaction(root, 4); +	if (IS_ERR(trans)) { +		err = PTR_ERR(trans); +		goto out; +	} + +	/* +	 * Reserve space for the truncate process.  Truncate should be adding +	 * space, but if there are snapshots it may end up using space. 
+	 */ +	ret = btrfs_truncate_reserve_metadata(trans, root, rsv); +	BUG_ON(ret);  	ret = btrfs_orphan_add(trans, inode);  	if (ret) {  		btrfs_end_transaction(trans, root); -		return ret; +		goto out;  	}  	nr = trans->blocks_used;  	btrfs_end_transaction(trans, root);  	btrfs_btree_balance_dirty(root, nr); -	/* Now start a transaction for the truncate */ -	trans = btrfs_start_transaction(root, 0); -	if (IS_ERR(trans)) -		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, inode); -	trans->block_rsv = root->orphan_block_rsv; +	/* +	 * Ok so we've already migrated our bytes over for the truncate, so here +	 * just reserve the one slot we need for updating the inode. +	 */ +	trans = btrfs_start_transaction(root, 1); +	if (IS_ERR(trans)) { +		err = PTR_ERR(trans); +		goto out; +	} +	trans->block_rsv = rsv;  	/*  	 * setattr is responsible for setting the ordered_data_close flag, @@ -6558,24 +6596,17 @@ static int btrfs_truncate(struct inode *inode)  	while (1) {  		if (!trans) { -			trans = btrfs_start_transaction(root, 0); -			if (IS_ERR(trans)) -				return PTR_ERR(trans); -			btrfs_set_trans_block_group(trans, inode); -			trans->block_rsv = root->orphan_block_rsv; -		} +			trans = btrfs_start_transaction(root, 3); +			if (IS_ERR(trans)) { +				err = PTR_ERR(trans); +				goto out; +			} -		ret = btrfs_block_rsv_check(trans, root, -					    root->orphan_block_rsv, 0, 5); -		if (ret == -EAGAIN) { -			ret = btrfs_commit_transaction(trans, root); -			if (ret) -				return ret; -			trans = NULL; -			continue; -		} else if (ret) { -			err = ret; -			break; +			ret = btrfs_truncate_reserve_metadata(trans, root, +							      rsv); +			BUG_ON(ret); + +			trans->block_rsv = rsv;  		}  		ret = btrfs_truncate_inode_items(trans, root, inode, @@ -6586,6 +6617,7 @@ static int btrfs_truncate(struct inode *inode)  			break;  		} +		trans->block_rsv = &root->fs_info->trans_block_rsv;  		ret = btrfs_update_inode(trans, root, inode);  		if (ret) {  			err = ret; @@ -6599,6 
+6631,7 @@ static int btrfs_truncate(struct inode *inode)  	}  	if (ret == 0 && inode->i_nlink > 0) { +		trans->block_rsv = root->orphan_block_rsv;  		ret = btrfs_orphan_del(trans, inode);  		if (ret)  			err = ret; @@ -6610,15 +6643,20 @@ static int btrfs_truncate(struct inode *inode)  		ret = btrfs_orphan_del(NULL, inode);  	} +	trans->block_rsv = &root->fs_info->trans_block_rsv;  	ret = btrfs_update_inode(trans, root, inode);  	if (ret && !err)  		err = ret;  	nr = trans->blocks_used;  	ret = btrfs_end_transaction_throttle(trans, root); +	btrfs_btree_balance_dirty(root, nr); + +out: +	btrfs_free_block_rsv(root, rsv); +  	if (ret && !err)  		err = ret; -	btrfs_btree_balance_dirty(root, nr);  	return err;  } @@ -6627,15 +6665,14 @@ static int btrfs_truncate(struct inode *inode)   * create a new subvolume directory/inode (helper for the ioctl).   */  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, -			     struct btrfs_root *new_root, -			     u64 new_dirid, u64 alloc_hint) +			     struct btrfs_root *new_root, u64 new_dirid)  {  	struct inode *inode;  	int err;  	u64 index = 0;  	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, -				new_dirid, alloc_hint, S_IFDIR | 0700, &index); +				new_dirid, S_IFDIR | 0700, &index);  	if (IS_ERR(inode))  		return PTR_ERR(inode);  	inode->i_op = &btrfs_dir_inode_operations; @@ -6748,21 +6785,6 @@ void btrfs_destroy_inode(struct inode *inode)  		spin_unlock(&root->fs_info->ordered_extent_lock);  	} -	if (root == root->fs_info->tree_root) { -		struct btrfs_block_group_cache *block_group; - -		block_group = btrfs_lookup_block_group(root->fs_info, -						BTRFS_I(inode)->block_group); -		if (block_group && block_group->inode == inode) { -			spin_lock(&block_group->lock); -			block_group->inode = NULL; -			spin_unlock(&block_group->lock); -			btrfs_put_block_group(block_group); -		} else if (block_group) { -			btrfs_put_block_group(block_group); -		} -	} -  	spin_lock(&root->orphan_lock);  	if 
(!list_empty(&BTRFS_I(inode)->i_orphan)) {  		printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", @@ -6948,8 +6970,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,                  goto out_notrans;          } -	btrfs_set_trans_block_group(trans, new_dir); -  	if (dest != root)  		btrfs_record_root_in_trans(trans, dest); @@ -7131,16 +7151,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	err = btrfs_find_free_ino(root, &objectid);  	if (err)  		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,  				dentry->d_name.len, btrfs_ino(dir), objectid, -				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, -				&index); +				S_IFLNK|S_IRWXUGO, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -7152,7 +7169,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -7163,8 +7179,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  		inode->i_op = &btrfs_file_inode_operations;  		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  	if (drop_inode)  		goto out_unlock; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85e818ce00c..74c80595d70 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);  	} -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans));  	ret = btrfs_update_inode(trans, root, inode); @@ -414,8 +414,7 @@ static noinline int create_subvol(struct 
btrfs_root *root,  	btrfs_record_root_in_trans(trans, new_root); -	ret = btrfs_create_subvol_root(trans, new_root, new_dirid, -				       BTRFS_I(dir)->block_group); +	ret = btrfs_create_subvol_root(trans, new_root, new_dirid);  	/*  	 * insert the directory item  	 */ @@ -2489,12 +2488,10 @@ static long btrfs_ioctl_trans_start(struct file *file)  	if (ret)  		goto out; -	mutex_lock(&root->fs_info->trans_mutex); -	root->fs_info->open_ioctl_trans++; -	mutex_unlock(&root->fs_info->trans_mutex); +	atomic_inc(&root->fs_info->open_ioctl_trans);  	ret = -ENOMEM; -	trans = btrfs_start_ioctl_transaction(root, 0); +	trans = btrfs_start_ioctl_transaction(root);  	if (IS_ERR(trans))  		goto out_drop; @@ -2502,9 +2499,7 @@ static long btrfs_ioctl_trans_start(struct file *file)  	return 0;  out_drop: -	mutex_lock(&root->fs_info->trans_mutex); -	root->fs_info->open_ioctl_trans--; -	mutex_unlock(&root->fs_info->trans_mutex); +	atomic_dec(&root->fs_info->open_ioctl_trans);  	mnt_drop_write(file->f_path.mnt);  out:  	return ret; @@ -2738,9 +2733,7 @@ long btrfs_ioctl_trans_end(struct file *file)  	btrfs_end_transaction(trans, root); -	mutex_lock(&root->fs_info->trans_mutex); -	root->fs_info->open_ioctl_trans--; -	mutex_unlock(&root->fs_info->trans_mutex); +	atomic_dec(&root->fs_info->open_ioctl_trans);  	mnt_drop_write(file->f_path.mnt);  	return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fa2c5d87f21..f25b10a22a0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,  		err = -ENOMEM;  		goto out;  	} +	path1->reada = 1; +	path2->reada = 2;  	node = alloc_backref_node(cache);  	if (!node) { @@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	reloc_root = root->reloc_root;  	root_item = &reloc_root->root_item; @@ -2139,10 +2142,10 @@ int 
prepare_to_merge(struct reloc_control *rc, int err)  	u64 num_bytes = 0;  	int ret; -	mutex_lock(&root->fs_info->trans_mutex); +	spin_lock(&root->fs_info->trans_lock);  	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;  	rc->merging_rsv_size += rc->nodes_relocated * 2; -	mutex_unlock(&root->fs_info->trans_mutex); +	spin_unlock(&root->fs_info->trans_lock);  again:  	if (!err) {  		num_bytes = rc->merging_rsv_size; @@ -2152,7 +2155,7 @@ again:  			err = ret;  	} -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root);  	if (IS_ERR(trans)) {  		if (!err)  			btrfs_block_rsv_release(rc->extent_root, @@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc)  	int ret;  again:  	root = rc->extent_root; -	mutex_lock(&root->fs_info->trans_mutex); +	spin_lock(&root->fs_info->trans_lock);  	list_splice_init(&rc->reloc_roots, &reloc_roots); -	mutex_unlock(&root->fs_info->trans_mutex); +	spin_unlock(&root->fs_info->trans_lock);  	while (!list_empty(&reloc_roots)) {  		found = 1; @@ -3236,7 +3239,7 @@ truncate:  		goto out;  	} -	trans = btrfs_join_transaction(root, 0); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) {  		btrfs_free_path(path);  		ret = PTR_ERR(trans); @@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	root = read_fs_root(rc->extent_root->fs_info, ref_root);  	if (IS_ERR(root)) { @@ -3586,17 +3590,17 @@ next:  static void set_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	mutex_lock(&fs_info->trans_mutex); +	spin_lock(&fs_info->trans_lock);  	fs_info->reloc_ctl = rc; -	mutex_unlock(&fs_info->trans_mutex); +	spin_unlock(&fs_info->trans_lock);  }  static void unset_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	mutex_lock(&fs_info->trans_mutex); +	
spin_lock(&fs_info->trans_lock);  	fs_info->reloc_ctl = NULL; -	mutex_unlock(&fs_info->trans_mutex); +	spin_unlock(&fs_info->trans_lock);  }  static int check_extent_flags(u64 flags) @@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc)  	rc->create_reloc_tree = 1;  	set_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root);  	BUG_ON(IS_ERR(trans));  	btrfs_commit_transaction(trans, rc->extent_root);  	return 0; @@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	ret = prepare_to_relocate(rc);  	if (ret) { @@ -3834,7 +3839,7 @@ restart:  	btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);  	/* get rid of pinned extents */ -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root);  	if (IS_ERR(trans))  		err = PTR_ERR(trans);  	else @@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = -1;  	key.objectid = BTRFS_TREE_RELOC_OBJECTID;  	key.type = BTRFS_ROOT_ITEM_KEY; @@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)  	set_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root);  	if (IS_ERR(trans)) {  		unset_reloc_control(rc);  		err = PTR_ERR(trans); @@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)  	unset_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root);  	if (IS_ERR(trans))  		err = PTR_ERR(trans);  	else diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dc80f715692..2d5c6d2aa4e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -35,6 +35,7 @@ static noinline void 
put_transaction(struct btrfs_transaction *transaction)  {  	WARN_ON(atomic_read(&transaction->use_count) == 0);  	if (atomic_dec_and_test(&transaction->use_count)) { +		BUG_ON(!list_empty(&transaction->list));  		memset(transaction, 0, sizeof(*transaction));  		kmem_cache_free(btrfs_transaction_cachep, transaction);  	} @@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)  /*   * either allocate a new transaction or hop into the existing one   */ -static noinline int join_transaction(struct btrfs_root *root) +static noinline int join_transaction(struct btrfs_root *root, int nofail)  {  	struct btrfs_transaction *cur_trans; -	cur_trans = root->fs_info->running_transaction; -	if (!cur_trans) { -		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, -					     GFP_NOFS); -		if (!cur_trans) -			return -ENOMEM; -		root->fs_info->generation++; -		atomic_set(&cur_trans->num_writers, 1); -		cur_trans->num_joined = 0; -		cur_trans->transid = root->fs_info->generation; -		init_waitqueue_head(&cur_trans->writer_wait); -		init_waitqueue_head(&cur_trans->commit_wait); -		cur_trans->in_commit = 0; -		cur_trans->blocked = 0; -		atomic_set(&cur_trans->use_count, 1); -		cur_trans->commit_done = 0; -		cur_trans->start_time = get_seconds(); -		cur_trans->delayed_refs.root = RB_ROOT; -		cur_trans->delayed_refs.num_entries = 0; -		cur_trans->delayed_refs.num_heads_ready = 0; -		cur_trans->delayed_refs.num_heads = 0; -		cur_trans->delayed_refs.flushing = 0; -		cur_trans->delayed_refs.run_delayed_start = 0; -		spin_lock_init(&cur_trans->delayed_refs.lock); +	spin_lock(&root->fs_info->trans_lock); +	if (root->fs_info->trans_no_join) { +		if (!nofail) { +			spin_unlock(&root->fs_info->trans_lock); +			return -EBUSY; +		} +	} -		INIT_LIST_HEAD(&cur_trans->pending_snapshots); -		list_add_tail(&cur_trans->list, &root->fs_info->trans_list); -		extent_io_tree_init(&cur_trans->dirty_pages, -				     root->fs_info->btree_inode->i_mapping); -		
spin_lock(&root->fs_info->new_trans_lock); -		root->fs_info->running_transaction = cur_trans; -		spin_unlock(&root->fs_info->new_trans_lock); -	} else { +	cur_trans = root->fs_info->running_transaction; +	if (cur_trans) { +		atomic_inc(&cur_trans->use_count); +		atomic_inc(&cur_trans->num_writers); +		cur_trans->num_joined++; +		spin_unlock(&root->fs_info->trans_lock); +		return 0; +	} +	spin_unlock(&root->fs_info->trans_lock); + +	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); +	if (!cur_trans) +		return -ENOMEM; +	spin_lock(&root->fs_info->trans_lock); +	if (root->fs_info->running_transaction) { +		kmem_cache_free(btrfs_transaction_cachep, cur_trans); +		cur_trans = root->fs_info->running_transaction; +		atomic_inc(&cur_trans->use_count);  		atomic_inc(&cur_trans->num_writers);  		cur_trans->num_joined++; +		spin_unlock(&root->fs_info->trans_lock); +		return 0;  	} +	atomic_set(&cur_trans->num_writers, 1); +	cur_trans->num_joined = 0; +	init_waitqueue_head(&cur_trans->writer_wait); +	init_waitqueue_head(&cur_trans->commit_wait); +	cur_trans->in_commit = 0; +	cur_trans->blocked = 0; +	/* +	 * One for this trans handle, one so it will live on until we +	 * commit the transaction. 
+	 */ +	atomic_set(&cur_trans->use_count, 2); +	cur_trans->commit_done = 0; +	cur_trans->start_time = get_seconds(); + +	cur_trans->delayed_refs.root = RB_ROOT; +	cur_trans->delayed_refs.num_entries = 0; +	cur_trans->delayed_refs.num_heads_ready = 0; +	cur_trans->delayed_refs.num_heads = 0; +	cur_trans->delayed_refs.flushing = 0; +	cur_trans->delayed_refs.run_delayed_start = 0; +	spin_lock_init(&cur_trans->commit_lock); +	spin_lock_init(&cur_trans->delayed_refs.lock); + +	INIT_LIST_HEAD(&cur_trans->pending_snapshots); +	list_add_tail(&cur_trans->list, &root->fs_info->trans_list); +	extent_io_tree_init(&cur_trans->dirty_pages, +			     root->fs_info->btree_inode->i_mapping); +	root->fs_info->generation++; +	cur_trans->transid = root->fs_info->generation; +	root->fs_info->running_transaction = cur_trans; +	spin_unlock(&root->fs_info->trans_lock);  	return 0;  } @@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)   * to make sure the old root from before we joined the transaction is deleted   * when the transaction commits   */ -static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, -					 struct btrfs_root *root) +int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, +			       struct btrfs_root *root)  {  	if (root->ref_cows && root->last_trans < trans->transid) {  		WARN_ON(root == root->fs_info->extent_root);  		WARN_ON(root->commit_root != root->node); +		spin_lock(&root->fs_info->fs_roots_radix_lock); +		if (root->last_trans == trans->transid) { +			spin_unlock(&root->fs_info->fs_roots_radix_lock); +			return 0; +		} +		root->last_trans = trans->transid;  		radix_tree_tag_set(&root->fs_info->fs_roots_radix,  			   (unsigned long)root->root_key.objectid,  			   BTRFS_ROOT_TRANS_TAG); -		root->last_trans = trans->transid; +		spin_unlock(&root->fs_info->fs_roots_radix_lock);  		btrfs_init_reloc_root(trans, root);  	}  	return 0;  } -int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, -			    
   struct btrfs_root *root) -{ -	if (!root->ref_cows) -		return 0; - -	mutex_lock(&root->fs_info->trans_mutex); -	if (root->last_trans == trans->transid) { -		mutex_unlock(&root->fs_info->trans_mutex); -		return 0; -	} - -	record_root_in_trans(trans, root); -	mutex_unlock(&root->fs_info->trans_mutex); -	return 0; -} -  /* wait for commit against the current transaction to become unblocked   * when this is done, it is safe to start a new transaction, but the current   * transaction might not be fully on disk. @@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)  {  	struct btrfs_transaction *cur_trans; +	spin_lock(&root->fs_info->trans_lock);  	cur_trans = root->fs_info->running_transaction;  	if (cur_trans && cur_trans->blocked) {  		DEFINE_WAIT(wait);  		atomic_inc(&cur_trans->use_count); +		spin_unlock(&root->fs_info->trans_lock);  		while (1) {  			prepare_to_wait(&root->fs_info->transaction_wait, &wait,  					TASK_UNINTERRUPTIBLE);  			if (!cur_trans->blocked)  				break; -			mutex_unlock(&root->fs_info->trans_mutex);  			schedule(); -			mutex_lock(&root->fs_info->trans_mutex);  		}  		finish_wait(&root->fs_info->transaction_wait, &wait);  		put_transaction(cur_trans); +	} else { +		spin_unlock(&root->fs_info->trans_lock);  	}  } @@ -167,10 +185,16 @@ enum btrfs_trans_type {  static int may_wait_transaction(struct btrfs_root *root, int type)  { -	if (!root->fs_info->log_root_recovering && -	    ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || -	     type == TRANS_USERSPACE)) +	if (root->fs_info->log_root_recovering) +		return 0; + +	if (type == TRANS_USERSPACE) +		return 1; + +	if (type == TRANS_START && +	    !atomic_read(&root->fs_info->open_ioctl_trans))  		return 1; +  	return 0;  } @@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,  	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)  		return ERR_PTR(-EROFS); + +	if (current->journal_info) { +		WARN_ON(type != 
TRANS_JOIN && type != TRANS_JOIN_NOLOCK); +		h = current->journal_info; +		h->use_count++; +		h->orig_rsv = h->block_rsv; +		h->block_rsv = NULL; +		goto got_it; +	}  again:  	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);  	if (!h)  		return ERR_PTR(-ENOMEM); -	if (type != TRANS_JOIN_NOLOCK) -		mutex_lock(&root->fs_info->trans_mutex);  	if (may_wait_transaction(root, type))  		wait_current_trans(root); -	ret = join_transaction(root); +	do { +		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); +		if (ret == -EBUSY) +			wait_current_trans(root); +	} while (ret == -EBUSY); +  	if (ret < 0) {  		kmem_cache_free(btrfs_trans_handle_cachep, h); -		if (type != TRANS_JOIN_NOLOCK) -			mutex_unlock(&root->fs_info->trans_mutex);  		return ERR_PTR(ret);  	}  	cur_trans = root->fs_info->running_transaction; -	atomic_inc(&cur_trans->use_count); -	if (type != TRANS_JOIN_NOLOCK) -		mutex_unlock(&root->fs_info->trans_mutex);  	h->transid = cur_trans->transid;  	h->transaction = cur_trans;  	h->blocks_used = 0; -	h->block_group = 0;  	h->bytes_reserved = 0;  	h->delayed_ref_updates = 0; +	h->use_count = 1;  	h->block_rsv = NULL; +	h->orig_rsv = NULL;  	smp_mb();  	if (cur_trans->blocked && may_wait_transaction(root, type)) { @@ -241,11 +273,8 @@ again:  		}  	} -	if (type != TRANS_JOIN_NOLOCK) -		mutex_lock(&root->fs_info->trans_mutex); -	record_root_in_trans(h, root); -	if (type != TRANS_JOIN_NOLOCK) -		mutex_unlock(&root->fs_info->trans_mutex); +got_it: +	btrfs_record_root_in_trans(h, root);  	if (!current->journal_info && type != TRANS_USERSPACE)  		current->journal_info = h; @@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,  {  	return start_transaction(root, num_items, TRANS_START);  } -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, -						   int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)  {  	return start_transaction(root, 0, TRANS_JOIN);  
} -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, -							  int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)  {  	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);  } -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, -							 int num_blocks) +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)  { -	return start_transaction(r, 0, TRANS_USERSPACE); +	return start_transaction(root, 0, TRANS_USERSPACE);  }  /* wait for a transaction commit to be fully complete */ @@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,  				    struct btrfs_transaction *commit)  {  	DEFINE_WAIT(wait); -	mutex_lock(&root->fs_info->trans_mutex);  	while (!commit->commit_done) {  		prepare_to_wait(&commit->commit_wait, &wait,  				TASK_UNINTERRUPTIBLE);  		if (commit->commit_done)  			break; -		mutex_unlock(&root->fs_info->trans_mutex);  		schedule(); -		mutex_lock(&root->fs_info->trans_mutex);  	} -	mutex_unlock(&root->fs_info->trans_mutex);  	finish_wait(&commit->commit_wait, &wait);  	return 0;  } @@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)  	struct btrfs_transaction *cur_trans = NULL, *t;  	int ret; -	mutex_lock(&root->fs_info->trans_mutex); -  	ret = 0;  	if (transid) {  		if (transid <= root->fs_info->last_trans_committed) -			goto out_unlock; +			goto out;  		/* find specified transaction */ +		spin_lock(&root->fs_info->trans_lock);  		list_for_each_entry(t, &root->fs_info->trans_list, list) {  			if (t->transid == transid) {  				cur_trans = t; +				atomic_inc(&cur_trans->use_count);  				break;  			}  			if (t->transid > transid)  				break;  		} +		spin_unlock(&root->fs_info->trans_lock);  		ret = -EINVAL;  		if (!cur_trans) -			goto out_unlock;  /* bad transid */ +			goto out;  /* bad transid */  	} else {  		/* find newest transaction that is committing | 
committed */ +		spin_lock(&root->fs_info->trans_lock);  		list_for_each_entry_reverse(t, &root->fs_info->trans_list,  					    list) {  			if (t->in_commit) {  				if (t->commit_done) -					goto out_unlock; +					goto out;  				cur_trans = t; +				atomic_inc(&cur_trans->use_count);  				break;  			}  		} +		spin_unlock(&root->fs_info->trans_lock);  		if (!cur_trans) -			goto out_unlock;  /* nothing committing|committed */ +			goto out;  /* nothing committing|committed */  	} -	atomic_inc(&cur_trans->use_count); -	mutex_unlock(&root->fs_info->trans_mutex); -  	wait_for_commit(root, cur_trans); -	mutex_lock(&root->fs_info->trans_mutex);  	put_transaction(cur_trans);  	ret = 0; -out_unlock: -	mutex_unlock(&root->fs_info->trans_mutex); +out:  	return ret;  }  void btrfs_throttle(struct btrfs_root *root)  { -	mutex_lock(&root->fs_info->trans_mutex); -	if (!root->fs_info->open_ioctl_trans) +	if (!atomic_read(&root->fs_info->open_ioctl_trans))  		wait_current_trans(root); -	mutex_unlock(&root->fs_info->trans_mutex);  }  static int should_end_transaction(struct btrfs_trans_handle *trans, @@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,  	struct btrfs_transaction *cur_trans = trans->transaction;  	int updates; +	smp_mb();  	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)  		return 1; @@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	struct btrfs_fs_info *info = root->fs_info;  	int count = 0; +	if (--trans->use_count) { +		trans->block_rsv = trans->orig_rsv; +		return 0; +	} +  	while (count < 4) {  		unsigned long cur = trans->delayed_ref_updates;  		trans->delayed_ref_updates = 0; @@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	btrfs_trans_release_metadata(trans, root); -	if (lock && !root->fs_info->open_ioctl_trans && -	    should_end_transaction(trans, root)) +	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && +	    
should_end_transaction(trans, root)) {  		trans->transaction->blocked = 1; +		smp_wmb(); +	}  	if (lock && cur_trans->blocked && !cur_trans->in_commit) {  		if (throttle) @@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,   */  int btrfs_add_dead_root(struct btrfs_root *root)  { -	mutex_lock(&root->fs_info->trans_mutex); +	spin_lock(&root->fs_info->trans_lock);  	list_add(&root->root_list, &root->fs_info->dead_roots); -	mutex_unlock(&root->fs_info->trans_mutex); +	spin_unlock(&root->fs_info->trans_lock);  	return 0;  } @@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,  	int ret;  	int err = 0; +	spin_lock(&fs_info->fs_roots_radix_lock);  	while (1) {  		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,  						 (void **)gang, 0, @@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,  			radix_tree_tag_clear(&fs_info->fs_roots_radix,  					(unsigned long)root->root_key.objectid,  					BTRFS_ROOT_TRANS_TAG); +			spin_unlock(&fs_info->fs_roots_radix_lock);  			btrfs_free_log(trans, root);  			btrfs_update_reloc_root(trans, root); @@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,  			err = btrfs_update_root(trans, fs_info->tree_root,  						&root->root_key,  						&root->root_item); +			spin_lock(&fs_info->fs_roots_radix_lock);  			if (err)  				break;  		}  	} +	spin_unlock(&fs_info->fs_roots_radix_lock);  	return err;  } @@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	parent = dget_parent(dentry);  	parent_inode = parent->d_inode;  	parent_root = BTRFS_I(parent_inode)->root; -	record_root_in_trans(trans, parent_root); +	btrfs_record_root_in_trans(trans, parent_root);  	/*  	 * insert the directory item @@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	ret = btrfs_update_inode(trans, parent_root, 
parent_inode);  	BUG_ON(ret); -	record_root_in_trans(trans, root); +	btrfs_record_root_in_trans(trans, root);  	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);  	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));  	btrfs_check_and_init_root_item(new_root_item); @@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root)  int btrfs_transaction_in_commit(struct btrfs_fs_info *info)  {  	int ret = 0; -	spin_lock(&info->new_trans_lock); +	spin_lock(&info->trans_lock);  	if (info->running_transaction)  		ret = info->running_transaction->in_commit; -	spin_unlock(&info->new_trans_lock); +	spin_unlock(&info->trans_lock);  	return ret;  }  int btrfs_transaction_blocked(struct btrfs_fs_info *info)  {  	int ret = 0; -	spin_lock(&info->new_trans_lock); +	spin_lock(&info->trans_lock);  	if (info->running_transaction)  		ret = info->running_transaction->blocked; -	spin_unlock(&info->new_trans_lock); +	spin_unlock(&info->trans_lock);  	return ret;  } @@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,  				    &wait);  			break;  		} -		mutex_unlock(&root->fs_info->trans_mutex);  		schedule(); -		mutex_lock(&root->fs_info->trans_mutex);  		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);  	}  } @@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,  				    &wait);  			break;  		} -		mutex_unlock(&root->fs_info->trans_mutex);  		schedule(); -		mutex_lock(&root->fs_info->trans_mutex);  		finish_wait(&root->fs_info->transaction_wait,  			    &wait);  	} @@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,  	INIT_DELAYED_WORK(&ac->work, do_async_commit);  	ac->root = root; -	ac->newtrans = btrfs_join_transaction(root, 0); +	ac->newtrans = btrfs_join_transaction(root);  	if (IS_ERR(ac->newtrans)) {  		int err = PTR_ERR(ac->newtrans);  		kfree(ac); @@ -1080,22 +1107,18 @@ int 
btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,  	}  	/* take transaction reference */ -	mutex_lock(&root->fs_info->trans_mutex);  	cur_trans = trans->transaction;  	atomic_inc(&cur_trans->use_count); -	mutex_unlock(&root->fs_info->trans_mutex);  	btrfs_end_transaction(trans, root);  	schedule_delayed_work(&ac->work, 0);  	/* wait for transaction to start and unblock */ -	mutex_lock(&root->fs_info->trans_mutex);  	if (wait_for_unblock)  		wait_current_trans_commit_start_and_unblock(root, cur_trans);  	else  		wait_current_trans_commit_start(root, cur_trans);  	put_transaction(cur_trans); -	mutex_unlock(&root->fs_info->trans_mutex);  	return 0;  } @@ -1139,38 +1162,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	ret = btrfs_run_delayed_refs(trans, root, 0);  	BUG_ON(ret); -	mutex_lock(&root->fs_info->trans_mutex); +	spin_lock(&cur_trans->commit_lock);  	if (cur_trans->in_commit) { +		spin_unlock(&cur_trans->commit_lock);  		atomic_inc(&cur_trans->use_count); -		mutex_unlock(&root->fs_info->trans_mutex);  		btrfs_end_transaction(trans, root);  		ret = wait_for_commit(root, cur_trans);  		BUG_ON(ret); -		mutex_lock(&root->fs_info->trans_mutex);  		put_transaction(cur_trans); -		mutex_unlock(&root->fs_info->trans_mutex);  		return 0;  	}  	trans->transaction->in_commit = 1;  	trans->transaction->blocked = 1; +	spin_unlock(&cur_trans->commit_lock);  	wake_up(&root->fs_info->transaction_blocked_wait); +	spin_lock(&root->fs_info->trans_lock);  	if (cur_trans->list.prev != &root->fs_info->trans_list) {  		prev_trans = list_entry(cur_trans->list.prev,  					struct btrfs_transaction, list);  		if (!prev_trans->commit_done) {  			atomic_inc(&prev_trans->use_count); -			mutex_unlock(&root->fs_info->trans_mutex); +			spin_unlock(&root->fs_info->trans_lock);  			wait_for_commit(root, prev_trans); -			mutex_lock(&root->fs_info->trans_mutex);  			put_transaction(prev_trans); +		} else { +			spin_unlock(&root->fs_info->trans_lock);  		} +	} 
else { +		spin_unlock(&root->fs_info->trans_lock);  	}  	if (now < cur_trans->start_time || now - cur_trans->start_time < 1) @@ -1178,12 +1204,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	do {  		int snap_pending = 0; +  		joined = cur_trans->num_joined;  		if (!list_empty(&trans->transaction->pending_snapshots))  			snap_pending = 1;  		WARN_ON(cur_trans != trans->transaction); -		mutex_unlock(&root->fs_info->trans_mutex);  		if (flush_on_commit || snap_pending) {  			btrfs_start_delalloc_inodes(root, 1); @@ -1206,14 +1232,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  		prepare_to_wait(&cur_trans->writer_wait, &wait,  				TASK_UNINTERRUPTIBLE); -		smp_mb();  		if (atomic_read(&cur_trans->num_writers) > 1)  			schedule_timeout(MAX_SCHEDULE_TIMEOUT);  		else if (should_grow)  			schedule_timeout(1); -		mutex_lock(&root->fs_info->trans_mutex);  		finish_wait(&cur_trans->writer_wait, &wait); +		spin_lock(&root->fs_info->trans_lock); +		root->fs_info->trans_no_join = 1; +		spin_unlock(&root->fs_info->trans_lock);  	} while (atomic_read(&cur_trans->num_writers) > 1 ||  		 (should_grow && cur_trans->num_joined != joined)); @@ -1258,9 +1285,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	btrfs_prepare_extent_commit(trans, root);  	cur_trans = root->fs_info->running_transaction; -	spin_lock(&root->fs_info->new_trans_lock); -	root->fs_info->running_transaction = NULL; -	spin_unlock(&root->fs_info->new_trans_lock);  	btrfs_set_root_node(&root->fs_info->tree_root->root_item,  			    root->fs_info->tree_root->node); @@ -1281,10 +1305,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	       sizeof(root->fs_info->super_copy));  	trans->transaction->blocked = 0; +	spin_lock(&root->fs_info->trans_lock); +	root->fs_info->running_transaction = NULL; +	root->fs_info->trans_no_join = 0; +	spin_unlock(&root->fs_info->trans_lock);  	wake_up(&root->fs_info->transaction_wait); -	
mutex_unlock(&root->fs_info->trans_mutex);  	ret = btrfs_write_and_wait_transaction(trans, root);  	BUG_ON(ret);  	write_ctree_super(trans, root, 0); @@ -1297,22 +1324,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	btrfs_finish_extent_commit(trans, root); -	mutex_lock(&root->fs_info->trans_mutex); -  	cur_trans->commit_done = 1;  	root->fs_info->last_trans_committed = cur_trans->transid;  	wake_up(&cur_trans->commit_wait); +	spin_lock(&root->fs_info->trans_lock);  	list_del_init(&cur_trans->list); +	spin_unlock(&root->fs_info->trans_lock); +  	put_transaction(cur_trans);  	put_transaction(cur_trans);  	trace_btrfs_transaction_commit(root); -	mutex_unlock(&root->fs_info->trans_mutex); -  	btrfs_scrub_continue(root);  	if (current->journal_info == trans) @@ -1334,9 +1360,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)  	LIST_HEAD(list);  	struct btrfs_fs_info *fs_info = root->fs_info; -	mutex_lock(&fs_info->trans_mutex); +	spin_lock(&fs_info->trans_lock);  	list_splice_init(&fs_info->dead_roots, &list); -	mutex_unlock(&fs_info->trans_mutex); +	spin_unlock(&fs_info->trans_lock);  	while (!list_empty(&list)) {  		root = list_entry(list.next, struct btrfs_root, root_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 804c88639e5..02564e6230a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,10 +28,12 @@ struct btrfs_transaction {  	 * transaction can end  	 */  	atomic_t num_writers; +	atomic_t use_count;  	unsigned long num_joined; + +	spinlock_t commit_lock;  	int in_commit; -	atomic_t use_count;  	int commit_done;  	int blocked;  	struct list_head list; @@ -45,13 +47,14 @@ struct btrfs_transaction {  struct btrfs_trans_handle {  	u64 transid; -	u64 block_group;  	u64 bytes_reserved; +	unsigned long use_count;  	unsigned long blocks_reserved;  	unsigned long blocks_used;  	unsigned long delayed_ref_updates;  	struct btrfs_transaction *transaction;  	struct btrfs_block_rsv *block_rsv; +	
struct btrfs_block_rsv *orig_rsv;  };  struct btrfs_pending_snapshot { @@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {  	struct list_head list;  }; -static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, -					       struct inode *inode) -{ -	trans->block_group = BTRFS_I(inode)->block_group; -} - -static inline void btrfs_update_inode_block_group( -					  struct btrfs_trans_handle *trans, -					  struct inode *inode) -{ -	BTRFS_I(inode)->block_group = trans->block_group; -} -  static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,  					      struct inode *inode)  { @@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,  				 struct btrfs_root *root);  struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,  						   int num_items); -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, -						  int num_blocks); -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, -							  int num_blocks); -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, -							 int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);  int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);  int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,  				     struct btrfs_root *root); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f3107e4b4d5..5366fe452ab 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, inode); -  	ret = do_setxattr(trans, inode, name, value, size, flags);  	if (ret)  		goto out;  |