Diffstat (limited to 'fs/btrfs')
 fs/btrfs/backref.c       |   4
 fs/btrfs/compression.c   |   1
 fs/btrfs/ctree.c         |   9
 fs/btrfs/ctree.h         |   3
 fs/btrfs/delayed-inode.c |  12
 fs/btrfs/delayed-ref.c   | 163
 fs/btrfs/delayed-ref.h   |   4
 fs/btrfs/disk-io.c       |  53
 fs/btrfs/disk-io.h       |   2
 fs/btrfs/extent-tree.c   | 123
 fs/btrfs/extent_io.c     |  17
 fs/btrfs/file-item.c     |   4
 fs/btrfs/inode.c         | 326
 fs/btrfs/ioctl.c         |   2
 fs/btrfs/locking.c       |   2
 fs/btrfs/qgroup.c        |  12
 fs/btrfs/root-tree.c     |   4
 fs/btrfs/super.c         |  15
 fs/btrfs/transaction.c   |   3
 fs/btrfs/volumes.c       |  33
 fs/btrfs/volumes.h       |   2
 21 files changed, 418 insertions(+), 376 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a84..ff6475f409d 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	ret = extent_from_logical(fs_info, logical, path,
 					&found_key);
 	btrfs_release_path(path);
-	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
-		ret = -EINVAL;
 	if (ret < 0)
 		return ret;
+	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return -EINVAL;
 
 	extent_item_pos = logical - found_key.objectid;
 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab7..43d1c5a3a03 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
 	btrfs_compress_op[idx]->free_workspace(workspace);
 	atomic_dec(alloc_workspace);
 wake:
+	smp_mb();
 	if (waitqueue_active(workspace_wait))
 		wake_up(workspace_wait);
 }
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271f..6d183f60d63 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	/*
-	 * we removed the lowest blocker from the blocker list, so there may be
-	 * more processible delayed refs.
-	 */
-	wake_up(&fs_info->tree_mod_seq_wait);
-
-	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 	u32 nritems;
 	int ret;
 
+	if (btrfs_header_level(eb) == 0)
+		return;
+
 	nritems = btrfs_header_nritems(eb);
 	for (i = nritems - 1; i >= 0; i--) {
 		ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227a..0d195b50766 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
 	struct seq_list tree_mod_seq_elem;
-	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 			  struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 logical_offset, u32 *dst);
+			      struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8cea..07d5eeb1e6f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 	rb_erase(&delayed_item->rb_node, root);
 	delayed_item->delayed_node->count--;
 
-	atomic_dec(&delayed_root->items);
-	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+	if (atomic_dec_return(&delayed_root->items) <
+	    BTRFS_DELAYED_BACKGROUND &&
 	    waitqueue_active(&delayed_root->wait))
 		wake_up(&delayed_root->wait);
 }
@@ -1028,9 +1028,10 @@ do_again:
 		btrfs_release_delayed_item(prev);
 		ret = 0;
 		btrfs_release_path(path);
-		if (curr)
+		if (curr) {
+			mutex_unlock(&node->mutex);
 			goto do_again;
-		else
+		} else
 			goto delete_fail;
 	}
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
 		delayed_node->count--;
 
 		delayed_root = delayed_node->root->fs_info->delayed_root;
-		atomic_dec(&delayed_root->items);
-		if (atomic_read(&delayed_root->items) <
+		if (atomic_dec_return(&delayed_root->items) <
 		    BTRFS_DELAYED_BACKGROUND &&
 		    waitqueue_active(&delayed_root->wait))
 			wake_up(&delayed_root->wait);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01b..ae941177339 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
 			  struct btrfs_delayed_tree_ref *ref1)
 {
-	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
-		if (ref1->root < ref2->root)
-			return -1;
-		if (ref1->root > ref2->root)
-			return 1;
-	} else {
-		if (ref1->parent < ref2->parent)
-			return -1;
-		if (ref1->parent > ref2->parent)
-			return 1;
-	}
+	if (ref1->root < ref2->root)
+		return -1;
+	if (ref1->root > ref2->root)
+		return 1;
+	if (ref1->parent < ref2->parent)
+		return -1;
+	if (ref1->parent > ref2->parent)
+		return 1;
 	return 0;
 }
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
  * type of the delayed backrefs and content of delayed backrefs.
  */
 static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-		      struct btrfs_delayed_ref_node *ref1)
+		      struct btrfs_delayed_ref_node *ref1,
+		      bool compare_seq)
 {
 	if (ref1->bytenr < ref2->bytenr)
 		return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
 	if (ref1->type > ref2->type)
 		return 1;
 	/* merging of sequenced refs is not allowed */
-	if (ref1->seq < ref2->seq)
-		return -1;
-	if (ref1->seq > ref2->seq)
-		return 1;
+	if (compare_seq) {
+		if (ref1->seq < ref2->seq)
+			return -1;
+		if (ref1->seq > ref2->seq)
+			return 1;
+	}
 	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
 	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
 		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
 
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 				 rb_node);
-		cmp = comp_entry(entry, ins);
+		cmp = comp_entry(entry, ins, 1);
 		if (cmp < 0)
 			p = &(*p)->rb_left;
 		else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+				    struct btrfs_delayed_ref_root *delayed_refs,
+				    struct btrfs_delayed_ref_node *ref)
+{
+	rb_erase(&ref->rb_node, &delayed_refs->root);
+	ref->in_tree = 0;
+	btrfs_put_delayed_ref(ref);
+	delayed_refs->num_entries--;
+	if (trans->delayed_ref_updates)
+		trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_root *delayed_refs,
+		     struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+	struct rb_node *node;
+	int merged = 0;
+	int mod = 0;
+	int done = 0;
+
+	node = rb_prev(&ref->rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *next;
+
+		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		node = rb_prev(node);
+		if (next->bytenr != ref->bytenr)
+			break;
+		if (seq && next->seq >= seq)
+			break;
+		if (comp_entry(ref, next, 0))
+			continue;
+
+		if (ref->action == next->action) {
+			mod = next->ref_mod;
+		} else {
+			if (ref->ref_mod < next->ref_mod) {
+				struct btrfs_delayed_ref_node *tmp;
+
+				tmp = ref;
+				ref = next;
+				next = tmp;
+				done = 1;
+			}
+			mod = -next->ref_mod;
+		}
+
+		merged++;
+		drop_delayed_ref(trans, delayed_refs, next);
+		ref->ref_mod += mod;
+		if (ref->ref_mod == 0) {
+			drop_delayed_ref(trans, delayed_refs, ref);
+			break;
+		} else {
+			/*
+			 * You can't have multiples of the same ref on a tree
+			 * block.
+			 */
+			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+
+		if (done)
+			break;
+		node = rb_prev(&ref->rb_node);
+	}
+
+	return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head)
+{
+	struct rb_node *node;
+	u64 seq = 0;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		struct seq_list *elem;
+
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		seq = elem->seq;
+	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	node = rb_prev(&head->node.rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *ref;
+
+		ref = rb_entry(node, struct btrfs_delayed_ref_node,
+			       rb_node);
+		if (ref->bytenr != head->node.bytenr)
+			break;
+
+		/* We can't merge refs that are outside of our seq count */
+		if (seq && ref->seq >= seq)
+			break;
+		if (merge_ref(trans, delayed_refs, ref, seq))
+			node = rb_prev(&head->node.rb_node);
+		else
+			node = rb_prev(node);
+	}
+}
+
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 		 * every changing the extent allocation tree.
 		 */
 		existing->ref_mod--;
-		if (existing->ref_mod == 0) {
-			rb_erase(&existing->rb_node,
-				 &delayed_refs->root);
-			existing->in_tree = 0;
-			btrfs_put_delayed_ref(existing);
-			delayed_refs->num_entries--;
-			if (trans->delayed_ref_updates)
-				trans->delayed_ref_updates--;
-		} else {
+		if (existing->ref_mod == 0)
+			drop_delayed_ref(trans, delayed_refs, existing);
+		else
 			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
 				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-		}
 	} else {
 		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
 			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, level, action,
 				   for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
 				   action, for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
 				   extent_op->is_data);
 
-	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b..ab530059584 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
 				struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e2..22e98e04c2e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 		ret = read_extent_buffer_pages(io_tree, eb, start,
 					       WAIT_COMPLETE,
 					       btree_get_extent, mirror_num);
-		if (!ret && !verify_parent_transid(io_tree, eb,
+		if (!ret) {
+			if (!verify_parent_transid(io_tree, eb,
 						   parent_transid, 0))
-			break;
+				break;
+			else
+				ret = -EIO;
+		}
 
 		/*
 		 * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
-	atomic_dec(&fs_info->nr_async_submits);
-
-	if (atomic_read(&fs_info->nr_async_submits) < limit &&
+	if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
 	    waitqueue_active(&fs_info->async_submit_wait))
 		wake_up(&fs_info->async_submit_wait);
 
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
 	fs_info->free_chunk_space = 0;
 	fs_info->tree_mod_log = RB_ROOT;
 
-	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 	spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
 		goto fail_trans_kthread;
 
 	/* do not make disk changes in broken FS */
-	if (btrfs_super_log_root(disk_super) != 0 &&
-	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+	if (btrfs_super_log_root(disk_super) != 0) {
 		u64 bytenr = btrfs_super_log_root(disk_super);
 
 		if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
 	/* clear out the rbtree of defraggable inodes */
 	btrfs_run_defrag_inodes(fs_info);
 
-	/*
-	 * Here come 2 situations when btrfs is broken to flip readonly:
-	 *
-	 * 1. when btrfs flips readonly somewhere else before
-	 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-	 * and btrfs will skip to write sb directly to keep
-	 * ERROR state on disk.
-	 *
-	 * 2. when btrfs flips readonly just in btrfs_commit_super,
-	 * and in such case, btrfs cannot write sb via btrfs_commit_super,
-	 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-	 * btrfs will cleanup all FS resources first and write sb then.
-	 */
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_commit_super(root);
 		if (ret)
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		ret = btrfs_error_commit_super(root);
-		if (ret)
-			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-	}
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+		btrfs_error_commit_super(root);
 
 	btrfs_put_block_group_cache(fs_info);
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	if (read_only)
 		return 0;
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		printk(KERN_WARNING "warning: mount fs with errors, "
-		       "running btrfsck is recommended\n");
-	}
-
 	return 0;
 }
 
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
 {
-	int ret;
-
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_run_delayed_iputs(root);
 	mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
 
 	/* cleanup FS via transaction */
 	btrfs_cleanup_transaction(root);
-
-	ret = write_ctree_super(NULL, root, 0);
-
-	return ret;
 }
 
 static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 		/* FIXME: cleanup wait for commit */
 		t->in_commit = 1;
 		t->blocked = 1;
+		smp_mb();
 		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
 			wake_up(&root->fs_info->transaction_blocked_wait);
 
 		t->blocked = 0;
+		smp_mb();
 		if (waitqueue_active(&root->fs_info->transaction_wait))
 			wake_up(&root->fs_info->transaction_wait);
 
 		t->commit_done = 1;
+		smp_mb();
 		if (waitqueue_active(&t->commit_wait))
 			wake_up(&t->commit_wait);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea23..c5b00a735fe 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c4..ba58024d40d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		}
 
 		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish.  If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 */
+		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+					 locked_ref);
+
+		/*
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 *     wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
+		if (locked_ref) {
+			/*
+			 * when we play the delayed ref, also correct the
+			 * ref_mod on head
+			 */
+			switch (ref->action) {
+			case BTRFS_ADD_DELAYED_REF:
+			case BTRFS_ADD_DELAYED_EXTENT:
+				locked_ref->node.ref_mod -= ref->ref_mod;
+				break;
+			case BTRFS_DROP_DELAYED_REF:
+				locked_ref->node.ref_mod += ref->ref_mod;
+				break;
+			default:
+				WARN_ON(1);
+			}
+		}
 		spin_unlock(&delayed_refs->lock);
 
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
-	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
 	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
-	consider_waiting = 0;
+	loops = 0;
 	spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
 		if (ret)
 			break;
 
-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
 		if (count == 0)
 			break;
 
-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
 			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
 		}
 	}
@@ -3007,17 +3002,16 @@ again:
 	}
 	spin_unlock(&block_group->lock);
 
-	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	/*
+	 * Try to preallocate enough space based on how big the block group is.
+	 * Keep in mind this has to include any pinned space which could end up
+	 * taking up quite a bit since it's not folded into the other space
+	 * cache.
+	 */
+	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
 	if (!num_pages)
 		num_pages = 1;
 
-	/*
-	 * Just to make absolutely sure we have enough space, we're going to
-	 * preallocate 12 pages worth of space for each block group.  In
-	 * practice we ought to use at most 8, but we need extra space so we can
-	 * add our header and have a terminator between the extents and the
-	 * bitmaps.
-	 */
 	num_pages *= 16;
 	num_pages *= PAGE_CACHE_SIZE;
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	if (root->fs_info->quota_enabled) {
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 			return ret;
+		}
 	}
 
 	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac8..4c878476bb9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
 							      state, mirror);
-			if (ret) {
-				/* no IO indicated but software detected errors
-				 * in the block, either checksum errors or
-				 * issues with the contents */
-				struct btrfs_root *root =
-					BTRFS_I(page->mapping->host)->root;
-				struct btrfs_device *device;
-
+			if (ret)
 				uptodate = 0;
-				device = btrfs_find_device_for_logical(
-						root, start, mirror);
-				if (device)
-					btrfs_dev_stat_inc_and_print(device,
-						BTRFS_DEV_STAT_CORRUPTION_ERRS);
-			} else {
+			else
 				clean_io_failure(start, page);
-			}
 		}
 
 		if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21..857d93cd01d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 offset, u32 *dst)
+			      struct bio *bio, u64 offset)
 {
-	return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+	return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d..ec154f95464 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
 	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
 		PAGE_CACHE_SHIFT;
 
-	atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-	if (atomic_read(&root->fs_info->async_delalloc_pages) <
+	if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
 	    5 * 1024 * 1024 &&
 	    waitqueue_active(&root->fs_info->async_submit_wait))
 		wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 				trans = btrfs_join_transaction_nolock(root);
 			else
 				trans = btrfs_join_transaction(root);
-			if (IS_ERR(trans))
-				return PTR_ERR(trans);
+			if (IS_ERR(trans)) {
+				ret = PTR_ERR(trans);
+				trans = NULL;
+				goto out;
+			}
 			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 			ret = btrfs_update_inode_fallback(trans, root, inode);
 			if (ret) /* -ENOMEM or corruption */
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
 	inode_inc_iversion(dir);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-	ret = btrfs_update_inode(trans, root, dir);
+	ret = btrfs_update_inode_fallback(trans, root, dir);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5774,18 +5775,112 @@ out:
 	return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+			      struct extent_state **cached_state, int writing)
+{
+	struct btrfs_ordered_extent *ordered;
+	int ret = 0;
+
+	while (1) {
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, cached_state);
+		/*
+		 * We're concerned with the entire range that we're going to be
+		 * doing DIO to, so we need to make sure theres no ordered
+		 * extents in this range.
+		 */
+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
+						     lockend - lockstart + 1);
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent.  The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    *cached_state)))
+			break;
+
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     cached_state, GFP_NOFS);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				break;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(inode->i_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
 	u64 start = iblock << inode->i_blkbits;
+	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
 	struct btrfs_trans_handle *trans;
+	int unlock_bits = EXTENT_LOCKED;
+	int ret;
+
+	if (create) {
+		ret = btrfs_delalloc_reserve_space(inode, len);
+		if (ret)
+			return ret;
+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+	} else {
+		len = min_t(u64, len, root->sectorsize);
+	}
+
+	lockstart = start;
+	lockend = start + len - 1;
+
+	/*
+	 * If this errors out it's because we couldn't invalidate pagecache for
+	 * this range and we need to fallback to buffered.
+	 */
+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+		return -ENOTBLK;
+
+	if (create) {
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		if (ret)
+			goto unlock_err;
+	}
 
 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 
 	/*
 	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
 	    em->block_start == EXTENT_MAP_INLINE) {
 		free_extent_map(em);
-		return -ENOTBLK;
+		ret = -ENOTBLK;
+		goto unlock_err;
 	}
 
 	/* Just a good old fashioned hole, return */
 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 		free_extent_map(em);
-		/* DIO will do one hole at a time, so just unlock a sector */
-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
-			      start + root->sectorsize - 1);
-		return 0;
+		ret = 0;
+		goto unlock_err;
 	}
 
 	/*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	 *
 	 */
 	if (!create) {
-		len = em->len - (start - em->start);
-		goto map;
+		len = min(len, em->len - (start - em->start));
+		lockstart = start + len;
+		goto unlock;
 	}
 
 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 			btrfs_end_transaction(trans, root);
 			if (ret) {
 				free_extent_map(em);
-				return ret;
+				goto unlock_err;
 			}
 			goto unlock;
 		}
@@ -5873,14 +5968,12 @@ must_cow:
 	 */
 	len = bh_result->b_size;
 	em = btrfs_new_extent_direct(inode, em, start, len);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 	len = min(len, em->len - (start - em->start));
 unlock:
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-			  EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-			  0, NULL, GFP_NOFS);
-map:
 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
 		inode->i_blkbits;
 	bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
 			i_size_write(inode, start + len);
 	}
 
+	/*
+	 * In the case of write we need to clear and unlock the entire range,
+	 * in the case of read we need to unlock only the end area that we
+	 * aren't using if there is any left over space.
+	 */
+	if (lockstart < lockend) {
+		if (create && len < lockend - lockstart) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockstart + len - 1, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+			/*
+			 * Beside unlock, we also need to cleanup reserved space
+			 * for the left range by attaching EXTENT_DO_ACCOUNTING.
+			 */
+			clear_extent_bit(&BTRFS_I(inode)->io_tree,
+					 lockstart + len, lockend,
+					 unlock_bits | EXTENT_DO_ACCOUNTING,
+					 1, 0, NULL, GFP_NOFS);
+		} else {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockend, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+		}
+	} else {
+		free_extent_state(cached_state);
+	}
+
 	free_extent_map(em);
 	return 0;
+
+unlock_err:
+	if (create)
+		unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	return ret;
 }
 
 struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
 	u64 logical_offset;
 	u64 disk_bytenr;
 	u64 bytes;
-	u32 *csums;
 	void *private;
 
 	/* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 start;
-	u32 *private = dip->csums;
 
 	start = dip->logical_offset;
 	do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 			struct page *page = bvec->bv_page;
 			char *kaddr;
 			u32 csum = ~(u32)0;
+			u64 private = ~(u32)0;
 			unsigned long flags;
 
+			if (get_state_private(&BTRFS_I(inode)->io_tree,
+					      start, &private))
+				goto failed;
 			local_irq_save(flags);
 			kaddr = kmap_atomic(page);
 			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 			local_irq_restore(flags);
 
 			flush_dcache_page(bvec->bv_page);
-			if (csum != *private) {
+			if (csum != private) {
+failed:
 				printk(KERN_ERR "btrfs csum failed ino %llu off"
 				      " %llu csum %u private %u\n",
 				      (unsigned long long)btrfs_ino(inode),
 				      (unsigned long long)start,
-				      csum, *private);
+				      csum, (unsigned)private);
 				err = -EIO;
 			}
 		}
 
 		start += bvec->bv_len;
-		private++;
 		bvec++;
 	} while (bvec <= bvec_end);
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 		      dip->logical_offset + dip->bytes - 1);
 	bio->bi_private = dip->private;
 
-	kfree(dip->csums);
 	kfree(dip);
 
 	/* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 					 int rw, u64 file_offset, int skip_sum,
-					 u32 *csums, int async_submit)
+					 int async_submit)
 {
 	int write = rw & REQ_WRITE;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 		if (ret)
 			goto err;
 	} else if (!skip_sum) {
-		ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-					  file_offset, csums);
+		ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
 		if (ret)
 			goto err;
 	}
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	u64 submit_len = 0;
 	u64 map_length;
 	int nr_pages = 0;
-	u32 *csums = dip->csums;
 	int ret = 0;
 	int async_submit = 0;
-	int write = rw & REQ_WRITE;
 
 	map_length = orig_bio->bi_size;
 	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 			atomic_inc(&dip->pending_bios);
 			ret = __btrfs_submit_dio_bio(bio, inode, rw,
 						     file_offset, skip_sum,
-						     csums, async_submit);
+						     async_submit);
 			if (ret) {
 				bio_put(bio);
 				atomic_dec(&dip->pending_bios);
 				goto out_err;
 			}
 
-			/* Write's use the ordered csums */
-			if (!write && !skip_sum)
-				csums = csums + nr_pages;
-
 			start_sector += submit_len >> 9;
 			file_offset += submit_len;
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
 	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-				     csums, async_submit);
+				     async_submit);
 	if (!ret)
 		return 0;
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 		ret = -ENOMEM;
 		goto free_ordered;
 	}
-	dip->csums = NULL;
-
-	/* Write's use the ordered csum stuff, so we don't need dip->csums */
-	if (!write && !skip_sum) {
-		dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-		if (!dip->csums) {
-			kfree(dip);
-			ret = -ENOMEM;
-			goto free_ordered;
-		}
-	}
 
 	dip->private = bio->bi_private;
 	dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
 	return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			const struct iovec *iov, loff_t offset,
 			unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cached_state = NULL;
-	u64 lockstart, lockend;
-	ssize_t ret;
-	int writing = rw & WRITE;
-	int write_bits = 0;
-	size_t count = iov_length(iov, nr_segs);
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs)) {
+			    offset, nr_segs))
 		return 0;
-	}
-
-	lockstart = offset;
-	lockend = offset + count - 1;
-
-	if (writing) {
-		ret = btrfs_delalloc_reserve_space(inode, count);
-		if (ret)
-			goto out;
-	}
-
-	while (1) {
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 0, &cached_state);
-		/*
-		 * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure theres no ordered
-		 * extents in this range.
-		 */
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     lockend - lockstart + 1);
-
-		/*
-		 * We need to make sure there are no buffered pages in this
-		 * range either, we could have raced between the invalidate in
-		 * generic_file_direct_write and locking the extent.  The
-		 * invalidate needs to happen so that reads after a write do not
-		 * get stale data.
-		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    cached_state)))
-			break;
-
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     &cached_state, GFP_NOFS);
-
-		if (ordered) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-		} else {
-			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(file->f_mapping,
-							   lockstart,
-							   lockend);
-			if (ret)
-				goto out;
-
-			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
-			 */
-			ret = invalidate_inode_pages2_range(file->f_mapping,
-					lockstart >> PAGE_CACHE_SHIFT,
-					lockend >> PAGE_CACHE_SHIFT);
-			if (ret) {
-				if (ret == -EBUSY)
-					ret = 0;
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-	/*
-	 * we don't use btrfs_set_extent_delalloc because we don't want
-	 * the dirty or uptodate bits
-	 */
-	if (writing) {
-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     EXTENT_DELALLOC, NULL, &cached_state,
-				     GFP_NOFS);
-		if (ret) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, EXTENT_LOCKED | write_bits,
-					 1, 0, &cached_state, GFP_NOFS);
-			goto out;
-		}
-	}
-
-	free_extent_state(cached_state);
-	cached_state = NULL;
-
-	ret = __blockdev_direct_IO(rw, iocb, inode,
+	return __blockdev_direct_IO(rw, iocb, inode,
 		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
 		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
 		   btrfs_submit_direct, 0);
-
-	if (ret < 0 && ret != -EIOCBQUEUED) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-		/*
-		 * We're falling back to buffered, unlock the section we didn't
-		 * do IO on.
-		 */
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	}
-out:
-	free_extent_state(cached_state);
-	return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a2..9df50fa8a07 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 	uuid_le_gen(&new_uuid);
 	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
 	root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-	root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	root_item.ctime = root_item.otime;
 	btrfs_set_root_ctransid(&root_item, trans->transid);
 	btrfs_set_root_otransid(&root_item, trans->transid);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff07480..2a1762c6604 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
 	if (eb->lock_nested) {
 		read_lock(&eb->lock);
-		if (&eb->lock_nested && current->pid == eb->lock_owner) {
+		if (eb->lock_nested && current->pid == eb->lock_owner) {
 			read_unlock(&eb->lock);
 			return;
 		}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81..38b42e7bc91 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	spin_lock(&fs_info->qgroup_lock);
 
 	dstgroup = add_qgroup_rb(fs_info, objectid);
-	if (!dstgroup)
+	if (IS_ERR(dstgroup)) {
+		ret = PTR_ERR(dstgroup);
 		goto unlock;
+	}
 
 	if (srcid) {
 		srcgroup = find_qgroup_rb(fs_info, srcid);
-		if (!srcgroup)
+		if (!srcgroup) {
+			ret = -EINVAL;
 			goto unlock;
+		}
 		dstgroup->rfer = srcgroup->rfer - level_size;
 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
 		srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		qgroup_dirty(fs_info, srcgroup);
 	}
 
-	if (!inherit)
+	if (!inherit) {
+		ret = -EINVAL;
 		goto unlock;
+	}
 
 	i_qgroups = (u64 *)(inherit + 1);
 	for (i = 0; i < inherit->num_qgroups; ++i) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20..10d8e4d8807 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
 	struct timespec ct = CURRENT_TIME;
 
 	spin_lock(&root->root_times_lock);
-	item->ctransid = trans->transid;
+	item->ctransid = cpu_to_le64(trans->transid);
 	item->ctime.sec = cpu_to_le64(ct.tv_sec);
-	item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
 	spin_unlock(&root->root_times_lock);
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c477a..83d6f9f9c22 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 	struct btrfs_root *root = fs_info->tree_root;
-	int ret;
 
 	trace_btrfs_sync_fs(wait);
 
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
 	btrfs_wait_ordered_extents(root, 0, 0);
 
-	trans = btrfs_start_transaction(root, 0);
+	spin_lock(&fs_info->trans_lock);
+	if (!fs_info->running_transaction) {
+		spin_unlock(&fs_info->trans_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->trans_lock);
+
+	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
-	ret = btrfs_commit_transaction(trans, root);
-	return ret;
+	return btrfs_commit_transaction(trans, root);
 }
 
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	while (cur_devices) {
 		head = &cur_devices->devices;
 		list_for_each_entry(dev, head, dev_list) {
+			if (dev->missing)
+				continue;
 			if (!first_dev || dev->devid < first_dev->devid)
 				first_dev = dev;
 		}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacb..27c26004e05 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	btrfs_i_size_write(parent_inode, parent_inode->i_size +
 					 dentry->d_name.len * 2);
+	parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	if (ret)
 		goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
 			BTRFS_UUID_SIZE);
 	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-	new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	btrfs_set_root_otransid(new_root_item, trans->transid);
 	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
 	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04abe6..88b969aeeb7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
 		cur = pending;
 		pending = pending->bi_next;
 		cur->bi_next = NULL;
-		atomic_dec(&fs_info->nr_async_bios);
 
-		if (atomic_read(&fs_info->nr_async_bios) < limit &&
+		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
 		    waitqueue_active(&fs_info->async_submit_wait))
 			wake_up(&fs_info->async_submit_wait);
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		memcpy(new_device, device, sizeof(*new_device));
 
 		/* Safe because we are under uuid_mutex */
-		name = rcu_string_strdup(device->name->str, GFP_NOFS);
-		BUG_ON(device->name && !name); /* -ENOMEM */
-		rcu_assign_pointer(new_device->name, name);
+		if (device->name) {
+			name = rcu_string_strdup(device->name->str, GFP_NOFS);
+			BUG_ON(device->name && !name); /* -ENOMEM */
+			rcu_assign_pointer(new_device->name, name);
+		}
 		new_device->bdev = NULL;
 		new_device->writeable = 0;
 		new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	return ret;
 }
 
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num)
-{
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	int ret;
-	u64 map_length = 0;
-	struct btrfs_bio *bbio = NULL;
-	struct btrfs_device *device;
-
-	BUG_ON(mirror_num == 0);
-	ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-			      mirror_num);
-	if (ret) {
-		BUG_ON(bbio != NULL);
-		return NULL;
-	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	device = bbio->stripes[mirror_num - 1].dev;
-	kfree(bbio);
-	return device;
-}
-
 int btrfs_read_chunk_tree(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b..53c06af92e8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
 			 u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num);
 void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_root *root,
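
Notes on recurring patterns in the patch above (editorial sketches; they restate what the hunks show and are not part of the patch itself).

The new merge_ref()/btrfs_merge_delayed_refs() code in delayed-ref.c folds matching delayed refs for a single bytenr together before they are run: refs with the same action add their ref_mod counts, while an opposing add/drop pair cancels, keeping whichever node carries the larger count (the swap through tmp) and subtracting the smaller; a ref whose count reaches zero is dropped outright via drop_delayed_ref(). Refs at or beyond the lowest active tree-mod-seq number are skipped so sequenced readers still see them. As a worked example of the arithmetic (not taken from the patch): an ADD with ref_mod = 2 merged with a DROP with ref_mod = 1 leaves a single ADD with ref_mod = 1, while an ADD and a DROP with equal counts annihilate and both nodes leave the tree.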
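
Several hunks (delayed-inode.c, disk-io.c, inode.c, volumes.c) replace an atomic_dec()/atomic_read() pair with a single atomic_dec_return(). The two-step form is racy: another task can change the counter between the decrement and the read, so the threshold test may act on a stale value and a needed wakeup can be skipped. The combined read-modify-write returns the exact post-decrement value and, per the kernel's atomic rules, also implies a full memory barrier. A minimal sketch of the pattern, with a hypothetical counter and waitqueue:

	/*
	 * Sketch only: pending_items/throttle_wait are hypothetical.
	 * atomic_dec_return() yields the post-decrement value atomically
	 * and implies a full barrier, so the unlocked waitqueue_active()
	 * test cannot be reordered before the decrement.
	 */
	static atomic_t pending_items;
	static wait_queue_head_t throttle_wait;
	#define THROTTLE_LIMIT 64

	static void item_done(void)
	{
		if (atomic_dec_return(&pending_items) < THROTTLE_LIMIT &&
		    waitqueue_active(&throttle_wait))
			wake_up(&throttle_wait);
	}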
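
Relatedly, the compression.c and btrfs_cleanup_transaction() hunks insert smp_mb() before waitqueue_active(). waitqueue_active() is an unlocked peek at the queue, so the store that makes the wait condition true must be globally visible before it runs; otherwise the waker can observe an empty queue while the about-to-sleep task still observes the old condition, and the wakeup is lost. The pairing looks roughly like this (sketch with a hypothetical flag):

	static int done_flag;
	static wait_queue_head_t done_wait;

	static void waker(void)
	{
		done_flag = 1;
		smp_mb();	/* order the store before the unlocked queue test */
		if (waitqueue_active(&done_wait))
			wake_up(&done_wait);
	}

	static void sleeper(void)
	{
		/* wait_event() re-checks done_flag with proper barriers */
		wait_event(done_wait, done_flag);
	}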
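
Finally, the ioctl.c, root-tree.c, and transaction.c hunks change cpu_to_le64() to cpu_to_le32() for the nsec fields and add a missing cpu_to_le64() around ctransid. Btrfs on-disk structures hold fixed-width little-endian integers, and the nanoseconds field is only 32 bits wide, so converting it as a 64-bit quantity writes the wrong bytes (and a raw CPU-endian transid assignment corrupts the item on big-endian hosts). A sketch of the layout implied by these conversions (field widths reconstructed from the calls, names hypothetical):

	/* on-disk timestamps: little-endian, 64-bit sec, 32-bit nsec */
	struct disk_timespec {
		__le64 sec;
		__le32 nsec;	/* 32 bits on disk, hence cpu_to_le32() */
	} __attribute__ ((__packed__));

	static void pack_timespec(struct disk_timespec *ts, struct timespec ct)
	{
		ts->sec  = cpu_to_le64(ct.tv_sec);	/* width matches field */
		ts->nsec = cpu_to_le32(ct.tv_nsec);
	}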