diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/backref.c | 27 | ||||
| -rw-r--r-- | fs/btrfs/compression.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 4 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 22 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 26 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 62 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 4 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 52 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/reada.c | 48 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 19 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 15 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 33 | 
17 files changed, 188 insertions, 160 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f4e90748940..bcec0675023 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -22,6 +22,7 @@  #include "ulist.h"  #include "transaction.h"  #include "delayed-ref.h" +#include "locking.h"  /*   * this structure records all encountered refs on the way up to the root @@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,  	s64 bytes_left = size - 1;  	struct extent_buffer *eb = eb_in;  	struct btrfs_key found_key; +	int leave_spinning = path->leave_spinning;  	if (bytes_left >= 0)  		dest[bytes_left] = '\0'; +	path->leave_spinning = 1;  	while (1) {  		len = btrfs_inode_ref_name_len(eb, iref);  		bytes_left -= len;  		if (bytes_left >= 0)  			read_extent_buffer(eb, dest + bytes_left,  						(unsigned long)(iref + 1), len); -		if (eb != eb_in) +		if (eb != eb_in) { +			btrfs_tree_read_unlock_blocking(eb);  			free_extent_buffer(eb); +		}  		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);  		if (ret > 0)  			ret = -ENOENT; @@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,  		slot = path->slots[0];  		eb = path->nodes[0];  		/* make sure we can use eb after releasing the path */ -		if (eb != eb_in) +		if (eb != eb_in) {  			atomic_inc(&eb->refs); +			btrfs_tree_read_lock(eb); +			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); +		}  		btrfs_release_path(path);  		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); @@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,  	}  	btrfs_release_path(path); +	path->leave_spinning = leave_spinning;  	if (ret)  		return ERR_PTR(ret); @@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,  				struct btrfs_path *path,  				iterate_irefs_t *iterate, void *ctx)  { -	int ret; +	int ret = 0;  	int slot;  	u32 cur;  	u32 len; @@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,  	struct btrfs_inode_ref *iref;  	struct btrfs_key found_key; -	while (1) { +	while (!ret) { +		path->leave_spinning = 1;  		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,  					&found_key);  		if (ret < 0) @@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,  		eb = path->nodes[0];  		/* make sure we can use eb after releasing the path */  		atomic_inc(&eb->refs); +		btrfs_tree_read_lock(eb); +		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);  		btrfs_release_path(path);  		item = btrfs_item_nr(eb, slot); @@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,  				 (unsigned long long)found_key.objectid,  				 (unsigned long long)fs_root->objectid);  			ret = iterate(parent, iref, eb, ctx); -			if (ret) { -				free_extent_buffer(eb); +			if (ret)  				break; -			}  			len = sizeof(*iref) + name_len;  			iref = (struct btrfs_inode_ref *)((char *)iref + len);  		} +		btrfs_tree_read_unlock_blocking(eb);  		free_extent_buffer(eb);  	} @@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,  void free_ipath(struct inode_fs_paths *ipath)  { +	if (!ipath) +		return;  	kfree(ipath->fspath);  	kfree(ipath);  } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d286b40a567..86eff48dab7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -405,6 +405,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,  			bio_put(bio);  			bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); +			BUG_ON(!bio);  			bio->bi_private = cb;  			bio->bi_end_io = end_compressed_bio_write;  			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); @@ -687,6 +688,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,  			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,  							GFP_NOFS); +			BUG_ON(!comp_bio);  			comp_bio->bi_private = cb;  			comp_bio->bi_end_io = end_compressed_bio_read; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5b8ef8eb352..8fd72331d60 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1078,7 +1078,7 @@ struct btrfs_fs_info {  	 * is required instead of the faster short fsync log commits  	 */  	u64 last_trans_log_full_commit; -	unsigned long mount_opt:21; +	unsigned long mount_opt;  	unsigned long compress_type:4;  	u64 max_inline;  	u64 alloc_start; @@ -2166,7 +2166,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,  static inline bool btrfs_root_readonly(struct btrfs_root *root)  { -	return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; +	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;  }  /* struct btrfs_root_backup */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20196f41120..d0c969beaad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,  		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))  			break; -		if (!failed_mirror) { -			failed = 1; -			printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror); -			failed_mirror = eb->failed_mirror; -		} -  		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,  					      eb->start, eb->len);  		if (num_copies == 1)  			break; +		if (!failed_mirror) { +			failed = 1; +			failed_mirror = eb->read_mirror; +		} +  		mirror_num++;  		if (mirror_num == failed_mirror)  			mirror_num++; @@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,  }  static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, -			       struct extent_state *state) +			       struct extent_state *state, int mirror)  {  	struct extent_io_tree *tree;  	u64 found_start; @@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,  	if (!reads_done)  		goto err; +	eb->read_mirror = mirror;  	if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {  		ret = -EIO;  		goto err; @@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)  	eb = (struct extent_buffer *)page->private;  	set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); -	eb->failed_mirror = failed_mirror; +	eb->read_mirror = failed_mirror;  	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))  		btree_readahead_hook(root, eb, eb->start, -EIO);  	return -EIO;	/* we fixed nothing */ @@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb,  		goto fail_sb_buffer;  	} -	if (sectorsize < PAGE_SIZE) { -		printk(KERN_WARNING "btrfs: Incompatible sector size " -		       "found on %s\n", sb->s_id); +	if (sectorsize != PAGE_SIZE) { +		printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) " +		       "found on %s\n", (unsigned long)sectorsize, sb->s_id);  		goto fail_sb_buffer;  	} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a84420491c1..6fc2e6f5aab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -529,9 +529,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,  	 * allocate blocks for the tree root we can't do the fast caching since  	 * we likely hold important locks.  	 */ -	if (trans && (!trans->transaction->in_commit) && -	    (root && root != root->fs_info->tree_root) && -	    btrfs_test_opt(root, SPACE_CACHE)) { +	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {  		ret = load_free_space_cache(fs_info, cache);  		spin_lock(&cache->lock); @@ -2303,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,  				if (ret) {  					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); +					spin_lock(&delayed_refs->lock);  					return ret;  				} @@ -2333,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,  		if (ret) {  			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); +			spin_lock(&delayed_refs->lock);  			return ret;  		} @@ -3152,15 +3152,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)  /*   * returns target flags in extended format or 0 if restripe for this   * chunk_type is not in progress + * + * should be called with either volume_mutex or balance_lock held   */  static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)  {  	struct btrfs_balance_control *bctl = fs_info->balance_ctl;  	u64 target = 0; -	BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) && -	       !spin_is_locked(&fs_info->balance_lock)); -  	if (!bctl)  		return 0; @@ -3772,13 +3771,10 @@ again:  		 */  		if (current->journal_info)  			return -EAGAIN; -		ret = wait_event_interruptible(space_info->wait, -					       !space_info->flush); -		/* Must have been interrupted, return */ -		if (ret) { -			printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__); +		ret = wait_event_killable(space_info->wait, !space_info->flush); +		/* Must have been killed, return */ +		if (ret)  			return -EINTR; -		}  		spin_lock(&space_info->lock);  	} @@ -4205,7 +4201,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)  	num_bytes += div64_u64(data_used + meta_used, 50);  	if (num_bytes * 3 > meta_used) -		num_bytes = div64_u64(meta_used, 3) * 2; +		num_bytes = div64_u64(meta_used, 3);  	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);  } @@ -4218,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)  	num_bytes = calc_global_metadata_size(fs_info); -	spin_lock(&block_rsv->lock);  	spin_lock(&sinfo->lock); +	spin_lock(&block_rsv->lock);  	block_rsv->size = num_bytes; @@ -4245,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)  		block_rsv->full = 1;  	} -	spin_unlock(&sinfo->lock);  	spin_unlock(&block_rsv->lock); +	spin_unlock(&sinfo->lock);  }  static void init_global_block_rsv(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8d904dd7ea9..198c2ba2fa4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,  	return 0;  } +static struct extent_state *next_state(struct extent_state *state) +{ +	struct rb_node *next = rb_next(&state->rb_node); +	if (next) +		return rb_entry(next, struct extent_state, rb_node); +	else +		return NULL; +} +  /*   * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). + * it will optionally wake up any one waiting on this state (wake == 1)   *   * If no bits are set on the state struct after clearing things, the   * struct is freed and removed from the tree   */ -static int clear_state_bit(struct extent_io_tree *tree, -			    struct extent_state *state, -			    int *bits, int wake) +static struct extent_state *clear_state_bit(struct extent_io_tree *tree, +					    struct extent_state *state, +					    int *bits, int wake)  { +	struct extent_state *next;  	int bits_to_clear = *bits & ~EXTENT_CTLBITS; -	int ret = state->state & bits_to_clear;  	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {  		u64 range = state->end - state->start + 1; @@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,  	if (wake)  		wake_up(&state->wq);  	if (state->state == 0) { +		next = next_state(state);  		if (state->tree) {  			rb_erase(&state->rb_node, &tree->state);  			state->tree = NULL; @@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,  		}  	} else {  		merge_state(tree, state); +		next = next_state(state);  	} -	return ret; +	return next;  }  static struct extent_state * @@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,  	struct extent_state *state;  	struct extent_state *cached;  	struct extent_state *prealloc = NULL; -	struct rb_node *next_node;  	struct rb_node *node;  	u64 last_end;  	int err; @@ -528,14 +537,11 @@ hit_next:  	WARN_ON(state->end < start);  	last_end = state->end; -	if (state->end < end && !need_resched()) -		next_node = rb_next(&state->rb_node); -	else -		next_node = NULL; -  	/* the state doesn't have the wanted bits, go ahead */ -	if (!(state->state & bits)) +	if (!(state->state & bits)) { +		state = next_state(state);  		goto next; +	}  	/*  	 *     | ---- desired range ---- | @@ -593,16 +599,13 @@ hit_next:  		goto out;  	} -	clear_state_bit(tree, state, &bits, wake); +	state = clear_state_bit(tree, state, &bits, wake);  next:  	if (last_end == (u64)-1)  		goto out;  	start = last_end + 1; -	if (start <= end && next_node) { -		state = rb_entry(next_node, struct extent_state, -				 rb_node); +	if (start <= end && state && !need_resched())  		goto hit_next; -	}  	goto search_again;  out: @@ -1937,7 +1940,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,  	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;  	u64 start = eb->start;  	unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); -	int ret; +	int ret = 0;  	for (i = 0; i < num_pages; i++) {  		struct page *p = extent_buffer_page(eb, i); @@ -2180,6 +2183,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,  	}  	bio = bio_alloc(GFP_NOFS, 1); +	if (!bio) { +		free_io_failure(inode, failrec, 0); +		return -EIO; +	}  	bio->bi_private = state;  	bio->bi_end_io = failed_bio->bi_end_io;  	bio->bi_sector = failrec->logical >> 9; @@ -2297,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  	u64 start;  	u64 end;  	int whole_page; -	int failed_mirror; +	int mirror;  	int ret;  	if (err) @@ -2336,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  		}  		spin_unlock(&tree->lock); +		mirror = (int)(unsigned long)bio->bi_bdev;  		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {  			ret = tree->ops->readpage_end_io_hook(page, start, end, -							      state); +							      state, mirror);  			if (ret)  				uptodate = 0;  			else  				clean_io_failure(start, page);  		} -		if (!uptodate) -			failed_mirror = (int)(unsigned long)bio->bi_bdev; -  		if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { -			ret = tree->ops->readpage_io_failed_hook(page, failed_mirror); +			ret = tree->ops->readpage_io_failed_hook(page, mirror);  			if (!ret && !err &&  			    test_bit(BIO_UPTODATE, &bio->bi_flags))  				uptodate = 1; @@ -2364,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  			 * can't handle the error it will return -EIO and we  			 * remain responsible for that page.  			 */ -			ret = bio_readpage_error(bio, page, start, end, -							failed_mirror, NULL); +			ret = bio_readpage_error(bio, page, start, end, mirror, NULL);  			if (ret == 0) {  				uptodate =  					test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -4458,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,  	}  	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); -	eb->failed_mirror = 0; +	eb->read_mirror = 0;  	atomic_set(&eb->io_pages, num_reads);  	for (i = start_i; i < num_pages; i++) {  		page = extent_buffer_page(eb, i); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index faf10eb57f7..b516c3b8dec 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -79,7 +79,7 @@ struct extent_io_ops {  					u64 start, u64 end,  				       struct extent_state *state);  	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, -				    struct extent_state *state); +				    struct extent_state *state, int mirror);  	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,  				      struct extent_state *state, int uptodate);  	void (*set_bit_hook)(struct inode *inode, struct extent_state *state, @@ -135,7 +135,7 @@ struct extent_buffer {  	spinlock_t refs_lock;  	atomic_t refs;  	atomic_t io_pages; -	int failed_mirror; +	int read_mirror;  	struct list_head leak_list;  	struct rcu_head rcu_head;  	pid_t lock_owner; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d83260d7498..53bf2d764bb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,  	int extent_type;  	int recow;  	int ret; +	int modify_tree = -1;  	if (drop_cache)  		btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,  	if (!path)  		return -ENOMEM; +	if (start >= BTRFS_I(inode)->disk_i_size) +		modify_tree = 0; +  	while (1) {  		recow = 0;  		ret = btrfs_lookup_file_extent(trans, root, path, ino, -					       search_start, -1); +					       search_start, modify_tree);  		if (ret < 0)  			break;  		if (ret > 0 && path->slots[0] > 0 && search_start == start) { @@ -634,7 +638,8 @@ next_slot:  		}  		search_start = max(key.offset, start); -		if (recow) { +		if (recow || !modify_tree) { +			modify_tree = -1;  			btrfs_release_path(path);  			continue;  		} diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e88330d3df5..202008ec367 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -748,13 +748,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,  	u64 used = btrfs_block_group_used(&block_group->item);  	/* -	 * If we're unmounting then just return, since this does a search on the -	 * normal root and not the commit root and we could deadlock. -	 */ -	if (btrfs_fs_closing(fs_info)) -		return 0; - -	/*  	 * If this block group has been marked to be cleared for one reason or  	 * another then we can't trust the on disk cache, so just return.  	 */ @@ -768,6 +761,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,  	path = btrfs_alloc_path();  	if (!path)  		return 0; +	path->search_commit_root = 1; +	path->skip_locking = 1;  	inode = lookup_free_space_inode(root, block_group, path);  	if (IS_ERR(inode)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 115bc05e42b..61b16c641ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,   * extent_io.c will try to find good copies for us.   */  static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, -			       struct extent_state *state) +			       struct extent_state *state, int mirror)  {  	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);  	struct inode *inode = page->mapping->host; @@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,  	BTRFS_I(inode)->dummy_inode = 1;  	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; -	inode->i_op = &simple_dir_inode_operations; +	inode->i_op = &btrfs_dir_ro_inode_operations;  	inode->i_fop = &simple_dir_operations;  	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;  	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  static int btrfs_dentry_delete(const struct dentry *dentry)  {  	struct btrfs_root *root; +	struct inode *inode = dentry->d_inode; -	if (!dentry->d_inode && !IS_ROOT(dentry)) -		dentry = dentry->d_parent; +	if (!inode && !IS_ROOT(dentry)) +		inode = dentry->d_parent->d_inode; -	if (dentry->d_inode) { -		root = BTRFS_I(dentry->d_inode)->root; +	if (inode) { +		root = BTRFS_I(inode)->root;  		if (btrfs_root_refs(&root->root_item) == 0)  			return 1; + +		if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) +			return 1;  	}  	return 0;  } @@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  	struct btrfs_path *path;  	struct list_head ins_list;  	struct list_head del_list; -	struct qstr q;  	int ret;  	struct extent_buffer *leaf;  	int slot; @@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  		while (di_cur < di_total) {  			struct btrfs_key location; -			struct dentry *tmp;  			if (verify_dir_item(root, leaf, di))  				break; @@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];  			btrfs_dir_item_key_to_cpu(leaf, di, &location); -			q.name = name_ptr; -			q.len = name_len; -			q.hash = full_name_hash(q.name, q.len); -			tmp = d_lookup(filp->f_dentry, &q); -			if (!tmp) { -				struct btrfs_key *newkey; -				newkey = kzalloc(sizeof(struct btrfs_key), -						 GFP_NOFS); -				if (!newkey) -					goto no_dentry; -				tmp = d_alloc(filp->f_dentry, &q); -				if (!tmp) { -					kfree(newkey); -					dput(tmp); -					goto no_dentry; -				} -				memcpy(newkey, &location, -				       sizeof(struct btrfs_key)); -				tmp->d_fsdata = newkey; -				tmp->d_flags |= DCACHE_NEED_LOOKUP; -				d_rehash(tmp); -				dput(tmp); -			} else { -				dput(tmp); -			} -no_dentry:  			/* is this a reference to our own snapshot? If so -			 * skip it +			 * skip it. +			 * +			 * In contrast to old kernels, we insert the snapshot's +			 * dir item and dir index after it has been created, so +			 * we won't find a reference to our own snapshot. We +			 * still keep the following code for backward +			 * compatibility.  			 */  			if (location.type == BTRFS_ROOT_ITEM_KEY &&  			    location.objectid == root->root_key.objectid) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 18cc23d164a..14f8e1faa46 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)  	di_args->bytes_used = dev->bytes_used;  	di_args->total_bytes = dev->total_bytes;  	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); -	strncpy(di_args->path, dev->name, sizeof(di_args->path)); +	if (dev->name) +		strncpy(di_args->path, dev->name, sizeof(di_args->path)); +	else +		di_args->path[0] = '\0';  out:  	if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dc5d33146fd..ac5d0108588 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,  					  struct btrfs_bio *bbio)  {  	int ret; -	int looped = 0;  	struct reada_zone *zone;  	struct btrfs_block_group_cache *cache = NULL;  	u64 start;  	u64 end;  	int i; -again:  	zone = NULL;  	spin_lock(&fs_info->reada_lock);  	ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, @@ -274,9 +272,6 @@ again:  		spin_unlock(&fs_info->reada_lock);  	} -	if (looped) -		return NULL; -  	cache = btrfs_lookup_block_group(fs_info, logical);  	if (!cache)  		return NULL; @@ -307,13 +302,15 @@ again:  	ret = radix_tree_insert(&dev->reada_zones,  				(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),  				zone); -	spin_unlock(&fs_info->reada_lock); -	if (ret) { +	if (ret == -EEXIST) {  		kfree(zone); -		looped = 1; -		goto again; +		ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, +					     logical >> PAGE_CACHE_SHIFT, 1); +		if (ret == 1) +			kref_get(&zone->refcnt);  	} +	spin_unlock(&fs_info->reada_lock);  	return zone;  } @@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,  					      struct btrfs_key *top, int level)  {  	int ret; -	int looped = 0;  	struct reada_extent *re = NULL; +	struct reada_extent *re_exist = NULL;  	struct btrfs_fs_info *fs_info = root->fs_info;  	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;  	struct btrfs_bio *bbio = NULL;  	struct btrfs_device *dev; +	struct btrfs_device *prev_dev;  	u32 blocksize;  	u64 length;  	int nzones = 0;  	int i;  	unsigned long index = logical >> PAGE_CACHE_SHIFT; -again:  	spin_lock(&fs_info->reada_lock);  	re = radix_tree_lookup(&fs_info->reada_tree, index);  	if (re)  		kref_get(&re->refcnt);  	spin_unlock(&fs_info->reada_lock); -	if (re || looped) +	if (re)  		return re;  	re = kzalloc(sizeof(*re), GFP_NOFS); @@ -398,16 +395,31 @@ again:  	/* insert extent in reada_tree + all per-device trees, all or nothing */  	spin_lock(&fs_info->reada_lock);  	ret = radix_tree_insert(&fs_info->reada_tree, index, re); +	if (ret == -EEXIST) { +		re_exist = radix_tree_lookup(&fs_info->reada_tree, index); +		BUG_ON(!re_exist); +		kref_get(&re_exist->refcnt); +		spin_unlock(&fs_info->reada_lock); +		goto error; +	}  	if (ret) {  		spin_unlock(&fs_info->reada_lock); -		if (ret != -ENOMEM) { -			/* someone inserted the extent in the meantime */ -			looped = 1; -		}  		goto error;  	} +	prev_dev = NULL;  	for (i = 0; i < nzones; ++i) {  		dev = bbio->stripes[i].dev; +		if (dev == prev_dev) { +			/* +			 * in case of DUP, just add the first zone. As both +			 * are on the same device, there's nothing to gain +			 * from adding both. +			 * Also, it wouldn't work, as the tree is per device +			 * and adding would fail with EEXIST +			 */ +			continue; +		} +		prev_dev = dev;  		ret = radix_tree_insert(&dev->reada_extents, index, re);  		if (ret) {  			while (--i >= 0) { @@ -450,9 +462,7 @@ error:  	}  	kfree(bbio);  	kfree(re); -	if (looped) -		goto again; -	return NULL; +	return re_exist;  }  static void reada_kref_dummy(struct kref *kr) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 017281dbb2a..646ee21bb03 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del)  		if (rb_node)  			backref_tree_panic(rb_node, -EEXIST, node->bytenr);  	} else { +		spin_lock(&root->fs_info->trans_lock);  		list_del_init(&root->root_list); +		spin_unlock(&root->fs_info->trans_lock);  		kfree(node);  	}  	return 0; @@ -3811,7 +3813,7 @@ restart:  		ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);  		if (ret < 0) { -			if (ret != -EAGAIN) { +			if (ret != -ENOSPC) {  				err = ret;  				WARN_ON(1);  				break; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 90acc82046c..4f76fc3f8e8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1044,6 +1044,8 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,  		BUG_ON(!page->page);  		bio = bio_alloc(GFP_NOFS, 1); +		if (!bio) +			return -EIO;  		bio->bi_bdev = page->bdev;  		bio->bi_sector = page->physical >> 9;  		bio->bi_end_io = scrub_complete_bio_end_io; @@ -1171,6 +1173,8 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,  		DECLARE_COMPLETION_ONSTACK(complete);  		bio = bio_alloc(GFP_NOFS, 1); +		if (!bio) +			return -EIO;  		bio->bi_bdev = page_bad->bdev;  		bio->bi_sector = page_bad->physical >> 9;  		bio->bi_end_io = scrub_complete_bio_end_io; @@ -1253,12 +1257,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)  	if (memcmp(csum, on_disk_csum, sdev->csum_size))  		fail = 1; -	if (fail) { -		spin_lock(&sdev->stat_lock); -		++sdev->stat.csum_errors; -		spin_unlock(&sdev->stat_lock); -	} -  	return fail;  } @@ -1331,15 +1329,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)  	if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))  		++crc_fail; -	if (crc_fail || fail) { -		spin_lock(&sdev->stat_lock); -		if (crc_fail) -			++sdev->stat.csum_errors; -		if (fail) -			++sdev->stat.verify_errors; -		spin_unlock(&sdev->stat_lock); -	} -  	return fail || crc_fail;  } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8d5d380f7bd..c5f8fca4195 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)  		return 0;  	} -	btrfs_start_delalloc_inodes(root, 0);  	btrfs_wait_ordered_extents(root, 0, 0);  	trans = btrfs_start_transaction(root, 0); @@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  		if (ret)  			goto restore;  	} else { -		if (fs_info->fs_devices->rw_devices == 0) +		if (fs_info->fs_devices->rw_devices == 0) {  			ret = -EACCES;  			goto restore; +		} -		if (btrfs_super_log_root(fs_info->super_copy) != 0) +		if (btrfs_super_log_root(fs_info->super_copy) != 0) {  			ret = -EINVAL;  			goto restore; +		}  		ret = btrfs_cleanup_fs_roots(fs_info);  		if (ret) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8da29e8e4de..36422254ef6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -73,8 +73,10 @@ loop:  	cur_trans = root->fs_info->running_transaction;  	if (cur_trans) { -		if (cur_trans->aborted) +		if (cur_trans->aborted) { +			spin_unlock(&root->fs_info->trans_lock);  			return cur_trans->aborted; +		}  		atomic_inc(&cur_trans->use_count);  		atomic_inc(&cur_trans->num_writers);  		cur_trans->num_joined++; @@ -480,6 +482,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	struct btrfs_transaction *cur_trans = trans->transaction;  	struct btrfs_fs_info *info = root->fs_info;  	int count = 0; +	int err = 0;  	if (--trans->use_count) {  		trans->block_rsv = trans->orig_rsv; @@ -532,18 +535,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,  	if (current->journal_info == trans)  		current->journal_info = NULL; -	memset(trans, 0, sizeof(*trans)); -	kmem_cache_free(btrfs_trans_handle_cachep, trans);  	if (throttle)  		btrfs_run_delayed_iputs(root);  	if (trans->aborted ||  	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { -		return -EIO; +		err = -EIO;  	} -	return 0; +	memset(trans, 0, sizeof(*trans)); +	kmem_cache_free(btrfs_trans_handle_cachep, trans); +	return err;  }  int btrfs_end_transaction(struct btrfs_trans_handle *trans, @@ -1399,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	ret = commit_fs_roots(trans, root);  	if (ret) {  		mutex_unlock(&root->fs_info->tree_log_mutex); +		mutex_unlock(&root->fs_info->reloc_mutex);  		goto cleanup_transaction;  	} @@ -1410,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,  	ret = commit_cowonly_roots(trans, root);  	if (ret) {  		mutex_unlock(&root->fs_info->tree_log_mutex); +		mutex_unlock(&root->fs_info->reloc_mutex);  		goto cleanup_transaction;  	} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a872b48be0a..1411b99555a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,  	stripe_size = devices_info[ndevs-1].max_avail;  	num_stripes = ndevs * dev_stripes; -	if (stripe_size * num_stripes > max_chunk_size * ncopies) { +	if (stripe_size * ndevs > max_chunk_size * ncopies) {  		stripe_size = max_chunk_size * ncopies; -		do_div(stripe_size, num_stripes); +		do_div(stripe_size, ndevs);  	}  	do_div(stripe_size, dev_stripes); + +	/* align to BTRFS_STRIPE_LEN */  	do_div(stripe_size, BTRFS_STRIPE_LEN);  	stripe_size *= BTRFS_STRIPE_LEN; @@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,  		else if (mirror_num)  			stripe_index += mirror_num - 1;  		else { +			int old_stripe_index = stripe_index;  			stripe_index = find_live_mirror(map, stripe_index,  					      map->sub_stripes, stripe_index +  					      current->pid % map->sub_stripes); -			mirror_num = stripe_index + 1; +			mirror_num = stripe_index - old_stripe_index + 1;  		}  	} else {  		/* @@ -3833,6 +3836,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,  		int sub_stripes = 0;  		u64 stripes_per_dev = 0;  		u32 remaining_stripes = 0; +		u32 last_stripe = 0;  		if (map->type &  		    (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { @@ -3846,6 +3850,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,  						      stripe_nr_orig,  						      factor,  						      &remaining_stripes); +			div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); +			last_stripe *= sub_stripes;  		}  		for (i = 0; i < num_stripes; i++) { @@ -3858,16 +3864,29 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,  					 BTRFS_BLOCK_GROUP_RAID10)) {  				bbio->stripes[i].length = stripes_per_dev *  							  map->stripe_len; +  				if (i / sub_stripes < remaining_stripes)  					bbio->stripes[i].length +=  						map->stripe_len; + +				/* +				 * Special for the first stripe and +				 * the last stripe: +				 * +				 * |-------|...|-------| +				 *     |----------| +				 *    off     end_off +				 */  				if (i < sub_stripes)  					bbio->stripes[i].length -=  						stripe_offset; -				if ((i / sub_stripes + 1) % -				    sub_stripes == remaining_stripes) + +				if (stripe_index >= last_stripe && +				    stripe_index <= (last_stripe + +						     sub_stripes - 1))  					bbio->stripes[i].length -=  						stripe_end_offset; +  				if (i == sub_stripes - 1)  					stripe_offset = 0;  			} else @@ -4334,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)  	ret = __btrfs_open_devices(fs_devices, FMODE_READ,  				   root->fs_info->bdev_holder); -	if (ret) +	if (ret) { +		free_fs_devices(fs_devices);  		goto out; +	}  	if (!fs_devices->seeding) {  		__btrfs_close_devices(fs_devices);  |