diff options
| -rw-r--r-- | fs/btrfs/backref.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/check-integrity.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/ctree.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 8 | ||||
| -rw-r--r-- | fs/btrfs/delayed-ref.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/dev-replace.c | 5 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 52 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 94 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 138 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 43 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.h | 2 | ||||
| -rw-r--r-- | fs/btrfs/inode-map.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 81 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/raid56.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 10 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 54 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 20 | 
21 files changed, 301 insertions, 246 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b4fb4155811..290e347b6db 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -918,7 +918,8 @@ again:  							   ref->parent, bsz, 0);  				if (!eb || !extent_buffer_uptodate(eb)) {  					free_extent_buffer(eb); -					return -EIO; +					ret = -EIO; +					goto out;  				}  				ret = find_extent_in_eb(eb, bytenr,  							*extent_item_pos, &eie); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 18af6f48781..1431a696501 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,  		unsigned int j;  		DECLARE_COMPLETION_ONSTACK(complete); -		bio = bio_alloc(GFP_NOFS, num_pages - i); +		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);  		if (!bio) {  			printk(KERN_INFO  			       "btrfsic: bio_alloc() for %u pages failed!\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index de6de8e60b4..02fae7f7e42 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,  			BUG_ON(ret); /* -ENOMEM */  		}  		if (new_flags != 0) { +			int level = btrfs_header_level(buf); +  			ret = btrfs_set_disk_extent_flags(trans, root,  							  buf->start,  							  buf->len, -							  new_flags, 0); +							  new_flags, level, 0);  			if (ret)  				return ret;  		} diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 63c328a9ce9..d6dd49b51ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -88,12 +88,12 @@ struct btrfs_ordered_sum;  /* holds checksums of all the data extents */  #define BTRFS_CSUM_TREE_OBJECTID 7ULL -/* for storing balance parameters in the root tree */ -#define BTRFS_BALANCE_OBJECTID -4ULL -  /* holds quota configuration and tracking */  #define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing balance parameters in the root tree */ +#define BTRFS_BALANCE_OBJECTID -4ULL +  /* orhpan objectid for tracking unlinked/truncated files */  #define BTRFS_ORPHAN_OBJECTID -5ULL @@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,  int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,  				struct btrfs_root *root,  				u64 bytenr, u64 num_bytes, u64 flags, -				int is_data); +				int level, int is_data);  int btrfs_free_extent(struct btrfs_trans_handle *trans,  		      struct btrfs_root *root,  		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f75fcaf79ae..70b962cc177 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {  struct btrfs_delayed_extent_op {  	struct btrfs_disk_key key;  	u64 flags_to_set; +	int level;  	unsigned int update_key:1;  	unsigned int update_flags:1;  	unsigned int is_data:1; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7ba7b3900cb..65241f32d3f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,  	struct btrfs_device *tgt_device = NULL;  	struct btrfs_device *src_device = NULL; +	if (btrfs_fs_incompat(fs_info, RAID56)) { +		pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); +		return -EINVAL; +	} +  	switch (args->start.cont_reading_from_srcdev_mode) {  	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:  	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4e9ebe1f182..e7b3cb5286a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {  	{ .id = BTRFS_DEV_TREE_OBJECTID,	.name_stem = "dev"	},  	{ .id = BTRFS_FS_TREE_OBJECTID,		.name_stem = "fs"	},  	{ .id = BTRFS_CSUM_TREE_OBJECTID,	.name_stem = "csum"	}, -	{ .id = BTRFS_ORPHAN_OBJECTID,		.name_stem = "orphan"	}, +	{ .id = BTRFS_QUOTA_TREE_OBJECTID,	.name_stem = "quota"	},  	{ .id = BTRFS_TREE_LOG_OBJECTID,	.name_stem = "log"	},  	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},  	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	}, @@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,  	}  	root->commit_root = btrfs_root_node(root); -	BUG_ON(!root->node); /* -ENOMEM */  out:  	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {  		root->ref_cows = 1; @@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)  {  	free_extent_buffer(info->tree_root->node);  	free_extent_buffer(info->tree_root->commit_root); -	free_extent_buffer(info->dev_root->node); -	free_extent_buffer(info->dev_root->commit_root); -	free_extent_buffer(info->extent_root->node); -	free_extent_buffer(info->extent_root->commit_root); -	free_extent_buffer(info->csum_root->node); -	free_extent_buffer(info->csum_root->commit_root); -	if (info->quota_root) { -		free_extent_buffer(info->quota_root->node); -		free_extent_buffer(info->quota_root->commit_root); -	} -  	info->tree_root->node = NULL;  	info->tree_root->commit_root = NULL; -	info->dev_root->node = NULL; -	info->dev_root->commit_root = NULL; -	info->extent_root->node = NULL; -	info->extent_root->commit_root = NULL; -	info->csum_root->node = NULL; -	info->csum_root->commit_root = NULL; + +	if (info->dev_root) { +		free_extent_buffer(info->dev_root->node); +		free_extent_buffer(info->dev_root->commit_root); +		info->dev_root->node = NULL; +		info->dev_root->commit_root = NULL; +	} +	if (info->extent_root) { +		free_extent_buffer(info->extent_root->node); +		free_extent_buffer(info->extent_root->commit_root); +		info->extent_root->node = NULL; +		info->extent_root->commit_root = NULL; +	} +	if (info->csum_root) { +		free_extent_buffer(info->csum_root->node); +		free_extent_buffer(info->csum_root->commit_root); +		info->csum_root->node = NULL; +		info->csum_root->commit_root = NULL; +	}  	if (info->quota_root) { +		free_extent_buffer(info->quota_root->node); +		free_extent_buffer(info->quota_root->commit_root);  		info->quota_root->node = NULL;  		info->quota_root->commit_root = NULL;  	} -  	if (chunk_root) {  		free_extent_buffer(info->chunk_root->node);  		free_extent_buffer(info->chunk_root->commit_root); @@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)  	 * caller  	 */  	device->flush_bio = NULL; -	bio = bio_alloc(GFP_NOFS, 0); +	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);  	if (!bio)  		return -ENOMEM; @@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,  					 ordered_operations);  		list_del_init(&btrfs_inode->ordered_operations); +		spin_unlock(&root->fs_info->ordered_extent_lock);  		btrfs_invalidate_inodes(btrfs_inode->root); + +		spin_lock(&root->fs_info->ordered_extent_lock);  	}  	spin_unlock(&root->fs_info->ordered_extent_lock); @@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)  		list_del_init(&btrfs_inode->delalloc_inodes);  		clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,  			  &btrfs_inode->runtime_flags); +		spin_unlock(&root->fs_info->delalloc_lock);  		btrfs_invalidate_inodes(btrfs_inode->root); + +		spin_lock(&root->fs_info->delalloc_lock);  	}  	spin_unlock(&root->fs_info->delalloc_lock); @@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,  		while (start <= end) {  			eb = btrfs_find_tree_block(root, start,  						   root->leafsize); -			start += eb->len; +			start += root->leafsize;  			if (!eb)  				continue;  			wait_on_extent_buffer_writeback(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2305b5c5cf0..df472ab1b5a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,  	u32 item_size;  	int ret;  	int err = 0; -	int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || -			node->type == BTRFS_SHARED_BLOCK_REF_KEY); +	int metadata = !extent_op->is_data;  	if (trans->aborted)  		return 0; @@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,  	key.objectid = node->bytenr;  	if (metadata) { -		struct btrfs_delayed_tree_ref *tree_ref; - -		tree_ref = btrfs_delayed_node_to_tree_ref(node);  		key.type = BTRFS_METADATA_ITEM_KEY; -		key.offset = tree_ref->level; +		key.offset = extent_op->level;  	} else {  		key.type = BTRFS_EXTENT_ITEM_KEY;  		key.offset = node->num_bytes; @@ -2719,7 +2715,7 @@ out:  int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,  				struct btrfs_root *root,  				u64 bytenr, u64 num_bytes, u64 flags, -				int is_data) +				int level, int is_data)  {  	struct btrfs_delayed_extent_op *extent_op;  	int ret; @@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,  	extent_op->update_flags = 1;  	extent_op->update_key = 0;  	extent_op->is_data = is_data ? 1 : 0; +	extent_op->level = level;  	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,  					  num_bytes, extent_op); @@ -3109,6 +3106,11 @@ again:  	WARN_ON(ret);  	if (i_size_read(inode) > 0) { +		ret = btrfs_check_trunc_cache_free_space(root, +					&root->fs_info->global_block_rsv); +		if (ret) +			goto out_put; +  		ret = btrfs_truncate_free_space_cache(root, trans, path,  						      inode);  		if (ret) @@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)  	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;  	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;  	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; +	if (fs_info->quota_root) +		fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;  	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;  	update_global_block_rsv(fs_info); @@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,  	struct btrfs_block_rsv *block_rsv;  	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;  	int ret; +	bool global_updated = false;  	block_rsv = get_block_rsv(trans, root); -	if (block_rsv->size == 0) { -		ret = reserve_metadata_bytes(root, block_rsv, blocksize, -					     BTRFS_RESERVE_NO_FLUSH); -		/* -		 * If we couldn't reserve metadata bytes try and use some from -		 * the global reserve. -		 */ -		if (ret && block_rsv != global_rsv) { -			ret = block_rsv_use_bytes(global_rsv, blocksize); -			if (!ret) -				return global_rsv; -			return ERR_PTR(ret); -		} else if (ret) { -			return ERR_PTR(ret); -		} +	if (unlikely(block_rsv->size == 0)) +		goto try_reserve; +again: +	ret = block_rsv_use_bytes(block_rsv, blocksize); +	if (!ret)  		return block_rsv; + +	if (block_rsv->failfast) +		return ERR_PTR(ret); + +	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { +		global_updated = true; +		update_global_block_rsv(root->fs_info); +		goto again;  	} -	ret = block_rsv_use_bytes(block_rsv, blocksize); +	if (btrfs_test_opt(root, ENOSPC_DEBUG)) { +		static DEFINE_RATELIMIT_STATE(_rs, +				DEFAULT_RATELIMIT_INTERVAL * 10, +				/*DEFAULT_RATELIMIT_BURST*/ 1); +		if (__ratelimit(&_rs)) +			WARN(1, KERN_DEBUG +				"btrfs: block rsv returned %d\n", ret); +	} +try_reserve: +	ret = reserve_metadata_bytes(root, block_rsv, blocksize, +				     BTRFS_RESERVE_NO_FLUSH);  	if (!ret)  		return block_rsv; -	if (ret && !block_rsv->failfast) { -		if (btrfs_test_opt(root, ENOSPC_DEBUG)) { -			static DEFINE_RATELIMIT_STATE(_rs, -					DEFAULT_RATELIMIT_INTERVAL * 10, -					/*DEFAULT_RATELIMIT_BURST*/ 1); -			if (__ratelimit(&_rs)) -				WARN(1, KERN_DEBUG -					"btrfs: block rsv returned %d\n", ret); -		} -		ret = reserve_metadata_bytes(root, block_rsv, blocksize, -					     BTRFS_RESERVE_NO_FLUSH); -		if (!ret) { -			return block_rsv; -		} else if (ret && block_rsv != global_rsv) { -			ret = block_rsv_use_bytes(global_rsv, blocksize); -			if (!ret) -				return global_rsv; -		} +	/* +	 * If we couldn't reserve metadata bytes try and use some from +	 * the global reserve if its space type is the same as the global +	 * reservation. +	 */ +	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && +	    block_rsv->space_info == global_rsv->space_info) { +		ret = block_rsv_use_bytes(global_rsv, blocksize); +		if (!ret) +			return global_rsv;  	} - -	return ERR_PTR(-ENOSPC); +	return ERR_PTR(ret);  }  static void unuse_block_rsv(struct btrfs_fs_info *fs_info, @@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,  			extent_op->update_key = 1;  		extent_op->update_flags = 1;  		extent_op->is_data = 0; +		extent_op->level = level;  		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,  					ins.objectid, @@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,  		ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);  		BUG_ON(ret); /* -ENOMEM */  		ret = btrfs_set_disk_extent_flags(trans, root, eb->start, -						  eb->len, flag, 0); +						  eb->len, flag, +						  btrfs_header_level(eb), 0);  		BUG_ON(ret); /* -ENOMEM */  		wc->flags[level] |= flag;  	} diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 32d67a822e9..e7e7afb4a87 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -23,6 +23,7 @@  static struct kmem_cache *extent_state_cache;  static struct kmem_cache *extent_buffer_cache; +static struct bio_set *btrfs_bioset;  #ifdef CONFIG_BTRFS_DEBUG  static LIST_HEAD(buffers); @@ -125,10 +126,20 @@ int __init extent_io_init(void)  			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);  	if (!extent_buffer_cache)  		goto free_state_cache; + +	btrfs_bioset = bioset_create(BIO_POOL_SIZE, +				     offsetof(struct btrfs_io_bio, bio)); +	if (!btrfs_bioset) +		goto free_buffer_cache;  	return 0; +free_buffer_cache: +	kmem_cache_destroy(extent_buffer_cache); +	extent_buffer_cache = NULL; +  free_state_cache:  	kmem_cache_destroy(extent_state_cache); +	extent_state_cache = NULL;  	return -ENOMEM;  } @@ -145,6 +156,8 @@ void extent_io_exit(void)  		kmem_cache_destroy(extent_state_cache);  	if (extent_buffer_cache)  		kmem_cache_destroy(extent_buffer_cache); +	if (btrfs_bioset) +		bioset_free(btrfs_bioset);  }  void extent_io_tree_init(struct extent_io_tree *tree, @@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)  }  /* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static void check_page_locked(struct extent_io_tree *tree, struct page *page) -{ -	u64 start = page_offset(page); -	u64 end = start + PAGE_CACHE_SIZE - 1; -	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) -		unlock_page(page); -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static void check_page_writeback(struct extent_io_tree *tree, -				 struct page *page) -{ -	end_page_writeback(page); -} - -/*   * When IO fails, either with EIO or csum verification fails, we   * try other mirrors that might have a good copy of the data.  This   * io_failure_record is used to record state as we go through all the @@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,  	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))  		return 0; -	bio = bio_alloc(GFP_NOFS, 1); +	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);  	if (!bio)  		return -EIO;  	bio->bi_private = &compl; @@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,  		return -EIO;  	} -	bio = bio_alloc(GFP_NOFS, 1); +	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);  	if (!bio) {  		free_io_failure(inode, failrec, 0);  		return -EIO; @@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)  	struct extent_io_tree *tree;  	u64 start;  	u64 end; -	int whole_page;  	do {  		struct page *page = bvec->bv_page;  		tree = &BTRFS_I(page->mapping->host)->io_tree; -		start = page_offset(page) + bvec->bv_offset; -		end = start + bvec->bv_len - 1; +		/* We always issue full-page reads, but if some block +		 * in a page fails to read, blk_update_request() will +		 * advance bv_offset and adjust bv_len to compensate. +		 * Print a warning for nonzero offsets, and an error +		 * if they don't add up to a full page.  */ +		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) +			printk("%s page write in btrfs with offset %u and length %u\n", +			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE +			       ? KERN_ERR "partial" : KERN_INFO "incomplete", +			       bvec->bv_offset, bvec->bv_len); -		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) -			whole_page = 1; -		else -			whole_page = 0; +		start = page_offset(page); +		end = start + bvec->bv_offset + bvec->bv_len - 1;  		if (--bvec >= bio->bi_io_vec)  			prefetchw(&bvec->bv_page->flags); @@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)  		if (end_extent_writepage(page, err, start, end))  			continue; -		if (whole_page) -			end_page_writeback(page); -		else -			check_page_writeback(tree, page); +		end_page_writeback(page);  	} while (bvec >= bio->bi_io_vec);  	bio_put(bio); @@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  	struct extent_io_tree *tree;  	u64 start;  	u64 end; -	int whole_page;  	int mirror;  	int ret; @@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  		struct page *page = bvec->bv_page;  		struct extent_state *cached = NULL;  		struct extent_state *state; +		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);  		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " -			 "mirror=%ld\n", (u64)bio->bi_sector, err, -			 (long int)bio->bi_bdev); +			 "mirror=%lu\n", (u64)bio->bi_sector, err, +			 io_bio->mirror_num);  		tree = &BTRFS_I(page->mapping->host)->io_tree; -		start = page_offset(page) + bvec->bv_offset; -		end = start + bvec->bv_len - 1; +		/* We always issue full-page reads, but if some block +		 * in a page fails to read, blk_update_request() will +		 * advance bv_offset and adjust bv_len to compensate. +		 * Print a warning for nonzero offsets, and an error +		 * if they don't add up to a full page.  */ +		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) +			printk("%s page read in btrfs with offset %u and length %u\n", +			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE +			       ? KERN_ERR "partial" : KERN_INFO "incomplete", +			       bvec->bv_offset, bvec->bv_len); -		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) -			whole_page = 1; -		else -			whole_page = 0; +		start = page_offset(page); +		end = start + bvec->bv_offset + bvec->bv_len - 1;  		if (++bvec <= bvec_end)  			prefetchw(&bvec->bv_page->flags); @@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  		}  		spin_unlock(&tree->lock); -		mirror = (int)(unsigned long)bio->bi_bdev; +		mirror = io_bio->mirror_num;  		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {  			ret = tree->ops->readpage_end_io_hook(page, start, end,  							      state, mirror); @@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  		}  		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); -		if (whole_page) { -			if (uptodate) { -				SetPageUptodate(page); -			} else { -				ClearPageUptodate(page); -				SetPageError(page); -			} -			unlock_page(page); +		if (uptodate) { +			SetPageUptodate(page);  		} else { -			if (uptodate) { -				check_page_uptodate(tree, page); -			} else { -				ClearPageUptodate(page); -				SetPageError(page); -			} -			check_page_locked(tree, page); +			ClearPageUptodate(page); +			SetPageError(page);  		} +		unlock_page(page);  	} while (bvec <= bvec_end);  	bio_put(bio);  } +/* + * this allocates from the btrfs_bioset.  We're returning a bio right now + * but you can call btrfs_io_bio for the appropriate container_of magic + */  struct bio *  btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,  		gfp_t gfp_flags)  {  	struct bio *bio; -	bio = bio_alloc(gfp_flags, nr_vecs); +	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);  	if (bio == NULL && (current->flags & PF_MEMALLOC)) { -		while (!bio && (nr_vecs /= 2)) -			bio = bio_alloc(gfp_flags, nr_vecs); +		while (!bio && (nr_vecs /= 2)) { +			bio = bio_alloc_bioset(gfp_flags, +					       nr_vecs, btrfs_bioset); +		}  	}  	if (bio) { @@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,  	return bio;  } +struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) +{ +	return bio_clone_bioset(bio, gfp_mask, btrfs_bioset); +} + + +/* this also allocates from the btrfs_bioset */ +struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ +	return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); +} + +  static int __must_check submit_one_bio(int rw, struct bio *bio,  				       int mirror_num, unsigned long bio_flags)  { @@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  		last_for_get_extent = isize;  	} -	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, +	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,  			 &cached_state);  	em = get_extent_skip_holes(inode, start, last_for_get_extent, @@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  out_free:  	free_extent_map(em);  out: -	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, +	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,  			     &cached_state, GFP_NOFS);  	return ret;  } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a2c03a17500..41fb81e7ec5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,  struct bio *  btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,  		gfp_t gfp_flags); +struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); +struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);  struct btrfs_fs_info; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ecca6c7375a..e53009657f0 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,  					 block_group->key.objectid);  } -int btrfs_truncate_free_space_cache(struct btrfs_root *root, -				    struct btrfs_trans_handle *trans, -				    struct btrfs_path *path, -				    struct inode *inode) +int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, +				       struct btrfs_block_rsv *rsv)  { -	struct btrfs_block_rsv *rsv;  	u64 needed_bytes; -	loff_t oldsize; -	int ret = 0; - -	rsv = trans->block_rsv; -	trans->block_rsv = &root->fs_info->global_block_rsv; +	int ret;  	/* 1 for slack space, 1 for updating the inode */  	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +  		btrfs_calc_trans_metadata_size(root, 1); -	spin_lock(&trans->block_rsv->lock); -	if (trans->block_rsv->reserved < needed_bytes) { -		spin_unlock(&trans->block_rsv->lock); -		trans->block_rsv = rsv; -		return -ENOSPC; -	} -	spin_unlock(&trans->block_rsv->lock); +	spin_lock(&rsv->lock); +	if (rsv->reserved < needed_bytes) +		ret = -ENOSPC; +	else +		ret = 0; +	spin_unlock(&rsv->lock); +	return 0; +} + +int btrfs_truncate_free_space_cache(struct btrfs_root *root, +				    struct btrfs_trans_handle *trans, +				    struct btrfs_path *path, +				    struct inode *inode) +{ +	loff_t oldsize; +	int ret = 0;  	oldsize = i_size_read(inode);  	btrfs_i_size_write(inode, 0); @@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,  	 */  	ret = btrfs_truncate_inode_items(trans, root, inode,  					 0, BTRFS_EXTENT_DATA_KEY); -  	if (ret) { -		trans->block_rsv = rsv;  		btrfs_abort_transaction(trans, root, ret);  		return ret;  	} @@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,  	ret = btrfs_update_inode(trans, root, inode);  	if (ret)  		btrfs_abort_transaction(trans, root, ret); -	trans->block_rsv = rsv;  	return ret;  } @@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,  	/* Make sure we can fit our crcs into the first page */  	if (io_ctl.check_crcs && -	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { -		WARN_ON(1); +	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)  		goto out_nospc; -	}  	io_ctl_set_generation(&io_ctl, trans->transid); diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 4dc17d8809c..8b7f19f4496 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,  			    struct btrfs_block_group_cache *block_group,  			    struct btrfs_path *path); +int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, +				       struct btrfs_block_rsv *rsv);  int btrfs_truncate_free_space_cache(struct btrfs_root *root,  				    struct btrfs_trans_handle *trans,  				    struct btrfs_path *path, diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index d26f67a59e3..2c66ddbbe67 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,  	num_bytes = trans->bytes_reserved;  	/*  	 * 1 item for inode item insertion if need -	 * 3 items for inode item update (in the worst case) +	 * 4 items for inode item update (in the worst case) +	 * 1 items for slack space if we need do truncation  	 * 1 item for free space object  	 * 3 items for pre-allocation  	 */ -	trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); +	trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);  	ret = btrfs_block_rsv_add(root, trans->block_rsv,  				  trans->bytes_reserved,  				  BTRFS_RESERVE_NO_FLUSH); @@ -468,7 +469,8 @@ again:  	if (i_size_read(inode) > 0) {  		ret = btrfs_truncate_free_space_cache(root, trans, path, inode);  		if (ret) { -			btrfs_abort_transaction(trans, root, ret); +			if (ret != -ENOSPC) +				btrfs_abort_transaction(trans, root, ret);  			goto out_put;  		}  	} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9b31b3b091f..af978f7682b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -715,8 +715,10 @@ retry:  					async_extent->ram_size - 1, 0);  		em = alloc_extent_map(); -		if (!em) +		if (!em) { +			ret = -ENOMEM;  			goto out_free_reserve; +		}  		em->start = async_extent->start;  		em->len = async_extent->ram_size;  		em->orig_start = em->start; @@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,  		}  		em = alloc_extent_map(); -		if (!em) +		if (!em) { +			ret = -ENOMEM;  			goto out_reserve; +		}  		em->start = start;  		em->orig_start = em->start;  		ram_size = ins.offset; @@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)  	btrfs_end_transaction(trans, root);  	btrfs_btree_balance_dirty(root);  no_delete: +	btrfs_remove_delayed_node(inode);  	clear_inode(inode);  	return;  } @@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)  	struct rb_node **p;  	struct rb_node *parent;  	u64 ino = btrfs_ino(inode); -again: -	p = &root->inode_tree.rb_node; -	parent = NULL;  	if (inode_unhashed(inode))  		return; - +again: +	parent = NULL;  	spin_lock(&root->inode_lock); +	p = &root->inode_tree.rb_node;  	while (*p) {  		parent = *p;  		entry = rb_entry(parent, struct btrfs_inode, rb_node); @@ -6928,7 +6932,11 @@ struct btrfs_dio_private {  	/* IO errors */  	int errors; +	/* orig_bio is our btrfs_io_bio */  	struct bio *orig_bio; + +	/* dio_bio came from fs/direct-io.c */ +	struct bio *dio_bio;  };  static void btrfs_endio_direct_read(struct bio *bio, int err) @@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)  	struct bio_vec *bvec = bio->bi_io_vec;  	struct inode *inode = dip->inode;  	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct bio *dio_bio;  	u64 start;  	start = dip->logical_offset; @@ -6977,14 +6986,15 @@ failed:  	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,  		      dip->logical_offset + dip->bytes - 1); -	bio->bi_private = dip->private; +	dio_bio = dip->dio_bio;  	kfree(dip);  	/* If we had a csum failure make sure to clear the uptodate flag */  	if (err) -		clear_bit(BIO_UPTODATE, &bio->bi_flags); -	dio_end_io(bio, err); +		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); +	dio_end_io(dio_bio, err); +	bio_put(bio);  }  static void btrfs_endio_direct_write(struct bio *bio, int err) @@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)  	struct btrfs_ordered_extent *ordered = NULL;  	u64 ordered_offset = dip->logical_offset;  	u64 ordered_bytes = dip->bytes; +	struct bio *dio_bio;  	int ret;  	if (err) @@ -7022,14 +7033,15 @@ out_test:  		goto again;  	}  out_done: -	bio->bi_private = dip->private; +	dio_bio = dip->dio_bio;  	kfree(dip);  	/* If we had an error make sure to clear the uptodate flag */  	if (err) -		clear_bit(BIO_UPTODATE, &bio->bi_flags); -	dio_end_io(bio, err); +		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); +	dio_end_io(dio_bio, err); +	bio_put(bio);  }  static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, @@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)  	if (!atomic_dec_and_test(&dip->pending_bios))  		goto out; -	if (dip->errors) +	if (dip->errors) {  		bio_io_error(dip->orig_bio); -	else { -		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); +	} else { +		set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);  		bio_endio(dip->orig_bio, 0);  	}  out: @@ -7243,25 +7255,34 @@ out_err:  	return 0;  } -static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, -				loff_t file_offset) +static void btrfs_submit_direct(int rw, struct bio *dio_bio, +				struct inode *inode, loff_t file_offset)  {  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_dio_private *dip; -	struct bio_vec *bvec = bio->bi_io_vec; +	struct bio_vec *bvec = dio_bio->bi_io_vec; +	struct bio *io_bio;  	int skip_sum;  	int write = rw & REQ_WRITE;  	int ret = 0;  	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; +	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); + +	if (!io_bio) { +		ret = -ENOMEM; +		goto free_ordered; +	} +  	dip = kmalloc(sizeof(*dip), GFP_NOFS);  	if (!dip) {  		ret = -ENOMEM; -		goto free_ordered; +		goto free_io_bio;  	} -	dip->private = bio->bi_private; +	dip->private = dio_bio->bi_private; +	io_bio->bi_private = dio_bio->bi_private;  	dip->inode = inode;  	dip->logical_offset = file_offset; @@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,  	do {  		dip->bytes += bvec->bv_len;  		bvec++; -	} while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); +	} while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1)); -	dip->disk_bytenr = (u64)bio->bi_sector << 9; -	bio->bi_private = dip; +	dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; +	io_bio->bi_private = dip;  	dip->errors = 0; -	dip->orig_bio = bio; +	dip->orig_bio = io_bio; +	dip->dio_bio = dio_bio;  	atomic_set(&dip->pending_bios, 0);  	if (write) -		bio->bi_end_io = btrfs_endio_direct_write; +		io_bio->bi_end_io = btrfs_endio_direct_write;  	else -		bio->bi_end_io = btrfs_endio_direct_read; +		io_bio->bi_end_io = btrfs_endio_direct_read;  	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);  	if (!ret)  		return; + +free_io_bio: +	bio_put(io_bio); +  free_ordered:  	/*  	 * If this is a write, we need to clean up the reserved space and kill @@ -7300,7 +7326,7 @@ free_ordered:  		btrfs_put_ordered_extent(ordered);  		btrfs_put_ordered_extent(ordered);  	} -	bio_endio(bio, ret); +	bio_endio(dio_bio, ret);  }  static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, @@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)  	inode_tree_del(inode);  	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);  free: -	btrfs_remove_delayed_node(inode);  	call_rcu(&inode->i_rcu, btrfs_i_callback);  } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0de4a2fcfb2..0f81d67cdc8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,  		item_off = btrfs_item_ptr_offset(leaf, i);  		item_len = btrfs_item_size_nr(leaf, i); -		if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) +		btrfs_item_key_to_cpu(leaf, key, i); +		if (!key_in_sk(key, sk)) +			continue; + +		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)  			item_len = 0;  		if (sizeof(sh) + item_len + *sk_offset > @@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,  			goto overflow;  		} -		btrfs_item_key_to_cpu(leaf, key, i); -		if (!key_in_sk(key, sk)) -			continue; -  		sh.objectid = key->objectid;  		sh.offset = key->offset;  		sh.type = key->type; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0740621daf6..0525e1389f5 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,  	}  	/* put a new bio on the list */ -	bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); +	bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);  	if (!bio)  		return -ENOMEM; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 704a1b8d2a2..395b82031a4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1773,7 +1773,7 @@ again:  			if (!eb || !extent_buffer_uptodate(eb)) {  				ret = (!eb) ? -ENOMEM : -EIO;  				free_extent_buffer(eb); -				return ret; +				break;  			}  			btrfs_tree_lock(eb);  			if (cow) { @@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,  	}  truncate: +	ret = btrfs_check_trunc_cache_free_space(root, +						 &fs_info->global_block_rsv); +	if (ret) +		goto out; +  	path = btrfs_alloc_path();  	if (!path) {  		ret = -ENOMEM; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f489e24659a..79bd479317c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,  		}  		WARN_ON(!page->page); -		bio = bio_alloc(GFP_NOFS, 1); +		bio = btrfs_io_bio_alloc(GFP_NOFS, 1);  		if (!bio) {  			page->io_error = 1;  			sblock->no_io_error_seen = 0; @@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,  			return -EIO;  		} -		bio = bio_alloc(GFP_NOFS, 1); +		bio = btrfs_io_bio_alloc(GFP_NOFS, 1);  		if (!bio)  			return -EIO;  		bio->bi_bdev = page_bad->dev->bdev; @@ -1522,7 +1522,7 @@ again:  		sbio->dev = wr_ctx->tgtdev;  		bio = sbio->bio;  		if (!bio) { -			bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); +			bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);  			if (!bio) {  				mutex_unlock(&wr_ctx->wr_lock);  				return -ENOMEM; @@ -1930,7 +1930,7 @@ again:  		sbio->dev = spage->dev;  		bio = sbio->bio;  		if (!bio) { -			bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); +			bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);  			if (!bio)  				return -ENOMEM;  			sbio->bio = bio; @@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,  			"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");  		return -EIO;  	} -	bio = bio_alloc(GFP_NOFS, 1); +	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);  	if (!bio) {  		spin_lock(&sctx->stat_lock);  		sctx->stat.malloc_errors++; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a4807ced23c..f0857e092a3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  		btrfs_dev_replace_suspend_for_unmount(fs_info);  		btrfs_scrub_cancel(fs_info); +		btrfs_pause_balance(fs_info);  		ret = btrfs_commit_super(root);  		if (ret) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0e925ced971..8bffb9174af 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,  	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;  	if (num_devices == 1)  		allowed |= BTRFS_BLOCK_GROUP_DUP; -	else if (num_devices < 4) +	else if (num_devices > 1)  		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); -	else -		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | -				BTRFS_BLOCK_GROUP_RAID10 | -				BTRFS_BLOCK_GROUP_RAID5 | -				BTRFS_BLOCK_GROUP_RAID6); - +	if (num_devices > 2) +		allowed |= BTRFS_BLOCK_GROUP_RAID5; +	if (num_devices > 3) +		allowed |= (BTRFS_BLOCK_GROUP_RAID10 | +			    BTRFS_BLOCK_GROUP_RAID6);  	if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&  	    (!alloc_profile_is_valid(bctl->data.target, 1) ||  	     (bctl->data.target & ~allowed))) { @@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,  	return 0;  } -static void *merge_stripe_index_into_bio_private(void *bi_private, -						 unsigned int stripe_index) -{ -	/* -	 * with single, dup, RAID0, RAID1 and RAID10, stripe_index is -	 * at most 1. -	 * The alternative solution (instead of stealing bits from the -	 * pointer) would be to allocate an intermediate structure -	 * that contains the old private pointer plus the stripe_index. -	 */ -	BUG_ON((((uintptr_t)bi_private) & 3) != 0); -	BUG_ON(stripe_index > 3); -	return (void *)(((uintptr_t)bi_private) | stripe_index); -} - -static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) -{ -	return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); -} - -static unsigned int extract_stripe_index_from_bio_private(void *bi_private) -{ -	return (unsigned int)((uintptr_t)bi_private) & 3; -} -  static void btrfs_end_bio(struct bio *bio, int err)  { -	struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); +	struct btrfs_bio *bbio = bio->bi_private;  	int is_orig_bio = 0;  	if (err) {  		atomic_inc(&bbio->error);  		if (err == -EIO || err == -EREMOTEIO) {  			unsigned int stripe_index = -				extract_stripe_index_from_bio_private( -					bio->bi_private); +				btrfs_io_bio(bio)->stripe_index;  			struct btrfs_device *dev;  			BUG_ON(stripe_index >= bbio->num_stripes); @@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)  		}  		bio->bi_private = bbio->private;  		bio->bi_end_io = bbio->end_io; -		bio->bi_bdev = (struct block_device *) -					(unsigned long)bbio->mirror_num; +		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;  		/* only send an error to the higher layers if it is  		 * beyond the tolerance of the btrfs bio  		 */ @@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,  	struct btrfs_device *dev = bbio->stripes[dev_nr].dev;  	bio->bi_private = bbio; -	bio->bi_private = merge_stripe_index_into_bio_private( -			bio->bi_private, (unsigned int)dev_nr); +	btrfs_io_bio(bio)->stripe_index = dev_nr;  	bio->bi_end_io = btrfs_end_bio;  	bio->bi_sector = physical >> 9;  #ifdef DEBUG @@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)  	if (atomic_dec_and_test(&bbio->stripes_pending)) {  		bio->bi_private = bbio->private;  		bio->bi_end_io = bbio->end_io; -		bio->bi_bdev = (struct block_device *) -			(unsigned long)bbio->mirror_num; +		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;  		bio->bi_sector = logical >> 9;  		kfree(bbio);  		bio_endio(bio, -EIO); @@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,  		}  		if (dev_nr < total_devs - 1) { -			bio = bio_clone(first_bio, GFP_NOFS); +			bio = btrfs_bio_clone(first_bio, GFP_NOFS);  			BUG_ON(!bio); /* -ENOMEM */  		} else {  			bio = first_bio; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 845ccbb0d2e..f6247e2a47f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -152,6 +152,26 @@ struct btrfs_fs_devices {  	int rotating;  }; +/* + * we need the mirror number and stripe index to be passed around + * the call chain while we are processing end_io (especially errors). + * Really, what we need is a btrfs_bio structure that has this info + * and is properly sized with its stripe array, but we're not there + * quite yet.  We have our own btrfs bioset, and all of the bios + * we allocate are actually btrfs_io_bios.  We'll cram as much of + * struct btrfs_bio as we can into this over time. + */ +struct btrfs_io_bio { +	unsigned long mirror_num; +	unsigned long stripe_index; +	struct bio bio; +}; + +static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) +{ +	return container_of(bio, struct btrfs_io_bio, bio); +} +  struct btrfs_bio_stripe {  	struct btrfs_device *dev;  	u64 physical;  |