Diffstat (limited to 'fs/btrfs')
 fs/btrfs/backref.c          |   2
 fs/btrfs/ctree.c            |  17
 fs/btrfs/ctree.h            |   8
 fs/btrfs/disk-io.c          | 147
 fs/btrfs/extent-tree.c      | 153
 fs/btrfs/extent_io.c        |  36
 fs/btrfs/extent_io.h        |   2
 fs/btrfs/free-space-cache.c |  65
 fs/btrfs/inode.c            |   8
 fs/btrfs/ioctl.c            |  17
 fs/btrfs/scrub.c            |   7
 fs/btrfs/super.c            |  44
 fs/btrfs/transaction.c      |   8
 fs/btrfs/volumes.h          |   6
14 files changed, 354 insertions, 166 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8855aad3929..22c64fff1bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
 		return PTR_ERR(fspath);
 
 	if (fspath > fspath_min) {
-		ipath->fspath->val[i] = (u64)fspath;
+		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
 		++ipath->fspath->elem_cnt;
 		ipath->fspath->bytes_left = fspath - fspath_min;
 	} else {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0fe615e4ea3..dede441bdee 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct extent_buffer *buf)
 {
+	/* ensure we can see the force_cow */
+	smp_rmb();
+
+	/*
+	 * We do not need to cow a block if
+	 * 1) this block is not created or changed in this transaction;
+	 * 2) this block does not belong to TREE_RELOC tree;
+	 * 3) the root is not forced COW.
+	 *
+	 * What is forced COW:
+	 *    when we create a snapshot during committing the transaction,
+	 *    after we've finished copying the src root, we must COW the shared
+	 *    block to ensure the metadata consistency.
+	 */
 	if (btrfs_header_generation(buf) == trans->transid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
-	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
+	    !root->force_cow)
 		return 0;
 	return 1;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59ff929..50634abef9b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
 enum btrfs_caching_type {
 	BTRFS_CACHE_NO		= 0,
 	BTRFS_CACHE_STARTED	= 1,
-	BTRFS_CACHE_FINISHED	= 2,
+	BTRFS_CACHE_FAST	= 2,
+	BTRFS_CACHE_FINISHED	= 3,
 };
 
 enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
 	 * for stat.
It may be used for more later  	 */  	dev_t anon_dev; + +	int force_cow;  };  struct btrfs_ioctl_defrag_range_args { @@ -2366,6 +2369,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,  int btrfs_block_rsv_refill(struct btrfs_root *root,  			  struct btrfs_block_rsv *block_rsv,  			  u64 min_reserved); +int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, +				   struct btrfs_block_rsv *block_rsv, +				   u64 min_reserved);  int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,  			    struct btrfs_block_rsv *dst_rsv,  			    u64 num_bytes); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62afe5c5694..632f8f3cc9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -620,7 +620,7 @@ out:  static int btree_io_failed_hook(struct bio *failed_bio,  			 struct page *page, u64 start, u64 end, -			 u64 mirror_num, struct extent_state *state) +			 int mirror_num, struct extent_state *state)  {  	struct extent_io_tree *tree;  	unsigned long len; @@ -2573,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,  	int errors = 0;  	u32 crc;  	u64 bytenr; -	int last_barrier = 0;  	if (max_mirrors == 0)  		max_mirrors = BTRFS_SUPER_MIRROR_MAX; -	/* make sure only the last submit_bh does a barrier */ -	if (do_barriers) { -		for (i = 0; i < max_mirrors; i++) { -			bytenr = btrfs_sb_offset(i); -			if (bytenr + BTRFS_SUPER_INFO_SIZE >= -			    device->total_bytes) -				break; -			last_barrier = i; -		} -	} -  	for (i = 0; i < max_mirrors; i++) {  		bytenr = btrfs_sb_offset(i);  		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) @@ -2634,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,  			bh->b_end_io = btrfs_end_buffer_write_sync;  		} -		if (i == last_barrier && do_barriers) -			ret = submit_bh(WRITE_FLUSH_FUA, bh); -		else -			ret = submit_bh(WRITE_SYNC, bh); - +		/* +		 * we fua the first super.  The others we allow +		 * to go down lazy. +		 */ +		ret = submit_bh(WRITE_FUA, bh);  		if (ret)  			errors++;  	}  	return errors < i ? 0 : -1;  } +/* + * endio for the write_dev_flush, this will wake anyone waiting + * for the barrier when it is done + */ +static void btrfs_end_empty_barrier(struct bio *bio, int err) +{ +	if (err) { +		if (err == -EOPNOTSUPP) +			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); +		clear_bit(BIO_UPTODATE, &bio->bi_flags); +	} +	if (bio->bi_private) +		complete(bio->bi_private); +	bio_put(bio); +} + +/* + * trigger flushes for one the devices.  If you pass wait == 0, the flushes are + * sent down.  With wait == 1, it waits for the previous flush. 
+ * + * any device where the flush fails with eopnotsupp are flagged as not-barrier + * capable + */ +static int write_dev_flush(struct btrfs_device *device, int wait) +{ +	struct bio *bio; +	int ret = 0; + +	if (device->nobarriers) +		return 0; + +	if (wait) { +		bio = device->flush_bio; +		if (!bio) +			return 0; + +		wait_for_completion(&device->flush_wait); + +		if (bio_flagged(bio, BIO_EOPNOTSUPP)) { +			printk("btrfs: disabling barriers on dev %s\n", +			       device->name); +			device->nobarriers = 1; +		} +		if (!bio_flagged(bio, BIO_UPTODATE)) { +			ret = -EIO; +		} + +		/* drop the reference from the wait == 0 run */ +		bio_put(bio); +		device->flush_bio = NULL; + +		return ret; +	} + +	/* +	 * one reference for us, and we leave it for the +	 * caller +	 */ +	device->flush_bio = NULL;; +	bio = bio_alloc(GFP_NOFS, 0); +	if (!bio) +		return -ENOMEM; + +	bio->bi_end_io = btrfs_end_empty_barrier; +	bio->bi_bdev = device->bdev; +	init_completion(&device->flush_wait); +	bio->bi_private = &device->flush_wait; +	device->flush_bio = bio; + +	bio_get(bio); +	submit_bio(WRITE_FLUSH, bio); + +	return 0; +} + +/* + * send an empty flush down to each device in parallel, + * then wait for them + */ +static int barrier_all_devices(struct btrfs_fs_info *info) +{ +	struct list_head *head; +	struct btrfs_device *dev; +	int errors = 0; +	int ret; + +	/* send down all the barriers */ +	head = &info->fs_devices->devices; +	list_for_each_entry_rcu(dev, head, dev_list) { +		if (!dev->bdev) { +			errors++; +			continue; +		} +		if (!dev->in_fs_metadata || !dev->writeable) +			continue; + +		ret = write_dev_flush(dev, 0); +		if (ret) +			errors++; +	} + +	/* wait for all the barriers */ +	list_for_each_entry_rcu(dev, head, dev_list) { +		if (!dev->bdev) { +			errors++; +			continue; +		} +		if (!dev->in_fs_metadata || !dev->writeable) +			continue; + +		ret = write_dev_flush(dev, 1); +		if (ret) +			errors++; +	} +	if (errors) +		return -EIO; +	return 0; +} +  int write_all_supers(struct btrfs_root *root, int max_mirrors)  {  	struct list_head *head; @@ -2666,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)  	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);  	head = &root->fs_info->fs_devices->devices; + +	if (do_barriers) +		barrier_all_devices(root->fs_info); +  	list_for_each_entry_rcu(dev, head, dev_list) {  		if (!dev->bdev) {  			total_errors++; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b232150b5b6..f0d5718d258 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,  			     struct btrfs_root *root,  			     int load_cache_only)  { +	DEFINE_WAIT(wait);  	struct btrfs_fs_info *fs_info = cache->fs_info;  	struct btrfs_caching_control *caching_ctl;  	int ret = 0; -	smp_mb(); -	if (cache->cached != BTRFS_CACHE_NO) +	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); +	BUG_ON(!caching_ctl); + +	INIT_LIST_HEAD(&caching_ctl->list); +	mutex_init(&caching_ctl->mutex); +	init_waitqueue_head(&caching_ctl->wait); +	caching_ctl->block_group = cache; +	caching_ctl->progress = cache->key.objectid; +	atomic_set(&caching_ctl->count, 1); +	caching_ctl->work.func = caching_thread; + +	spin_lock(&cache->lock); +	/* +	 * This should be a rare occasion, but this could happen I think in the +	 * case where one thread starts to load the space cache info, and then +	 * some other thread starts a transaction commit which tries to do an +	 * allocation while the 
other thread is still loading the space cache +	 * info.  The previous loop should have kept us from choosing this block +	 * group, but if we've moved to the state where we will wait on caching +	 * block groups we need to first check if we're doing a fast load here, +	 * so we can wait for it to finish, otherwise we could end up allocating +	 * from a block group who's cache gets evicted for one reason or +	 * another. +	 */ +	while (cache->cached == BTRFS_CACHE_FAST) { +		struct btrfs_caching_control *ctl; + +		ctl = cache->caching_ctl; +		atomic_inc(&ctl->count); +		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); +		spin_unlock(&cache->lock); + +		schedule(); + +		finish_wait(&ctl->wait, &wait); +		put_caching_control(ctl); +		spin_lock(&cache->lock); +	} + +	if (cache->cached != BTRFS_CACHE_NO) { +		spin_unlock(&cache->lock); +		kfree(caching_ctl);  		return 0; +	} +	WARN_ON(cache->caching_ctl); +	cache->caching_ctl = caching_ctl; +	cache->cached = BTRFS_CACHE_FAST; +	spin_unlock(&cache->lock);  	/*  	 * We can't do the read from on-disk cache during a commit since we need @@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,  	if (trans && (!trans->transaction->in_commit) &&  	    (root && root != root->fs_info->tree_root) &&  	    btrfs_test_opt(root, SPACE_CACHE)) { -		spin_lock(&cache->lock); -		if (cache->cached != BTRFS_CACHE_NO) { -			spin_unlock(&cache->lock); -			return 0; -		} -		cache->cached = BTRFS_CACHE_STARTED; -		spin_unlock(&cache->lock); -  		ret = load_free_space_cache(fs_info, cache);  		spin_lock(&cache->lock);  		if (ret == 1) { +			cache->caching_ctl = NULL;  			cache->cached = BTRFS_CACHE_FINISHED;  			cache->last_byte_to_unpin = (u64)-1;  		} else { -			cache->cached = BTRFS_CACHE_NO; +			if (load_cache_only) { +				cache->caching_ctl = NULL; +				cache->cached = BTRFS_CACHE_NO; +			} else { +				cache->cached = BTRFS_CACHE_STARTED; +			}  		}  		spin_unlock(&cache->lock); +		wake_up(&caching_ctl->wait);  		if (ret == 1) { +			put_caching_control(caching_ctl);  			free_excluded_extents(fs_info->extent_root, cache);  			return 0;  		} +	} else { +		/* +		 * We are not going to do the fast caching, set cached to the +		 * appropriate value and wakeup any waiters. 
+		 */ +		spin_lock(&cache->lock); +		if (load_cache_only) { +			cache->caching_ctl = NULL; +			cache->cached = BTRFS_CACHE_NO; +		} else { +			cache->cached = BTRFS_CACHE_STARTED; +		} +		spin_unlock(&cache->lock); +		wake_up(&caching_ctl->wait);  	} -	if (load_cache_only) -		return 0; - -	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); -	BUG_ON(!caching_ctl); - -	INIT_LIST_HEAD(&caching_ctl->list); -	mutex_init(&caching_ctl->mutex); -	init_waitqueue_head(&caching_ctl->wait); -	caching_ctl->block_group = cache; -	caching_ctl->progress = cache->key.objectid; -	/* one for caching kthread, one for caching block group list */ -	atomic_set(&caching_ctl->count, 2); -	caching_ctl->work.func = caching_thread; - -	spin_lock(&cache->lock); -	if (cache->cached != BTRFS_CACHE_NO) { -		spin_unlock(&cache->lock); -		kfree(caching_ctl); +	if (load_cache_only) { +		put_caching_control(caching_ctl);  		return 0;  	} -	cache->caching_ctl = caching_ctl; -	cache->cached = BTRFS_CACHE_STARTED; -	spin_unlock(&cache->lock);  	down_write(&fs_info->extent_commit_sem); +	atomic_inc(&caching_ctl->count);  	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);  	up_write(&fs_info->extent_commit_sem); @@ -3847,9 +3888,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,  	return ret;  } -int btrfs_block_rsv_refill(struct btrfs_root *root, -			  struct btrfs_block_rsv *block_rsv, -			  u64 min_reserved) +static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, +					   struct btrfs_block_rsv *block_rsv, +					   u64 min_reserved, int flush)  {  	u64 num_bytes = 0;  	int ret = -ENOSPC; @@ -3868,7 +3909,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,  	if (!ret)  		return 0; -	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); +	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);  	if (!ret) {  		block_rsv_add_bytes(block_rsv, num_bytes, 0);  		return 0; @@ -3877,6 +3918,20 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,  	return ret;  } +int btrfs_block_rsv_refill(struct btrfs_root *root, +			   struct btrfs_block_rsv *block_rsv, +			   u64 min_reserved) +{ +	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); +} + +int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, +				   struct btrfs_block_rsv *block_rsv, +				   u64 min_reserved) +{ +	return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); +} +  int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,  			    struct btrfs_block_rsv *dst_rsv,  			    u64 num_bytes) @@ -5178,13 +5233,15 @@ search:  		}  have_block_group: -		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { +		cached = block_group_cache_done(block_group); +		if (unlikely(!cached)) {  			u64 free_percent; +			found_uncached_bg = true;  			ret = cache_block_group(block_group, trans,  						orig_root, 1);  			if (block_group->cached == BTRFS_CACHE_FINISHED) -				goto have_block_group; +				goto alloc;  			free_percent = btrfs_block_group_used(&block_group->item);  			free_percent *= 100; @@ -5206,7 +5263,6 @@ have_block_group:  							orig_root, 0);  				BUG_ON(ret);  			} -			found_uncached_bg = true;  			/*  			 * If loop is set for cached only, try the next block @@ -5216,17 +5272,14 @@ have_block_group:  				goto loop;  		} -		cached = block_group_cache_done(block_group); -		if (unlikely(!cached)) -			found_uncached_bg = true; - +alloc:  		if (unlikely(block_group->ro))  			goto loop;  		spin_lock(&block_group->free_space_ctl->tree_lock);  		if (cached &&  		    
block_group->free_space_ctl->free_space < -		    num_bytes + empty_size) { +		    num_bytes + empty_cluster + empty_size) {  			spin_unlock(&block_group->free_space_ctl->tree_lock);  			goto loop;  		} @@ -5247,12 +5300,10 @@ have_block_group:  			 * people trying to start a new cluster  			 */  			spin_lock(&last_ptr->refill_lock); -			if (last_ptr->block_group && -			    (last_ptr->block_group->ro || -			    !block_group_bits(last_ptr->block_group, data))) { -				offset = 0; +			if (!last_ptr->block_group || +			    last_ptr->block_group->ro || +			    !block_group_bits(last_ptr->block_group, data))  				goto refill_cluster; -			}  			offset = btrfs_alloc_from_cluster(block_group, last_ptr,  						 num_bytes, search_start); @@ -5303,7 +5354,7 @@ refill_cluster:  			/* allocate a cluster in this block group */  			ret = btrfs_find_space_cluster(trans, root,  					       block_group, last_ptr, -					       offset, num_bytes, +					       search_start, num_bytes,  					       empty_cluster + empty_size);  			if (ret == 0) {  				/* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1f87c4d0e7a..be1bf627a14 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2285,16 +2285,22 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  				clean_io_failure(start, page);  		}  		if (!uptodate) { -			u64 failed_mirror; -			failed_mirror = (u64)bio->bi_bdev; -			if (tree->ops && tree->ops->readpage_io_failed_hook) -				ret = tree->ops->readpage_io_failed_hook( -						bio, page, start, end, -						failed_mirror, state); -			else -				ret = bio_readpage_error(bio, page, start, end, -							 failed_mirror, NULL); +			int failed_mirror; +			failed_mirror = (int)(unsigned long)bio->bi_bdev; +			/* +			 * The generic bio_readpage_error handles errors the +			 * following way: If possible, new read requests are +			 * created and submitted and will end up in +			 * end_bio_extent_readpage as well (if we're lucky, not +			 * in the !uptodate case). In that case it returns 0 and +			 * we just go on with the next page in our bio. If it +			 * can't handle the error it will return -EIO and we +			 * remain responsible for that page. +			 */ +			ret = bio_readpage_error(bio, page, start, end, +							failed_mirror, NULL);  			if (ret == 0) { +error_handled:  				uptodate =  					test_bit(BIO_UPTODATE, &bio->bi_flags);  				if (err) @@ -2302,6 +2308,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)  				uncache_state(&cached);  				continue;  			} +			if (tree->ops && tree->ops->readpage_io_failed_hook) { +				ret = tree->ops->readpage_io_failed_hook( +							bio, page, start, end, +							failed_mirror, state); +				if (ret == 0) +					goto error_handled; +			}  		}  		if (uptodate) { @@ -3366,6 +3379,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  		return -ENOMEM;  	path->leave_spinning = 1; +	start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); +	len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); +  	/*  	 * lookup the last file extent.  
We're not using i_size here  	 * because there might be preallocation past i_size @@ -3413,7 +3429,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,  			 &cached_state, GFP_NOFS); -	em = get_extent_skip_holes(inode, off, last_for_get_extent, +	em = get_extent_skip_holes(inode, start, last_for_get_extent,  				   get_extent);  	if (!em)  		goto out; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index feb9be0e23b..7604c300132 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -70,7 +70,7 @@ struct extent_io_ops {  			      unsigned long bio_flags);  	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);  	int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, -				       u64 start, u64 end, u64 failed_mirror, +				       u64 start, u64 end, int failed_mirror,  				       struct extent_state *state);  	int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,  					u64 start, u64 end, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 181760f9d2a..ec23d43d0c3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -351,6 +351,11 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,  		}  	} +	for (i = 0; i < io_ctl->num_pages; i++) { +		clear_page_dirty_for_io(io_ctl->pages[i]); +		set_page_extent_mapped(io_ctl->pages[i]); +	} +  	return 0;  } @@ -1465,6 +1470,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,  {  	info->offset = offset_to_bitmap(ctl, offset);  	info->bytes = 0; +	INIT_LIST_HEAD(&info->list);  	link_free_space(ctl, info);  	ctl->total_bitmaps++; @@ -1844,7 +1850,13 @@ again:  		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),  					  1, 0);  		if (!info) { -			WARN_ON(1); +			/* the tree logging code might be calling us before we +			 * have fully loaded the free space rbtree for this +			 * block group.  So it is possible the entry won't +			 * be in the rbtree yet at all.  The caching code +			 * will make sure not to put it in the rbtree if +			 * the logging code has pinned it. +			 */  			goto out_lock;  		}  	} @@ -2308,6 +2320,7 @@ again:  	if (!found) {  		start = i; +		cluster->max_size = 0;  		found = true;  	} @@ -2451,16 +2464,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,  {  	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;  	struct btrfs_free_space *entry; -	struct rb_node *node;  	int ret = -ENOSPC; +	u64 bitmap_offset = offset_to_bitmap(ctl, offset);  	if (ctl->total_bitmaps == 0)  		return -ENOSPC;  	/* -	 * First check our cached list of bitmaps and see if there is an entry -	 * here that will work. +	 * The bitmap that covers offset won't be in the list unless offset +	 * is just its start offset.  	 */ +	entry = list_first_entry(bitmaps, struct btrfs_free_space, list); +	if (entry->offset != bitmap_offset) { +		entry = tree_search_offset(ctl, bitmap_offset, 1, 0); +		if (entry && list_empty(&entry->list)) +			list_add(&entry->list, bitmaps); +	} +  	list_for_each_entry(entry, bitmaps, list) {  		if (entry->bytes < min_bytes)  			continue; @@ -2471,38 +2491,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,  	}  	/* -	 * If we do have entries on our list and we are here then we didn't find -	 * anything, so go ahead and get the next entry after the last entry in -	 * this list and start the search from there. 
+	 * The bitmaps list has all the bitmaps that record free space +	 * starting after offset, so no more search is required.  	 */ -	if (!list_empty(bitmaps)) { -		entry = list_entry(bitmaps->prev, struct btrfs_free_space, -				   list); -		node = rb_next(&entry->offset_index); -		if (!node) -			return -ENOSPC; -		entry = rb_entry(node, struct btrfs_free_space, offset_index); -		goto search; -	} - -	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); -	if (!entry) -		return -ENOSPC; - -search: -	node = &entry->offset_index; -	do { -		entry = rb_entry(node, struct btrfs_free_space, offset_index); -		node = rb_next(&entry->offset_index); -		if (!entry->bitmap) -			continue; -		if (entry->bytes < min_bytes) -			continue; -		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, -					   bytes, min_bytes); -	} while (ret && node); - -	return ret; +	return -ENOSPC;  }  /* @@ -2520,8 +2512,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,  			     u64 offset, u64 bytes, u64 empty_size)  {  	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; -	struct list_head bitmaps;  	struct btrfs_free_space *entry, *tmp; +	LIST_HEAD(bitmaps);  	u64 min_bytes;  	int ret; @@ -2560,7 +2552,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,  		goto out;  	} -	INIT_LIST_HEAD(&bitmaps);  	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,  				      bytes, min_bytes);  	if (ret) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 116ab67a06d..2c984f7d4c2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3490,7 +3490,7 @@ void btrfs_evict_inode(struct inode *inode)  	 * doing the truncate.  	 */  	while (1) { -		ret = btrfs_block_rsv_refill(root, rsv, min_size); +		ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);  		/*  		 * Try and steal from the global reserve since we will @@ -6794,11 +6794,13 @@ static int btrfs_getattr(struct vfsmount *mnt,  			 struct dentry *dentry, struct kstat *stat)  {  	struct inode *inode = dentry->d_inode; +	u32 blocksize = inode->i_sb->s_blocksize; +  	generic_fillattr(inode, stat);  	stat->dev = BTRFS_I(inode)->root->anon_dev;  	stat->blksize = PAGE_CACHE_SIZE; -	stat->blocks = (inode_get_bytes(inode) + -			BTRFS_I(inode)->delalloc_bytes) >> 9; +	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + +		ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;  	return 0;  } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4a34c472f12..72d461656f6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1216,12 +1216,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  		*devstr = '\0';  		devstr = vol_args->name;  		devid = simple_strtoull(devstr, &end, 10); -		printk(KERN_INFO "resizing devid %llu\n", +		printk(KERN_INFO "btrfs: resizing devid %llu\n",  		       (unsigned long long)devid);  	}  	device = btrfs_find_device(root, devid, NULL, NULL);  	if (!device) { -		printk(KERN_INFO "resizer unable to find device %llu\n", +		printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",  		       (unsigned long long)devid);  		ret = -EINVAL;  		goto out_unlock; @@ -1267,7 +1267,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  	do_div(new_size, root->sectorsize);  	new_size *= root->sectorsize; -	printk(KERN_INFO "new size for %s is %llu\n", +	printk(KERN_INFO "btrfs: new size for %s is %llu\n",  		device->name, (unsigned long long)new_size);  	if (new_size > old_size) { @@ -1278,7 +1278,7 @@ static noinline int 
btrfs_ioctl_resize(struct btrfs_root *root,  		}  		ret = btrfs_grow_device(trans, device, new_size);  		btrfs_commit_transaction(trans, root); -	} else { +	} else if (new_size < old_size) {  		ret = btrfs_shrink_device(device, new_size);  	} @@ -2930,11 +2930,13 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)  		goto out;  	for (i = 0; i < ipath->fspath->elem_cnt; ++i) { -		rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val; +		rel_ptr = ipath->fspath->val[i] - +			  (u64)(unsigned long)ipath->fspath->val;  		ipath->fspath->val[i] = rel_ptr;  	} -	ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size); +	ret = copy_to_user((void *)(unsigned long)ipa->fspath, +			   (void *)(unsigned long)ipath->fspath, size);  	if (ret) {  		ret = -EFAULT;  		goto out; @@ -3017,7 +3019,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,  	if (ret < 0)  		goto out; -	ret = copy_to_user((void *)loi->inodes, (void *)inodes, size); +	ret = copy_to_user((void *)(unsigned long)loi->inodes, +			   (void *)(unsigned long)inodes, size);  	if (ret)  		ret = -EFAULT; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f4190f22edf..c27bcb67f33 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -256,6 +256,11 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  	btrfs_release_path(swarn->path);  	ipath = init_ipath(4096, local_root, swarn->path); +	if (IS_ERR(ipath)) { +		ret = PTR_ERR(ipath); +		ipath = NULL; +		goto err; +	}  	ret = paths_from_inode(inum, ipath);  	if (ret < 0) @@ -272,7 +277,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  			swarn->logical, swarn->dev->name,  			(unsigned long long)swarn->sector, root, inum, offset,  			min(isize - offset, (u64)PAGE_SIZE), nlink, -			(char *)ipath->fspath->val[i]); +			(char *)(unsigned long)ipath->fspath->val[i]);  	free_ipath(ipath);  	return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8bd9d6d0e07..e28ad4baf48 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -825,13 +825,9 @@ static char *setup_root_args(char *args)  static struct dentry *mount_subvol(const char *subvol_name, int flags,  				   const char *device_name, char *data)  { -	struct super_block *s;  	struct dentry *root;  	struct vfsmount *mnt; -	struct mnt_namespace *ns_private;  	char *newargs; -	struct path path; -	int error;  	newargs = setup_root_args(data);  	if (!newargs) @@ -842,39 +838,17 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,  	if (IS_ERR(mnt))  		return ERR_CAST(mnt); -	ns_private = create_mnt_ns(mnt); -	if (IS_ERR(ns_private)) { -		mntput(mnt); -		return ERR_CAST(ns_private); -	} +	root = mount_subtree(mnt, subvol_name); -	/* -	 * This will trigger the automount of the subvol so we can just -	 * drop the mnt we have here and return the dentry that we -	 * found. 
-	 */ -	error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name, -				LOOKUP_FOLLOW, &path); -	put_mnt_ns(ns_private); -	if (error) -		return ERR_PTR(error); - -	if (!is_subvolume_inode(path.dentry->d_inode)) { -		path_put(&path); -		mntput(mnt); -		error = -EINVAL; +	if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) { +		struct super_block *s = root->d_sb; +		dput(root); +		root = ERR_PTR(-EINVAL); +		deactivate_locked_super(s);  		printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",  				subvol_name); -		return ERR_PTR(-EINVAL);  	} -	/* Get a ref to the sb and the dentry we found and return it */ -	s = path.mnt->mnt_sb; -	atomic_inc(&s->s_active); -	root = dget(path.dentry); -	path_put(&path); -	down_write(&s->s_umount); -  	return root;  } @@ -1083,7 +1057,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)  	int i = 0, nr_devices;  	int ret; -	nr_devices = fs_info->fs_devices->rw_devices; +	nr_devices = fs_info->fs_devices->open_devices;  	BUG_ON(!nr_devices);  	devices_info = kmalloc(sizeof(*devices_info) * nr_devices, @@ -1105,8 +1079,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)  	else  		min_stripe_size = BTRFS_STRIPE_LEN; -	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { -		if (!device->in_fs_metadata) +	list_for_each_entry(device, &fs_devices->devices, dev_list) { +		if (!device->in_fs_metadata || !device->bdev)  			continue;  		avail_space = device->total_bytes - device->bytes_used; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6a0574e923b..81376d94cd3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -785,6 +785,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,  			btrfs_save_ino_cache(root, trans); +			/* see comments in should_cow_block() */ +			root->force_cow = 0; +			smp_wmb(); +  			if (root->commit_root != root->node) {  				mutex_lock(&root->fs_commit_mutex);  				switch_commit_root(root); @@ -947,6 +951,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,  	btrfs_tree_unlock(old);  	free_extent_buffer(old); +	/* see comments in should_cow_block() */ +	root->force_cow = 1; +	smp_wmb(); +  	btrfs_set_root_node(new_root_item, tmp);  	/* record when the snapshot was created in key.offset */  	key.offset = trans->transid; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ab5b1c49f35..78f2d4d4f37 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -100,6 +100,12 @@ struct btrfs_device {  	struct reada_zone *reada_curr_zone;  	struct radix_tree_root reada_zones;  	struct radix_tree_root reada_extents; + +	/* for sending down flush barriers */ +	struct bio *flush_bio; +	struct completion flush_wait; +	int nobarriers; +  };  struct btrfs_fs_devices {  |
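
Note on the (u64)(unsigned long) casts in the backref.c, ioctl.c and scrub.c hunks: ipath->fspath->val[] is a u64 array, and casting a kernel pointer straight to u64 triggers a pointer-to-integer-of-different-size warning on 32-bit builds. Casting through unsigned long, which always matches the pointer width, keeps the round trip clean. A stand-alone user-space sketch of the idiom (buffer name and values here are illustrative only, not from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	char buf[16] = "example";
	char *p = buf;

	/* store a pointer in a u64 slot: cast through unsigned long so the
	 * pointer-to-integer conversion is pointer-width on 32-bit too */
	uint64_t slot = (uint64_t)(unsigned long)p;

	/* recover the pointer the same way the ioctl/scrub readers do */
	char *q = (char *)(unsigned long)slot;

	printf("%s\n", q);
	return 0;
}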
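
Note on root->force_cow: the flag is set, cleared and read in three different files above, so the ordering is easier to see side by side. A condensed, non-compilable sketch assembled from the transaction.c and ctree.c hunks (the explanatory comments are added here, not part of the patch):

/* transaction.c, create_pending_snapshot(): after the source root has been
 * copied for the snapshot, force later COWs of shared blocks */
root->force_cow = 1;
smp_wmb();			/* publish the store ... */

/* ctree.c, should_cow_block(): reader side */
smp_rmb();			/* ... before this load observes it */
if (btrfs_header_generation(buf) == trans->transid &&
    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
    !root->force_cow)
	return 0;		/* block can be reused without COW */
return 1;			/* COW required */

/* transaction.c, commit_fs_roots(): once the root has been committed,
 * drop back to the normal no-COW fast path */
root->force_cow = 0;
smp_wmb();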
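
Note on the btrfs_getattr() change: st_blocks is now computed by rounding both the on-disk byte count and the outstanding delalloc bytes up to the filesystem block size before shifting to 512-byte sectors. A small stand-alone example with made-up numbers (4K blocksize, 100 bytes on disk, 5000 bytes of delalloc) shows the difference:

#include <stdio.h>
#include <stdint.h>

/* same rounding as the kernel ALIGN() macro, for power-of-two alignments */
static uint64_t align_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	uint64_t blocksize = 4096;	/* hypothetical s_blocksize */
	uint64_t disk_bytes = 100;	/* stands in for inode_get_bytes() */
	uint64_t delalloc_bytes = 5000;	/* dirty data not yet allocated */

	uint64_t old_blocks = (disk_bytes + delalloc_bytes) >> 9;
	uint64_t new_blocks = (align_up(disk_bytes, blocksize) +
			       align_up(delalloc_bytes, blocksize)) >> 9;

	/* prints: old st_blocks = 9, new st_blocks = 24 */
	printf("old st_blocks = %llu, new st_blocks = %llu\n",
	       (unsigned long long)old_blocks,
	       (unsigned long long)new_blocks);
	return 0;
}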