author    Linus Torvalds <torvalds@linux-foundation.org>  2012-10-10 10:49:20 +0900
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-10-10 10:49:20 +0900
commit    72055425e53540d9d0e59a57ac8c9b8ce77b62d5 (patch)
tree      8033d7d7bfdf8725eed785d02f7121d201052d2e /fs/btrfs/extent_io.c
parent    fc81c038c2d61d4fcd8150f383fec1ce23087597 (diff)
parent    f46dbe3dee853f8a860f889cb2b7ff4c624f2a7a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:
 "This is a large pull, with the bulk of the updates coming from:
   - Hole punching
   - send/receive fixes
   - fsync performance
   - Disk format extension allowing more hardlinks inside a single
     directory (btrfs-progs patch required to enable the compat bit for
     this one)
  I'm cooking more unrelated RAID code, but I wanted to make sure this
  original batch makes it in.  The largest updates here are relatively
  old and have been in testing for some time."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (121 commits)
  btrfs: init ref_index to zero in add_inode_ref
  Btrfs: remove repeated eb->pages check in, disk-io.c/csum_dirty_buffer
  Btrfs: fix page leakage
  Btrfs: do not warn_on when we cannot alloc a page for an extent buffer
  Btrfs: don't bug on enomem in readpage
  Btrfs: cleanup pages properly when ENOMEM in compression
  Btrfs: make filesystem read-only when submitting barrier fails
  Btrfs: detect corrupted filesystem after write I/O errors
  Btrfs: make compress and nodatacow mount options mutually exclusive
  btrfs: fix message printing
  Btrfs: don't bother committing delayed inode updates when fsyncing
  btrfs: move inline function code to header file
  Btrfs: remove unnecessary IS_ERR in bio_readpage_error()
  btrfs: remove unused function btrfs_insert_some_items()
  Btrfs: don't commit instead of overcommitting
  Btrfs: confirmation of value is added before trace_btrfs_get_extent() is called
  Btrfs: be smarter about dropping things from the tree log
  Btrfs: don't lookup csums for prealloc extents
  Btrfs: cache extent state when writing out dirty metadata pages
  Btrfs: do not hold the file extent leaf locked when adding extent item
  ...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  128
1 file changed, 94 insertions(+), 34 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b08ea4717e9..8036d3a8485 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -45,6 +45,7 @@ struct extent_page_data {
 	struct bio *bio;
 	struct extent_io_tree *tree;
 	get_extent_t *get_extent;
+	unsigned long bio_flags;
 
 	/* tells writepage not to lock the state bits for this range
 	 * it still does the unlocking
@@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree)
 
 int __init extent_io_init(void)
 {
-	extent_state_cache = kmem_cache_create("extent_state",
+	extent_state_cache = kmem_cache_create("btrfs_extent_state",
 			sizeof(struct extent_state), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_state_cache)
 		return -ENOMEM;
 
-	extent_buffer_cache = kmem_cache_create("extent_buffers",
+	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
 			sizeof(struct extent_buffer), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
@@ -942,6 +943,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * @end:	the end offset in bytes (inclusive)
  * @bits:	the bits to set in this range
  * @clear_bits:	the bits to clear in this range
+ * @cached_state:	state that we're going to cache
  * @mask:	the allocation mask
  *
  * This will go through and set bits for the given range.  If any states exist
@@ -951,7 +953,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       int bits, int clear_bits, gfp_t mask)
+		       int bits, int clear_bits,
+		       struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
@@ -968,6 +971,15 @@ again:
 	}
 
 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->start <= start && state->end > start &&
+		    state->tree) {
+			node = &state->rb_node;
+			goto hit_next;
+		}
+	}
+
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -998,6 +1010,7 @@ hit_next:
 	 */
 	if (state->start == start && state->end <= end) {
 		set_state_bits(tree, state, &bits);
+		cache_state(state, cached_state);
 		state = clear_state_bit(tree, state, &clear_bits, 0);
 		if (last_end == (u64)-1)
 			goto out;
@@ -1038,6 +1051,7 @@ hit_next:
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
+			cache_state(state, cached_state);
 			state = clear_state_bit(tree, state, &clear_bits, 0);
 			if (last_end == (u64)-1)
 				goto out;
@@ -1076,6 +1090,7 @@ hit_next:
 				   &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
+		cache_state(prealloc, cached_state);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -1098,6 +1113,7 @@ hit_next:
 			extent_io_tree_panic(tree, err);
 
 		set_state_bits(tree, prealloc, &bits);
+		cache_state(prealloc, cached_state);
 		clear_state_bit(tree, prealloc, &clear_bits, 0);
 		prealloc = NULL;
 		goto out;
@@ -1150,6 +1166,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			      NULL, cached_state, mask);
 }
 
+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+		      struct extent_state **cached_state, gfp_t mask)
+{
+	return set_extent_bit(tree, start, end,
+			      EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+			      NULL, cached_state, mask);
+}
+
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask)
 {
@@ -1294,18 +1318,42 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits)
+			  u64 *start_ret, u64 *end_ret, int bits,
+			  struct extent_state **cached_state)
 {
 	struct extent_state *state;
+	struct rb_node *n;
 	int ret = 1;
 
 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->end == start - 1 && state->tree) {
+			n = rb_next(&state->rb_node);
+			while (n) {
+				state = rb_entry(n, struct extent_state,
+						 rb_node);
+				if (state->state & bits)
+					goto got_it;
+				n = rb_next(n);
+			}
+			free_extent_state(*cached_state);
+			*cached_state = NULL;
+			goto out;
+		}
+		free_extent_state(*cached_state);
+		*cached_state = NULL;
+	}
+
 	state = find_first_extent_bit_state(tree, start, bits);
+got_it:
 	if (state) {
+		cache_state(state, cached_state);
 		*start_ret = state->start;
 		*end_ret = state->end;
 		ret = 0;
 	}
+out:
 	spin_unlock(&tree->lock);
 	return ret;
 }
@@ -2068,7 +2116,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		}
 		read_unlock(&em_tree->lock);
 
-		if (!em || IS_ERR(em)) {
+		if (!em) {
 			kfree(failrec);
 			return -EIO;
 		}
@@ -2304,8 +2352,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
 
-		pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
-			 "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
+		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
+			 "mirror=%ld\n", (u64)bio->bi_sector, err,
 			 (long int)bio->bi_bdev);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2709,12 +2757,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 					 end_bio_extent_readpage, mirror_num,
 					 *bio_flags,
 					 this_bio_flag);
-			BUG_ON(ret == -ENOMEM);
-			nr++;
-			*bio_flags = this_bio_flag;
+			if (!ret) {
+				nr++;
+				*bio_flags = this_bio_flag;
+			}
 		}
-		if (ret)
+		if (ret) {
 			SetPageError(page);
+			unlock_extent(tree, cur, cur + iosize - 1);
+		}
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
@@ -3161,12 +3212,16 @@ static int write_one_eb(struct extent_buffer *eb,
 	struct block_device *bdev = fs_info->fs_devices->latest_bdev;
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
+	unsigned long bio_flags = 0;
 	int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
 	atomic_set(&eb->io_pages, num_pages);
+	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
+		bio_flags = EXTENT_BIO_TREE_LOG;
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);
 
@@ -3175,7 +3230,8 @@ static int write_one_eb(struct extent_buffer *eb,
 		ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
 					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
-					 0, 0, 0);
+					 0, epd->bio_flags, bio_flags);
+		epd->bio_flags = bio_flags;
 		if (ret) {
 			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 			SetPageError(p);
@@ -3210,6 +3266,7 @@ int btree_write_cache_pages(struct address_space *mapping,
 		.tree = tree,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	int ret = 0;
 	int done = 0;
@@ -3254,19 +3311,34 @@ retry:
 				break;
 			}
 
+			spin_lock(&mapping->private_lock);
+			if (!PagePrivate(page)) {
+				spin_unlock(&mapping->private_lock);
+				continue;
+			}
+
 			eb = (struct extent_buffer *)page->private;
+
+			/*
+			 * Shouldn't happen and normally this would be a BUG_ON
+			 * but no sense in crashing the users box for something
+			 * we can survive anyway.
+			 */
 			if (!eb) {
+				spin_unlock(&mapping->private_lock);
 				WARN_ON(1);
 				continue;
 			}
 
-			if (eb == prev_eb)
+			if (eb == prev_eb) {
+				spin_unlock(&mapping->private_lock);
 				continue;
+			}
 
-			if (!atomic_inc_not_zero(&eb->refs)) {
-				WARN_ON(1);
+			ret = atomic_inc_not_zero(&eb->refs);
+			spin_unlock(&mapping->private_lock);
+			if (!ret)
 				continue;
-			}
 
 			prev_eb = eb;
 			ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
@@ -3457,7 +3529,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 		if (epd->sync_io)
 			rw = WRITE_SYNC;
 
-		ret = submit_one_bio(rw, epd->bio, 0, 0);
+		ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
 		epd->bio = NULL;
 	}
@@ -3480,6 +3552,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 
 	ret = __extent_writepage(page, wbc, &epd);
@@ -3504,6 +3577,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.get_extent = get_extent,
 		.extent_locked = 1,
 		.sync_io = mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	struct writeback_control wbc_writepages = {
 		.sync_mode	= mode,
@@ -3543,6 +3617,7 @@ int extent_writepages(struct extent_io_tree *tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,
@@ -3920,18 +3995,6 @@ out:
 	return ret;
 }
 
-inline struct page *extent_buffer_page(struct extent_buffer *eb,
-					      unsigned long i)
-{
-	return eb->pages[i];
-}
-
-inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-		(start >> PAGE_CACHE_SHIFT);
-}
-
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
 #if LEAK_DEBUG
@@ -4047,7 +4110,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
 	return eb;
 
 err:
-	for (i--; i > 0; i--)
+	for (i--; i >= 0; i--)
 		__free_page(eb->pages[i]);
 	__free_extent_buffer(eb);
 	return NULL;
@@ -4192,10 +4255,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS);
-		if (!p) {
-			WARN_ON(1);
+		if (!p)
 			goto free_eb;
-		}
 
 		spin_lock(&mapping->private_lock);
 		if (PagePrivate(p)) {
@@ -4338,7 +4399,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
 
 		/* Should be safe to release our pages at this point */
 		btrfs_release_extent_buffer_page(eb, 0);
-
 		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 		return 1;
 	}
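
The common thread in the extent_io.c hunks above is caching the last-used extent_state so that sequential callers of convert_extent_bit() and find_first_extent_bit() can skip a fresh rb-tree search on every call. The following is a minimal user-space sketch of that fast-path pattern, not kernel code: the names range_state, range_tree, lookup_range and search_ranges are hypothetical stand-ins, and a sorted array replaces the kernel's rb-tree of struct extent_state walked under tree->lock.

/*
 * Sketch of the cached-state fast path this merge threads through
 * convert_extent_bit() and find_first_extent_bit().  All names here
 * are illustrative; the kernel uses an rb-tree, a spinlock and
 * reference-counted extent_state objects instead.
 */
#include <stdio.h>
#include <stddef.h>

struct range_state {
	unsigned long long start;	/* first byte covered, inclusive */
	unsigned long long end;		/* last byte covered, inclusive */
};

struct range_tree {
	struct range_state *states;	/* sorted, non-overlapping */
	size_t nr;
	struct range_state *cached;	/* last state a caller hit */
};

/* Slow path: binary search standing in for the kernel's rb-tree walk. */
static struct range_state *search_ranges(struct range_tree *tree,
					 unsigned long long offset)
{
	size_t lo = 0, hi = tree->nr;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;
		struct range_state *st = &tree->states[mid];

		if (offset < st->start)
			hi = mid;
		else if (offset > st->end)
			lo = mid + 1;
		else
			return st;
	}
	return NULL;
}

/*
 * The pattern the diff adds: if the cached state still covers 'offset',
 * return it without searching; otherwise search and re-cache.  This is
 * what the new cached_state argument buys callers that scan a file or
 * the dirty metadata range sequentially.
 */
static struct range_state *lookup_range(struct range_tree *tree,
					unsigned long long offset)
{
	struct range_state *st = tree->cached;

	if (st && st->start <= offset && st->end >= offset)
		return st;			/* fast path: cache hit */

	st = search_ranges(tree, offset);	/* slow path */
	if (st)
		tree->cached = st;		/* remember for next call */
	return st;
}

int main(void)
{
	struct range_state states[] = {
		{ 0, 4095 }, { 4096, 8191 }, { 8192, 16383 },
	};
	struct range_tree tree = { states, 3, NULL };

	/* Sequential scan: every offset that falls inside the range hit
	 * by the previous call is served from tree.cached. */
	for (unsigned long long off = 0; off < 16384; off += 2048) {
		struct range_state *st = lookup_range(&tree, off);

		printf("offset %5llu -> [%llu, %llu]\n",
		       off, st->start, st->end);
	}
	return 0;
}

The kernel version additionally checks state->tree before trusting the cache, since an extent_state can be freed and recycled between calls; the sketch omits that because nothing here frees states.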