author		Linus Torvalds <torvalds@linux-foundation.org>	2012-10-10 10:49:20 +0900
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-10 10:49:20 +0900
commit		72055425e53540d9d0e59a57ac8c9b8ce77b62d5 (patch)
tree		8033d7d7bfdf8725eed785d02f7121d201052d2e /fs/btrfs/file.c
parent		fc81c038c2d61d4fcd8150f383fec1ce23087597 (diff)
parent		f46dbe3dee853f8a860f889cb2b7ff4c624f2a7a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:
 "This is a large pull, with the bulk of the updates coming from:
   - Hole punching
   - send/receive fixes
   - fsync performance
   - Disk format extension allowing more hardlinks inside a single
     directory (btrfs-progs patch required to enable the compat bit for
     this one)
  I'm cooking more unrelated RAID code, but I wanted to make sure this
  original batch makes it in.  The largest updates here are relatively
  old and have been in testing for some time."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (121 commits)
  btrfs: init ref_index to zero in add_inode_ref
  Btrfs: remove repeated eb->pages check in, disk-io.c/csum_dirty_buffer
  Btrfs: fix page leakage
  Btrfs: do not warn_on when we cannot alloc a page for an extent buffer
  Btrfs: don't bug on enomem in readpage
  Btrfs: cleanup pages properly when ENOMEM in compression
  Btrfs: make filesystem read-only when submitting barrier fails
  Btrfs: detect corrupted filesystem after write I/O errors
  Btrfs: make compress and nodatacow mount options mutually exclusive
  btrfs: fix message printing
  Btrfs: don't bother committing delayed inode updates when fsyncing
  btrfs: move inline function code to header file
  Btrfs: remove unnecessary IS_ERR in bio_readpage_error()
  btrfs: remove unused function btrfs_insert_some_items()
  Btrfs: don't commit instead of overcommitting
  Btrfs: confirmation of value is added before trace_btrfs_get_extent() is called
  Btrfs: be smarter about dropping things from the tree log
  Btrfs: don't lookup csums for prealloc extents
  Btrfs: cache extent state when writing out dirty metadata pages
  Btrfs: do not hold the file extent leaf locked when adding extent item
  ...
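
A note on the headline feature: the hole punching listed in the summary above is driven from userspace through fallocate(2). The sketch below is illustrative only and is not part of this pull; the file name and offsets are made up. The VFS requires FALLOC_FL_PUNCH_HOLE to be paired with FALLOC_FL_KEEP_SIZE.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/falloc.h>

	int main(void)
	{
		int fd = open("testfile", O_RDWR);	/* any file on btrfs */
		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Deallocate 1 MiB starting at offset 4096; reads of the
		 * punched range then return zeroes and the blocks are freed. */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      4096, 1024 * 1024) < 0)
			perror("fallocate");
		close(fd);
		return 0;
	}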
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--	fs/btrfs/file.c	447
1 files changed, 408 insertions, 39 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f6b40e86121..9ab1bed8811 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,6 +39,7 @@
 #include "tree-log.h"
 #include "locking.h"
 #include "compat.h"
+#include "volumes.h"
 
 /*
  * when auto defrag is enabled we
@@ -458,14 +459,15 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
  * this drops all the extents in the cache that intersect the range
  * [start, end].  Existing extents are split as required.
  */
-int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
-			    int skip_pinned)
+void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+			     int skip_pinned)
 {
 	struct extent_map *em;
 	struct extent_map *split = NULL;
 	struct extent_map *split2 = NULL;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	u64 len = end - start + 1;
+	u64 gen;
 	int ret;
 	int testend = 1;
 	unsigned long flags;
@@ -477,11 +479,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		testend = 0;
 	}
 	while (1) {
+		int no_splits = 0;
+
 		if (!split)
 			split = alloc_extent_map();
 		if (!split2)
 			split2 = alloc_extent_map();
-		BUG_ON(!split || !split2); /* -ENOMEM */
+		if (!split || !split2)
+			no_splits = 1;
 
 		write_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, start, len);
@@ -490,6 +495,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			break;
 		}
 		flags = em->flags;
+		gen = em->generation;
 		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
 			if (testend && em->start + em->len >= start + len) {
 				free_extent_map(em);
@@ -506,6 +512,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
 		remove_extent_mapping(em_tree, em);
+		if (no_splits)
+			goto next;
 
 		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
 		    em->start < start) {
@@ -518,12 +526,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 				split->block_len = em->block_len;
 			else
 				split->block_len = split->len;
-
+			split->generation = gen;
 			split->bdev = em->bdev;
 			split->flags = flags;
 			split->compress_type = em->compress_type;
 			ret = add_extent_mapping(em_tree, split);
 			BUG_ON(ret); /* Logic error */
+			list_move(&split->list, &em_tree->modified_extents);
 			free_extent_map(split);
 			split = split2;
 			split2 = NULL;
@@ -537,6 +546,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->bdev = em->bdev;
 			split->flags = flags;
 			split->compress_type = em->compress_type;
+			split->generation = gen;
 
 			if (compressed) {
 				split->block_len = em->block_len;
@@ -550,9 +560,11 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 
 			ret = add_extent_mapping(em_tree, split);
 			BUG_ON(ret); /* Logic error */
+			list_move(&split->list, &em_tree->modified_extents);
 			free_extent_map(split);
 			split = NULL;
 		}
+next:
 		write_unlock(&em_tree->lock);
 
 		/* once for us */
@@ -564,7 +576,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		free_extent_map(split);
 	if (split2)
 		free_extent_map(split2);
-	return 0;
 }
 
 /*
@@ -576,13 +587,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
  * it is either truncated or split.  Anything entirely inside the range
  * is deleted from the tree.
 */
-int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
-		       u64 start, u64 end, u64 *hint_byte, int drop_cache)
+int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root, struct inode *inode,
+			 struct btrfs_path *path, u64 start, u64 end,
+			 u64 *drop_end, int drop_cache)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *fi;
-	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct btrfs_key new_key;
 	u64 ino = btrfs_ino(inode);
@@ -597,14 +608,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 	int recow;
 	int ret;
 	int modify_tree = -1;
+	int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
+	int found = 0;
 
 	if (drop_cache)
 		btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
 	if (start >= BTRFS_I(inode)->disk_i_size)
 		modify_tree = 0;
@@ -666,6 +675,7 @@ next_slot:
 			goto next_slot;
 		}
 
+		found = 1;
 		search_start = max(key.offset, start);
 		if (recow || !modify_tree) {
 			modify_tree = -1;
@@ -707,14 +717,13 @@ next_slot:
 							extent_end - start);
 			btrfs_mark_buffer_dirty(leaf);
 
-			if (disk_bytenr > 0) {
+			if (update_refs && disk_bytenr > 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
 						disk_bytenr, num_bytes, 0,
 						root->root_key.objectid,
 						new_key.objectid,
 						start - extent_offset, 0);
 				BUG_ON(ret); /* -ENOMEM */
-				*hint_byte = disk_bytenr;
 			}
 			key.offset = start;
 		}
@@ -734,10 +743,8 @@ next_slot:
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							extent_end - end);
 			btrfs_mark_buffer_dirty(leaf);
-			if (disk_bytenr > 0) {
+			if (update_refs && disk_bytenr > 0)
 				inode_sub_bytes(inode, end - key.offset);
-				*hint_byte = disk_bytenr;
-			}
 			break;
 		}
@@ -753,10 +760,8 @@ next_slot:
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							start - key.offset);
 			btrfs_mark_buffer_dirty(leaf);
-			if (disk_bytenr > 0) {
+			if (update_refs && disk_bytenr > 0)
 				inode_sub_bytes(inode, extent_end - start);
-				*hint_byte = disk_bytenr;
-			}
 
 			if (end == extent_end)
 				break;
@@ -777,12 +782,13 @@ next_slot:
 				del_nr++;
 			}
 
-			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+			if (update_refs &&
+			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
 				inode_sub_bytes(inode,
 						extent_end - key.offset);
 				extent_end = ALIGN(extent_end,
 						   root->sectorsize);
-			} else if (disk_bytenr > 0) {
+			} else if (update_refs && disk_bytenr > 0) {
 				ret = btrfs_free_extent(trans, root,
 						disk_bytenr, num_bytes, 0,
 						root->root_key.objectid,
@@ -791,7 +797,6 @@ next_slot:
 				BUG_ON(ret); /* -ENOMEM */
 				inode_sub_bytes(inode,
 						extent_end - key.offset);
-				*hint_byte = disk_bytenr;
 			}
 
 			if (end == extent_end)
@@ -806,7 +811,7 @@ next_slot:
 					      del_nr);
 			if (ret) {
 				btrfs_abort_transaction(trans, root, ret);
-				goto out;
+				break;
 			}
 
 			del_nr = 0;
@@ -825,7 +830,24 @@ next_slot:
 			btrfs_abort_transaction(trans, root, ret);
 	}
 
-out:
+	if (drop_end)
+		*drop_end = found ? min(end, extent_end) : end;
+	btrfs_release_path(path);
+	return ret;
+}
+
+int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct inode *inode, u64 start,
+		       u64 end, int drop_cache)
+{
+	struct btrfs_path *path;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
+				   drop_cache);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -892,8 +914,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 	int ret;
 	u64 ino = btrfs_ino(inode);
 
-	btrfs_drop_extent_cache(inode, start, end - 1, 0);
-
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -935,12 +955,16 @@ again:
 			btrfs_set_item_key_safe(trans, root, path, &new_key);
 			fi = btrfs_item_ptr(leaf, path->slots[0],
 					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_generation(leaf, fi,
+							 trans->transid);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							extent_end - end);
 			btrfs_set_file_extent_offset(leaf, fi,
 						     end - orig_offset);
 			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
 					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_generation(leaf, fi,
+							 trans->transid);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							end - other_start);
 			btrfs_mark_buffer_dirty(leaf);
@@ -958,12 +982,16 @@ again:
 					    struct btrfs_file_extent_item);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							start - key.offset);
+			btrfs_set_file_extent_generation(leaf, fi,
+							 trans->transid);
 			path->slots[0]++;
 			new_key.offset = start;
 			btrfs_set_item_key_safe(trans, root, path, &new_key);
 
 			fi = btrfs_item_ptr(leaf, path->slots[0],
 					    struct btrfs_file_extent_item);
+			btrfs_set_file_extent_generation(leaf, fi,
+							 trans->transid);
 			btrfs_set_file_extent_num_bytes(leaf, fi,
 							other_end - start);
 			btrfs_set_file_extent_offset(leaf, fi,
@@ -991,12 +1019,14 @@ again:
 		leaf = path->nodes[0];
 		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
 				    struct btrfs_file_extent_item);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_set_file_extent_num_bytes(leaf, fi,
 						split - key.offset);
 
 		fi = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_file_extent_item);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
 		btrfs_set_file_extent_num_bytes(leaf, fi,
 						extent_end - split);
@@ -1056,12 +1086,14 @@ again:
 			   struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_mark_buffer_dirty(leaf);
 	} else {
 		fi = btrfs_item_ptr(leaf, del_slot - 1,
 			   struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_set_file_extent_num_bytes(leaf, fi,
 						extent_end - key.offset);
 		btrfs_mark_buffer_dirty(leaf);
@@ -1173,8 +1205,8 @@ again:
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
 				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
-				  EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
-				  GFP_NOFS);
+				  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+				  0, 0, &cached_state, GFP_NOFS);
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 				     start_pos, last_pos - 1, &cached_state,
 				     GFP_NOFS);
@@ -1514,16 +1546,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	trace_btrfs_sync_file(file, datasync);
 
+	/*
+	 * We write the dirty pages in the range and wait until they complete
+	 * out of the ->i_mutex. If so, we can flush the dirty pages by
+	 * multi-task, and make the performance up.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (ret)
+		return ret;
+
 	mutex_lock(&inode->i_mutex);
 
 	/*
-	 * we wait first, since the writeback may change the inode, also wait
-	 * ordered range does a filemape_write_and_wait_range which is why we
-	 * don't do it above like other file systems.
+	 * We flush the dirty pages again to avoid some dirty pages in the
+	 * range being left.
 	 */
-	root->log_batch++;
+	atomic_inc(&root->log_batch);
 	btrfs_wait_ordered_range(inode, start, end);
-	root->log_batch++;
+	atomic_inc(&root->log_batch);
 
 	/*
 	 * check the transaction that last modified this inode
@@ -1544,6 +1584,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	    BTRFS_I(inode)->last_trans <=
 	    root->fs_info->last_trans_committed) {
 		BTRFS_I(inode)->last_trans = 0;
+
+		/*
+		 * We'v had everything committed since the last time we were
+		 * modified so clear this flag in case it was set for whatever
+		 * reason, it's no longer relevant.
+		 */
+		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+			  &BTRFS_I(inode)->runtime_flags);
 		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}
@@ -1615,6 +1663,324 @@ static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+			  int slot, u64 start, u64 end)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+
+	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
+		return 0;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (key.objectid != btrfs_ino(inode) ||
+	    key.type != BTRFS_EXTENT_DATA_KEY)
+		return 0;
+
+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+		return 0;
+
+	if (btrfs_file_extent_disk_bytenr(leaf, fi))
+		return 0;
+
+	if (key.offset == end)
+		return 1;
+	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
+		return 1;
+	return 0;
+}
+
+static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
+		      struct btrfs_path *path, u64 offset, u64 end)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct extent_map *hole_em;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = offset;
+
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		return ret;
+	BUG_ON(!ret);
+
+	leaf = path->nodes[0];
+	if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
+		u64 num_bytes;
+
+		path->slots[0]--;
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
+			end - offset;
+		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_mark_buffer_dirty(leaf);
+		goto out;
+	}
+
+	if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
+		u64 num_bytes;
+
+		path->slots[0]++;
+		key.offset = offset;
+		btrfs_set_item_key_safe(trans, root, path, &key);
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
+			offset;
+		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_mark_buffer_dirty(leaf);
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+				       0, 0, end - offset, 0, end - offset,
+				       0, 0, 0);
+	if (ret)
+		return ret;
+
+out:
+	btrfs_release_path(path);
+
+	hole_em = alloc_extent_map();
+	if (!hole_em) {
+		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+			&BTRFS_I(inode)->runtime_flags);
+	} else {
+		hole_em->start = offset;
+		hole_em->len = end - offset;
+		hole_em->orig_start = offset;
+
+		hole_em->block_start = EXTENT_MAP_HOLE;
+		hole_em->block_len = 0;
+		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+		hole_em->compress_type = BTRFS_COMPRESS_NONE;
+		hole_em->generation = trans->transid;
+
+		do {
+			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+			write_lock(&em_tree->lock);
+			ret = add_extent_mapping(em_tree, hole_em);
+			if (!ret)
+				list_move(&hole_em->list,
+					  &em_tree->modified_extents);
+			write_unlock(&em_tree->lock);
+		} while (ret == -EEXIST);
+		free_extent_map(hole_em);
+		if (ret)
+			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+				&BTRFS_I(inode)->runtime_flags);
+	}
+
+	return 0;
+}
+
+static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_block_rsv *rsv;
+	struct btrfs_trans_handle *trans;
+	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+	u64 lockstart = (offset + mask) & ~mask;
+	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 cur_offset = lockstart;
+	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+	u64 drop_end;
+	unsigned long nr;
+	int ret = 0;
+	int err = 0;
+	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
+		((offset + len) >> PAGE_CACHE_SHIFT);
+
+	btrfs_wait_ordered_range(inode, offset, len);
+
+	mutex_lock(&inode->i_mutex);
+	if (offset >= inode->i_size) {
+		mutex_unlock(&inode->i_mutex);
+		return 0;
+	}
+
+	/*
+	 * Only do this if we are in the same page and we aren't doing the
+	 * entire page.
+	 */
+	if (same_page && len < PAGE_CACHE_SIZE) {
+		ret = btrfs_truncate_page(inode, offset, len, 0);
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	/* zero back part of the first page */
+	ret = btrfs_truncate_page(inode, offset, 0, 0);
+	if (ret) {
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	/* zero the front end of the last page */
+	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+	if (ret) {
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	if (lockend < lockstart) {
+		mutex_unlock(&inode->i_mutex);
+		return 0;
+	}
+
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+
+		truncate_pagecache_range(inode, lockstart, lockend);
+
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, &cached_state);
+		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+		/*
+		 * We need to make sure we have no ordered extents in this range
+		 * and nobody raced in and read a page in this range, if we did
+		 * we need to try again.
+		 */
+		if ((!ordered ||
+		    (ordered->file_offset + ordered->len < lockstart ||
+		     ordered->file_offset > lockend)) &&
+		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_UPTODATE, 0,
+				     cached_state)) {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, &cached_state, GFP_NOFS);
+		btrfs_wait_ordered_range(inode, lockstart,
+					 lockend - lockstart + 1);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
+	if (!rsv) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+	rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
+	rsv->failfast = 1;
+
+	/*
+	 * 1 - update the inode
+	 * 1 - removing the extents in the range
+	 * 1 - adding the hole extent
+	 */
+	trans = btrfs_start_transaction(root, 3);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
+		goto out_free;
+	}
+
+	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+				      min_size);
+	BUG_ON(ret);
+	trans->block_rsv = rsv;
+
+	while (cur_offset < lockend) {
+		ret = __btrfs_drop_extents(trans, root, inode, path,
+					   cur_offset, lockend + 1,
+					   &drop_end, 1);
+		if (ret != -ENOSPC)
+			break;
+
+		trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+		if (ret) {
+			err = ret;
+			break;
+		}
+
+		cur_offset = drop_end;
+
+		ret = btrfs_update_inode(trans, root, inode);
+		if (ret) {
+			err = ret;
+			break;
+		}
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, root);
+		btrfs_btree_balance_dirty(root, nr);
+
+		trans = btrfs_start_transaction(root, 3);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			break;
+		}
+
+		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+					      rsv, min_size);
+		BUG_ON(ret);	/* shouldn't happen */
+		trans->block_rsv = rsv;
+	}
+
+	if (ret) {
+		err = ret;
+		goto out_trans;
+	}
+
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+	ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+	if (ret) {
+		err = ret;
+		goto out_trans;
+	}
+
+out_trans:
+	if (!trans)
+		goto out_free;
+
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+	ret = btrfs_update_inode(trans, root, inode);
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+	btrfs_btree_balance_dirty(root, nr);
+out_free:
+	btrfs_free_path(path);
+	btrfs_free_block_rsv(root, rsv);
+out:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			     &cached_state, GFP_NOFS);
+	mutex_unlock(&inode->i_mutex);
+	if (ret && !err)
+		err = ret;
+	return err;
+}
+
 static long btrfs_fallocate(struct file *file, int mode,
 			    loff_t offset, loff_t len)
 {
@@ -1633,15 +1999,18 @@ static long btrfs_fallocate(struct file *file, int mode,
 	alloc_start = offset & ~mask;
 	alloc_end =  (offset + len + mask) & ~mask;
 
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
+	/* Make sure we aren't being give some crap mode */
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return -EOPNOTSUPP;
 
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		return btrfs_punch_hole(inode, offset, len);
+
 	/*
 	 * Make sure we have enough space before we do the
 	 * allocation.
 	 */
-	ret = btrfs_check_data_free_space(inode, len);
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1);
 	if (ret)
 		return ret;
 
@@ -1748,7 +2117,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 out:
 	mutex_unlock(&inode->i_mutex);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, len);
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1);
 	return ret;
 }
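
An aside on the alignment logic at the top of btrfs_punch_hole() above: the punch range is rounded inward to sector boundaries, so only whole sectors in [lockstart, lockend] are dropped from the extent tree, while the unaligned head and tail are zeroed in the page cache via btrfs_truncate_page(). A standalone sketch of that arithmetic, with a made-up 4K sector size and a hypothetical request:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t sectorsize = 4096;		/* assumed sector size */
		uint64_t mask = sectorsize - 1;
		uint64_t offset = 5000, len = 20000;	/* hypothetical punch request */

		/* Same expressions as in btrfs_punch_hole() above. */
		uint64_t lockstart = (offset + mask) & ~mask;	  /* rounds up to 8192 */
		uint64_t lockend = ((offset + len) & ~mask) - 1;  /* rounds down to 24575 */

		printf("drop whole sectors in [%llu, %llu], zero the rest\n",
		       (unsigned long long)lockstart,
		       (unsigned long long)lockend);
		return 0;
	}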