diff options
Diffstat (limited to 'fs/ext4/extents.c')
| -rw-r--r-- | fs/ext4/extents.c | 1410 | 
1 files changed, 864 insertions, 546 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4890d6f3ad1..5199bac7fc6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -46,6 +46,13 @@  #include <trace/events/ext4.h> +static int ext4_split_extent(handle_t *handle, +				struct inode *inode, +				struct ext4_ext_path *path, +				struct ext4_map_blocks *map, +				int split_flag, +				int flags); +  static int ext4_ext_truncate_extend_restart(handle_t *handle,  					    struct inode *inode,  					    int needed) @@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,  static ext4_fsblk_t  ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,  			struct ext4_ext_path *path, -			struct ext4_extent *ex, int *err) +			struct ext4_extent *ex, int *err, unsigned int flags)  {  	ext4_fsblk_t goal, newblock;  	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); -	newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); +	newblock = ext4_new_meta_blocks(handle, inode, goal, flags, +					NULL, err);  	return newblock;  } @@ -474,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)  	}  	ext_debug("\n");  } + +static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, +			ext4_fsblk_t newblock, int level) +{ +	int depth = ext_depth(inode); +	struct ext4_extent *ex; + +	if (depth != level) { +		struct ext4_extent_idx *idx; +		idx = path[level].p_idx; +		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { +			ext_debug("%d: move %d:%llu in new index %llu\n", level, +					le32_to_cpu(idx->ei_block), +					ext4_idx_pblock(idx), +					newblock); +			idx++; +		} + +		return; +	} + +	ex = path[depth].p_ext; +	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { +		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", +				le32_to_cpu(ex->ee_block), +				ext4_ext_pblock(ex), +				ext4_ext_is_uninitialized(ex), +				ext4_ext_get_actual_len(ex), +				newblock); +		ex++; +	} +} +  #else  #define 
ext4_ext_show_path(inode, path)  #define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_move(inode, path, newblock, level)  #endif  void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -792,14 +834,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,   * - initializes subtree   */  static int ext4_ext_split(handle_t *handle, struct inode *inode, -				struct ext4_ext_path *path, -				struct ext4_extent *newext, int at) +			  unsigned int flags, +			  struct ext4_ext_path *path, +			  struct ext4_extent *newext, int at)  {  	struct buffer_head *bh = NULL;  	int depth = ext_depth(inode);  	struct ext4_extent_header *neh;  	struct ext4_extent_idx *fidx; -	struct ext4_extent *ex;  	int i = at, k, m, a;  	ext4_fsblk_t newblock, oldblock;  	__le32 border; @@ -847,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,  	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);  	for (a = 0; a < depth - at; a++) {  		newblock = ext4_ext_new_meta_block(handle, inode, path, -						   newext, &err); +						   newext, &err, flags);  		if (newblock == 0)  			goto cleanup;  		ablocks[a] = newblock; @@ -876,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,  	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));  	neh->eh_magic = EXT4_EXT_MAGIC;  	neh->eh_depth = 0; -	ex = EXT_FIRST_EXTENT(neh);  	/* move remainder of path[depth] to the new leaf */  	if (unlikely(path[depth].p_hdr->eh_entries != @@ -888,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,  		goto cleanup;  	}  	/* start copy from next extent */ -	/* TODO: we could do it by single memmove */ -	m = 0; -	path[depth].p_ext++; -	while (path[depth].p_ext <= -			EXT_MAX_EXTENT(path[depth].p_hdr)) { -		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", -				le32_to_cpu(path[depth].p_ext->ee_block), -				ext4_ext_pblock(path[depth].p_ext), -				ext4_ext_is_uninitialized(path[depth].p_ext), -				
ext4_ext_get_actual_len(path[depth].p_ext), -				newblock); -		/*memmove(ex++, path[depth].p_ext++, -				sizeof(struct ext4_extent)); -		neh->eh_entries++;*/ -		path[depth].p_ext++; -		m++; -	} +	m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; +	ext4_ext_show_move(inode, path, newblock, depth);  	if (m) { -		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); +		struct ext4_extent *ex; +		ex = EXT_FIRST_EXTENT(neh); +		memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);  		le16_add_cpu(&neh->eh_entries, m);  	} @@ -968,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,  		ext_debug("int.index at %d (block %llu): %u -> %llu\n",  				i, newblock, le32_to_cpu(border), oldblock); -		/* copy indexes */ -		m = 0; -		path[i].p_idx++; -		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, -				EXT_MAX_INDEX(path[i].p_hdr)); +		/* move remainder of path[i] to the new index block */  		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=  					EXT_LAST_INDEX(path[i].p_hdr))) {  			EXT4_ERROR_INODE(inode, @@ -982,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,  			err = -EIO;  			goto cleanup;  		} -		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { -			ext_debug("%d: move %d:%llu in new index %llu\n", i, -					le32_to_cpu(path[i].p_idx->ei_block), -					ext4_idx_pblock(path[i].p_idx), -					newblock); -			/*memmove(++fidx, path[i].p_idx++, -					sizeof(struct ext4_extent_idx)); -			neh->eh_entries++; -			BUG_ON(neh->eh_entries > neh->eh_max);*/ -			path[i].p_idx++; -			m++; -		} +		/* start copy indexes */ +		m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; +		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, +				EXT_MAX_INDEX(path[i].p_hdr)); +		ext4_ext_show_move(inode, path, newblock, i);  		if (m) { -			memmove(++fidx, path[i].p_idx - m, +			memmove(++fidx, path[i].p_idx,  				sizeof(struct ext4_extent_idx) * m);  			le16_add_cpu(&neh->eh_entries, m);  		} @@ -1056,8 +1073,9 
@@ cleanup:   *   just created block   */  static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, -					struct ext4_ext_path *path, -					struct ext4_extent *newext) +				 unsigned int flags, +				 struct ext4_ext_path *path, +				 struct ext4_extent *newext)  {  	struct ext4_ext_path *curp = path;  	struct ext4_extent_header *neh; @@ -1065,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,  	ext4_fsblk_t newblock;  	int err = 0; -	newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); +	newblock = ext4_ext_new_meta_block(handle, inode, path, +		newext, &err, flags);  	if (newblock == 0)  		return err; @@ -1140,8 +1159,9 @@ out:   * if no free index is found, then it requests in-depth growing.   */  static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, -					struct ext4_ext_path *path, -					struct ext4_extent *newext) +				    unsigned int flags, +				    struct ext4_ext_path *path, +				    struct ext4_extent *newext)  {  	struct ext4_ext_path *curp;  	int depth, i, err = 0; @@ -1161,7 +1181,7 @@ repeat:  	if (EXT_HAS_FREE_INDEX(curp)) {  		/* if we found index with free entry, then use that  		 * entry: create all needed subtree and add new leaf */ -		err = ext4_ext_split(handle, inode, path, newext, i); +		err = ext4_ext_split(handle, inode, flags, path, newext, i);  		if (err)  			goto out; @@ -1174,7 +1194,8 @@ repeat:  			err = PTR_ERR(path);  	} else {  		/* tree is full, time to grow in depth */ -		err = ext4_ext_grow_indepth(handle, inode, path, newext); +		err = ext4_ext_grow_indepth(handle, inode, flags, +					    path, newext);  		if (err)  			goto out; @@ -1563,7 +1584,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,   * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns   * 1 if they got merged.   
*/ -static int ext4_ext_try_to_merge(struct inode *inode, +static int ext4_ext_try_to_merge_right(struct inode *inode,  				 struct ext4_ext_path *path,  				 struct ext4_extent *ex)  { @@ -1603,6 +1624,31 @@ static int ext4_ext_try_to_merge(struct inode *inode,  }  /* + * This function tries to merge the @ex extent to neighbours in the tree. + * return 1 if merge left else 0. + */ +static int ext4_ext_try_to_merge(struct inode *inode, +				  struct ext4_ext_path *path, +				  struct ext4_extent *ex) { +	struct ext4_extent_header *eh; +	unsigned int depth; +	int merge_done = 0; +	int ret = 0; + +	depth = ext_depth(inode); +	BUG_ON(path[depth].p_hdr == NULL); +	eh = path[depth].p_hdr; + +	if (ex > EXT_FIRST_EXTENT(eh)) +		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); + +	if (!merge_done) +		ret = ext4_ext_try_to_merge_right(inode, path, ex); + +	return ret; +} + +/*   * check if a portion of the "newext" extent overlaps with an   * existing extent.   * @@ -1668,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,  	int depth, len, err;  	ext4_lblk_t next;  	unsigned uninitialized = 0; +	int flags = 0;  	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {  		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1742,7 +1789,9 @@ repeat:  	 * There is no free space in the found leaf.  	 * We're gonna add a new leaf in the tree.  	 */ -	err = ext4_ext_create_new_leaf(handle, inode, path, newext); +	if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) +		flags = EXT4_MB_USE_ROOT_BLOCKS; +	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);  	if (err)  		goto cleanup;  	depth = ext_depth(inode); @@ -2003,13 +2052,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,  }  /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * cache extent pointer.  
If the cached extent is a hole, + * this routine should be used instead of + * ext4_ext_in_cache if the calling function needs to + * know the size of the hole. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex:    Pointer where the cached extent will be stored + *         if it contains block + *   * Return 0 if cache is invalid; 1 if the cache is valid   */ -static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, -			struct ext4_extent *ex) -{ +static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, +	struct ext4_ext_cache *ex){  	struct ext4_ext_cache *cex; +	struct ext4_sb_info *sbi;  	int ret = 0;  	/* @@ -2017,26 +2078,60 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,  	 */  	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);  	cex = &EXT4_I(inode)->i_cached_extent; +	sbi = EXT4_SB(inode->i_sb);  	/* has cache valid data? */  	if (cex->ec_len == 0)  		goto errout;  	if (in_range(block, cex->ec_block, cex->ec_len)) { -		ex->ee_block = cpu_to_le32(cex->ec_block); -		ext4_ext_store_pblock(ex, cex->ec_start); -		ex->ee_len = cpu_to_le16(cex->ec_len); +		memcpy(ex, cex, sizeof(struct ext4_ext_cache));  		ext_debug("%u cached by %u:%u:%llu\n",  				block,  				cex->ec_block, cex->ec_len, cex->ec_start);  		ret = 1;  	}  errout: +	if (!ret) +		sbi->extent_cache_misses++; +	else +		sbi->extent_cache_hits++;  	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);  	return ret;  }  /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * extent pointer. 
+ * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex:    Pointer where the cached extent will be stored + *         if it contains block + * + * Return 0 if cache is invalid; 1 if the cache is valid + */ +static int +ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, +			struct ext4_extent *ex) +{ +	struct ext4_ext_cache cex; +	int ret = 0; + +	if (ext4_ext_check_cache(inode, block, &cex)) { +		ex->ee_block = cpu_to_le32(cex.ec_block); +		ext4_ext_store_pblock(ex, cex.ec_start); +		ex->ee_len = cpu_to_le16(cex.ec_len); +		ret = 1; +	} + +	return ret; +} + + +/*   * ext4_ext_rm_idx:   * removes index from the index block.   * It's used in truncate case only, thus all requests are for @@ -2163,8 +2258,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,  		ext4_free_blocks(handle, inode, NULL, start, num, flags);  	} else if (from == le32_to_cpu(ex->ee_block)  		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { -		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", -			from, to, le32_to_cpu(ex->ee_block), ee_len); +		/* head removal */ +		ext4_lblk_t num; +		ext4_fsblk_t start; + +		num = to - from; +		start = ext4_ext_pblock(ex); + +		ext_debug("free first %u blocks starting %llu\n", num, start); +		ext4_free_blocks(handle, inode, 0, start, num, flags); +  	} else {  		printk(KERN_INFO "strange request: removal(2) "  				"%u-%u from %u:%u\n", @@ -2173,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,  	return 0;  } + +/* + * ext4_ext_rm_leaf() Removes the extents associated with the + * blocks appearing between "start" and "end", and splits the extents + * if "start" and "end" appear in the same extent + * + * @handle: The journal handle + * @inode:  The files inode + * @path:   The path to the leaf + * @start:  The first block to remove + * @end:   The last block to remove + */  static int  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, -		struct 
ext4_ext_path *path, ext4_lblk_t start) +		struct ext4_ext_path *path, ext4_lblk_t start, +		ext4_lblk_t end)  {  	int err = 0, correct_index = 0;  	int depth = ext_depth(inode), credits; @@ -2186,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  	unsigned short ex_ee_len;  	unsigned uninitialized = 0;  	struct ext4_extent *ex; +	struct ext4_map_blocks map;  	/* the header must be checked already in ext4_ext_remove_space() */  	ext_debug("truncate since %u in leaf\n", start); @@ -2215,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		path[depth].p_ext = ex;  		a = ex_ee_block > start ? ex_ee_block : start; -		b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? -			ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; +		b = ex_ee_block+ex_ee_len - 1 < end ? +			ex_ee_block+ex_ee_len - 1 : end;  		ext_debug("  border %u:%u\n", a, b); -		if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { -			block = 0; -			num = 0; -			BUG(); +		/* If this extent is beyond the end of the hole, skip it */ +		if (end <= ex_ee_block) { +			ex--; +			ex_ee_block = le32_to_cpu(ex->ee_block); +			ex_ee_len = ext4_ext_get_actual_len(ex); +			continue; +		} else if (a != ex_ee_block && +			b != ex_ee_block + ex_ee_len - 1) { +			/* +			 * If this is a truncate, then this condition should +			 * never happen because at least one of the end points +			 * needs to be on the edge of the extent. 
+			 */ +			if (end == EXT_MAX_BLOCK) { +				ext_debug("  bad truncate %u:%u\n", +						start, end); +				block = 0; +				num = 0; +				err = -EIO; +				goto out; +			} +			/* +			 * else this is a hole punch, so the extent needs to +			 * be split since neither edge of the hole is on the +			 * extent edge +			 */ +			else{ +				map.m_pblk = ext4_ext_pblock(ex); +				map.m_lblk = ex_ee_block; +				map.m_len = b - ex_ee_block; + +				err = ext4_split_extent(handle, +					inode, path, &map, 0, +					EXT4_GET_BLOCKS_PUNCH_OUT_EXT | +					EXT4_GET_BLOCKS_PRE_IO); + +				if (err < 0) +					goto out; + +				ex_ee_len = ext4_ext_get_actual_len(ex); + +				b = ex_ee_block+ex_ee_len - 1 < end ? +					ex_ee_block+ex_ee_len - 1 : end; + +				/* Then remove tail of this extent */ +				block = ex_ee_block; +				num = a - block; +			}  		} else if (a != ex_ee_block) {  			/* remove tail of the extent */  			block = ex_ee_block;  			num = a - block;  		} else if (b != ex_ee_block + ex_ee_len - 1) {  			/* remove head of the extent */ -			block = a; -			num = b - a; -			/* there is no "make a hole" API yet */ -			BUG(); +			block = b; +			num =  ex_ee_block + ex_ee_len - b; + +			/* +			 * If this is a truncate, this condition +			 * should never happen +			 */ +			if (end == EXT_MAX_BLOCK) { +				ext_debug("  bad truncate %u:%u\n", +					start, end); +				err = -EIO; +				goto out; +			}  		} else {  			/* remove whole extent: excellent! 
*/  			block = ex_ee_block;  			num = 0; -			BUG_ON(a != ex_ee_block); -			BUG_ON(b != ex_ee_block + ex_ee_len - 1); +			if (a != ex_ee_block) { +				ext_debug("  bad truncate %u:%u\n", +					start, end); +				err = -EIO; +				goto out; +			} + +			if (b != ex_ee_block + ex_ee_len - 1) { +				ext_debug("  bad truncate %u:%u\n", +					start, end); +				err = -EIO; +				goto out; +			}  		}  		/* @@ -2270,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		if (num == 0) {  			/* this extent is removed; mark slot entirely unused */  			ext4_ext_store_pblock(ex, 0); -			le16_add_cpu(&eh->eh_entries, -1); +		} else if (block != ex_ee_block) { +			/* +			 * If this was a head removal, then we need to update +			 * the physical block since it is now at a different +			 * location +			 */ +			ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));  		}  		ex->ee_block = cpu_to_le32(block); @@ -2286,6 +2473,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,  		if (err)  			goto out; +		/* +		 * If the extent was completely released, +		 * we need to remove it from the leaf +		 */ +		if (num == 0) { +			if (end != EXT_MAX_BLOCK) { +				/* +				 * For hole punching, we need to scoot all the +				 * extents up when an extent is removed so that +				 * we dont have blank extents in the middle +				 */ +				memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * +					sizeof(struct ext4_extent)); + +				/* Now get rid of the one at the end */ +				memset(EXT_LAST_EXTENT(eh), 0, +					sizeof(struct ext4_extent)); +			} +			le16_add_cpu(&eh->eh_entries, -1); +		} +  		ext_debug("new extent: %u:%u:%llu\n", block, num,  				ext4_ext_pblock(ex));  		ex--; @@ -2326,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)  	return 1;  } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, +				ext4_lblk_t end)  {  	struct super_block *sb = inode->i_sb;  	int depth = 
ext_depth(inode); @@ -2365,7 +2574,8 @@ again:  	while (i >= 0 && err == 0) {  		if (i == depth) {  			/* this is leaf block */ -			err = ext4_ext_rm_leaf(handle, inode, path, start); +			err = ext4_ext_rm_leaf(handle, inode, path, +					start, end);  			/* root level has p_bh == NULL, brelse() eats this */  			brelse(path[i].p_bh);  			path[i].p_bh = NULL; @@ -2529,6 +2739,195 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)  	return ret;  } +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \ +					due to ENOSPC */ +#define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */ +#define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */ + +/* + * ext4_split_extent_at() splits an extent at given block. + * + * @handle: the journal handle + * @inode: the file inode + * @path: the path to the extent + * @split: the logical block where the extent is splitted. + * @split_flags: indicates if the extent could be zeroout if split fails, and + *		 the states(init or uninit) of new extents. + * @flags: flags used to insert new extent to extent tree. + * + * + * Splits extent [a, b] into two extents [a, @split) and [@split, b], states + * of which are deterimined by split_flag. + * + * There are two cases: + *  a> the extent are splitted into two extent. + *  b> split is not needed, and just mark the extent. + * + * return 0 on success. 
+ */ +static int ext4_split_extent_at(handle_t *handle, +			     struct inode *inode, +			     struct ext4_ext_path *path, +			     ext4_lblk_t split, +			     int split_flag, +			     int flags) +{ +	ext4_fsblk_t newblock; +	ext4_lblk_t ee_block; +	struct ext4_extent *ex, newex, orig_ex; +	struct ext4_extent *ex2 = NULL; +	unsigned int ee_len, depth; +	int err = 0; + +	ext_debug("ext4_split_extents_at: inode %lu, logical" +		"block %llu\n", inode->i_ino, (unsigned long long)split); + +	ext4_ext_show_leaf(inode, path); + +	depth = ext_depth(inode); +	ex = path[depth].p_ext; +	ee_block = le32_to_cpu(ex->ee_block); +	ee_len = ext4_ext_get_actual_len(ex); +	newblock = split - ee_block + ext4_ext_pblock(ex); + +	BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + +	err = ext4_ext_get_access(handle, inode, path + depth); +	if (err) +		goto out; + +	if (split == ee_block) { +		/* +		 * case b: block @split is the block that the extent begins with +		 * then we just change the state of the extent, and splitting +		 * is not needed. 
+		 */ +		if (split_flag & EXT4_EXT_MARK_UNINIT2) +			ext4_ext_mark_uninitialized(ex); +		else +			ext4_ext_mark_initialized(ex); + +		if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) +			ext4_ext_try_to_merge(inode, path, ex); + +		err = ext4_ext_dirty(handle, inode, path + depth); +		goto out; +	} + +	/* case a */ +	memcpy(&orig_ex, ex, sizeof(orig_ex)); +	ex->ee_len = cpu_to_le16(split - ee_block); +	if (split_flag & EXT4_EXT_MARK_UNINIT1) +		ext4_ext_mark_uninitialized(ex); + +	/* +	 * path may lead to new leaf, not to original leaf any more +	 * after ext4_ext_insert_extent() returns, +	 */ +	err = ext4_ext_dirty(handle, inode, path + depth); +	if (err) +		goto fix_extent_len; + +	ex2 = &newex; +	ex2->ee_block = cpu_to_le32(split); +	ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block)); +	ext4_ext_store_pblock(ex2, newblock); +	if (split_flag & EXT4_EXT_MARK_UNINIT2) +		ext4_ext_mark_uninitialized(ex2); + +	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); +	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { +		err = ext4_ext_zeroout(inode, &orig_ex); +		if (err) +			goto fix_extent_len; +		/* update the extent length and mark as initialized */ +		ex->ee_len = cpu_to_le32(ee_len); +		ext4_ext_try_to_merge(inode, path, ex); +		err = ext4_ext_dirty(handle, inode, path + depth); +		goto out; +	} else if (err) +		goto fix_extent_len; + +out: +	ext4_ext_show_leaf(inode, path); +	return err; + +fix_extent_len: +	ex->ee_len = orig_ex.ee_len; +	ext4_ext_dirty(handle, inode, path + depth); +	return err; +} + +/* + * ext4_split_extents() splits an extent and mark extent which is covered + * by @map as split_flags indicates + * + * It may result in splitting the extent into multiple extents (upto three) + * There are three possibilities: + *   a> There is no split required + *   b> Splits in two extents: Split is happening at either end of the extent + *   c> Splits in three extents: Somone is splitting in middle of the extent + * + */ +static int 
ext4_split_extent(handle_t *handle, +			      struct inode *inode, +			      struct ext4_ext_path *path, +			      struct ext4_map_blocks *map, +			      int split_flag, +			      int flags) +{ +	ext4_lblk_t ee_block; +	struct ext4_extent *ex; +	unsigned int ee_len, depth; +	int err = 0; +	int uninitialized; +	int split_flag1, flags1; + +	depth = ext_depth(inode); +	ex = path[depth].p_ext; +	ee_block = le32_to_cpu(ex->ee_block); +	ee_len = ext4_ext_get_actual_len(ex); +	uninitialized = ext4_ext_is_uninitialized(ex); + +	if (map->m_lblk + map->m_len < ee_block + ee_len) { +		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? +			      EXT4_EXT_MAY_ZEROOUT : 0; +		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; +		if (uninitialized) +			split_flag1 |= EXT4_EXT_MARK_UNINIT1 | +				       EXT4_EXT_MARK_UNINIT2; +		err = ext4_split_extent_at(handle, inode, path, +				map->m_lblk + map->m_len, split_flag1, flags1); +		if (err) +			goto out; +	} + +	ext4_ext_drop_refs(path); +	path = ext4_ext_find_extent(inode, map->m_lblk, path); +	if (IS_ERR(path)) +		return PTR_ERR(path); + +	if (map->m_lblk >= ee_block) { +		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? +			      EXT4_EXT_MAY_ZEROOUT : 0; +		if (uninitialized) +			split_flag1 |= EXT4_EXT_MARK_UNINIT1; +		if (split_flag & EXT4_EXT_MARK_UNINIT2) +			split_flag1 |= EXT4_EXT_MARK_UNINIT2; +		err = ext4_split_extent_at(handle, inode, path, +				map->m_lblk, split_flag1, flags); +		if (err) +			goto out; +	} + +	ext4_ext_show_leaf(inode, path); +out: +	return err ? 
err : map->m_len; +} +  #define EXT4_EXT_ZERO_LEN 7  /*   * This function is called by ext4_ext_map_blocks() if someone tries to write @@ -2545,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  					   struct ext4_map_blocks *map,  					   struct ext4_ext_path *path)  { -	struct ext4_extent *ex, newex, orig_ex; -	struct ext4_extent *ex1 = NULL; -	struct ext4_extent *ex2 = NULL; -	struct ext4_extent *ex3 = NULL; -	struct ext4_extent_header *eh; +	struct ext4_map_blocks split_map; +	struct ext4_extent zero_ex; +	struct ext4_extent *ex;  	ext4_lblk_t ee_block, eof_block;  	unsigned int allocated, ee_len, depth; -	ext4_fsblk_t newblock;  	int err = 0; -	int ret = 0; -	int may_zeroout; +	int split_flag = 0;  	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"  		"block %llu, max_blocks %u\n", inode->i_ino, @@ -2567,280 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		eof_block = map->m_lblk + map->m_len;  	depth = ext_depth(inode); -	eh = path[depth].p_hdr;  	ex = path[depth].p_ext;  	ee_block = le32_to_cpu(ex->ee_block);  	ee_len = ext4_ext_get_actual_len(ex);  	allocated = ee_len - (map->m_lblk - ee_block); -	newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - -	ex2 = ex; -	orig_ex.ee_block = ex->ee_block; -	orig_ex.ee_len   = cpu_to_le16(ee_len); -	ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); +	WARN_ON(map->m_lblk < ee_block);  	/*  	 * It is safe to convert extent to initialized via explicit  	 * zeroout only if extent is fully insde i_size or new_size.  	 */ -	may_zeroout = ee_block + ee_len <= eof_block; +	split_flag |= ee_block + ee_len <= eof_block ? 
EXT4_EXT_MAY_ZEROOUT : 0; -	err = ext4_ext_get_access(handle, inode, path + depth); -	if (err) -		goto out;  	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ -	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { -		err =  ext4_ext_zeroout(inode, &orig_ex); +	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && +	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) { +		err = ext4_ext_zeroout(inode, ex);  		if (err) -			goto fix_extent_len; -		/* update the extent length and mark as initialized */ -		ex->ee_block = orig_ex.ee_block; -		ex->ee_len   = orig_ex.ee_len; -		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -		ext4_ext_dirty(handle, inode, path + depth); -		/* zeroed the full extent */ -		return allocated; -	} - -	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */ -	if (map->m_lblk > ee_block) { -		ex1 = ex; -		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); -		ext4_ext_mark_uninitialized(ex1); -		ex2 = &newex; -	} -	/* -	 * for sanity, update the length of the ex2 extent before -	 * we insert ex3, if ex1 is NULL. This is to avoid temporary -	 * overlap of blocks. -	 */ -	if (!ex1 && allocated > map->m_len) -		ex2->ee_len = cpu_to_le16(map->m_len); -	/* ex3: to ee_block + ee_len : uninitialised */ -	if (allocated > map->m_len) { -		unsigned int newdepth; -		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ -		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { -			/* -			 * map->m_lblk == ee_block is handled by the zerouout -			 * at the beginning. -			 * Mark first half uninitialized. 
-			 * Mark second half initialized and zero out the -			 * initialized extent -			 */ -			ex->ee_block = orig_ex.ee_block; -			ex->ee_len   = cpu_to_le16(ee_len - allocated); -			ext4_ext_mark_uninitialized(ex); -			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -			ext4_ext_dirty(handle, inode, path + depth); - -			ex3 = &newex; -			ex3->ee_block = cpu_to_le32(map->m_lblk); -			ext4_ext_store_pblock(ex3, newblock); -			ex3->ee_len = cpu_to_le16(allocated); -			err = ext4_ext_insert_extent(handle, inode, path, -							ex3, 0); -			if (err == -ENOSPC) { -				err =  ext4_ext_zeroout(inode, &orig_ex); -				if (err) -					goto fix_extent_len; -				ex->ee_block = orig_ex.ee_block; -				ex->ee_len   = orig_ex.ee_len; -				ext4_ext_store_pblock(ex, -					ext4_ext_pblock(&orig_ex)); -				ext4_ext_dirty(handle, inode, path + depth); -				/* blocks available from map->m_lblk */ -				return allocated; - -			} else if (err) -				goto fix_extent_len; - -			/* -			 * We need to zero out the second half because -			 * an fallocate request can update file size and -			 * converting the second half to initialized extent -			 * implies that we can leak some junk data to user -			 * space. 
-			 */ -			err =  ext4_ext_zeroout(inode, ex3); -			if (err) { -				/* -				 * We should actually mark the -				 * second half as uninit and return error -				 * Insert would have changed the extent -				 */ -				depth = ext_depth(inode); -				ext4_ext_drop_refs(path); -				path = ext4_ext_find_extent(inode, map->m_lblk, -							    path); -				if (IS_ERR(path)) { -					err = PTR_ERR(path); -					return err; -				} -				/* get the second half extent details */ -				ex = path[depth].p_ext; -				err = ext4_ext_get_access(handle, inode, -								path + depth); -				if (err) -					return err; -				ext4_ext_mark_uninitialized(ex); -				ext4_ext_dirty(handle, inode, path + depth); -				return err; -			} - -			/* zeroed the second half */ -			return allocated; -		} -		ex3 = &newex; -		ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); -		ext4_ext_store_pblock(ex3, newblock + map->m_len); -		ex3->ee_len = cpu_to_le16(allocated - map->m_len); -		ext4_ext_mark_uninitialized(ex3); -		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); -		if (err == -ENOSPC && may_zeroout) { -			err =  ext4_ext_zeroout(inode, &orig_ex); -			if (err) -				goto fix_extent_len; -			/* update the extent length and mark as initialized */ -			ex->ee_block = orig_ex.ee_block; -			ex->ee_len   = orig_ex.ee_len; -			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -			ext4_ext_dirty(handle, inode, path + depth); -			/* zeroed the full extent */ -			/* blocks available from map->m_lblk */ -			return allocated; - -		} else if (err) -			goto fix_extent_len; -		/* -		 * The depth, and hence eh & ex might change -		 * as part of the insert above. 
-		 */ -		newdepth = ext_depth(inode); -		/* -		 * update the extent length after successful insert of the -		 * split extent -		 */ -		ee_len -= ext4_ext_get_actual_len(ex3); -		orig_ex.ee_len = cpu_to_le16(ee_len); -		may_zeroout = ee_block + ee_len <= eof_block; - -		depth = newdepth; -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, map->m_lblk, path); -		if (IS_ERR(path)) { -			err = PTR_ERR(path);  			goto out; -		} -		eh = path[depth].p_hdr; -		ex = path[depth].p_ext; -		if (ex2 != &newex) -			ex2 = ex;  		err = ext4_ext_get_access(handle, inode, path + depth);  		if (err)  			goto out; - -		allocated = map->m_len; - -		/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying -		 * to insert a extent in the middle zerout directly -		 * otherwise give the extent a chance to merge to left -		 */ -		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && -			map->m_lblk != ee_block && may_zeroout) { -			err =  ext4_ext_zeroout(inode, &orig_ex); -			if (err) -				goto fix_extent_len; -			/* update the extent length and mark as initialized */ -			ex->ee_block = orig_ex.ee_block; -			ex->ee_len   = orig_ex.ee_len; -			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -			ext4_ext_dirty(handle, inode, path + depth); -			/* zero out the first half */ -			/* blocks available from map->m_lblk */ -			return allocated; -		} -	} -	/* -	 * If there was a change of depth as part of the -	 * insertion of ex3 above, we need to update the length -	 * of the ex1 extent again here -	 */ -	if (ex1 && ex1 != ex) { -		ex1 = ex; -		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); -		ext4_ext_mark_uninitialized(ex1); -		ex2 = &newex; -	} -	/* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ -	ex2->ee_block = cpu_to_le32(map->m_lblk); -	ext4_ext_store_pblock(ex2, newblock); -	ex2->ee_len = cpu_to_le16(allocated); -	if (ex2 != ex) -		goto insert; -	/* -	 * New (initialized) extent starts from the first block -	 * in the current extent. 
i.e., ex2 == ex -	 * We have to see if it can be merged with the extent -	 * on the left. -	 */ -	if (ex2 > EXT_FIRST_EXTENT(eh)) { -		/* -		 * To merge left, pass "ex2 - 1" to try_to_merge(), -		 * since it merges towards right _only_. -		 */ -		ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); -		if (ret) { -			err = ext4_ext_correct_indexes(handle, inode, path); -			if (err) -				goto out; -			depth = ext_depth(inode); -			ex2--; -		} +		ext4_ext_mark_initialized(ex); +		ext4_ext_try_to_merge(inode, path, ex); +		err = ext4_ext_dirty(handle, inode, path + depth); +		goto out;  	} +  	/* -	 * Try to Merge towards right. This might be required -	 * only when the whole extent is being written to. -	 * i.e. ex2 == ex and ex3 == NULL. +	 * four cases: +	 * 1. split the extent into three extents. +	 * 2. split the extent into two extents, zeroout the first half. +	 * 3. split the extent into two extents, zeroout the second half. +	 * 4. split the extent into two extents without zeroout.  	 */ -	if (!ex3) { -		ret = ext4_ext_try_to_merge(inode, path, ex2); -		if (ret) { -			err = ext4_ext_correct_indexes(handle, inode, path); +	split_map.m_lblk = map->m_lblk; +	split_map.m_len = map->m_len; + +	if (allocated > map->m_len) { +		if (allocated <= EXT4_EXT_ZERO_LEN && +		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) { +			/* case 3 */ +			zero_ex.ee_block = +					 cpu_to_le32(map->m_lblk); +			zero_ex.ee_len = cpu_to_le16(allocated); +			ext4_ext_store_pblock(&zero_ex, +				ext4_ext_pblock(ex) + map->m_lblk - ee_block); +			err = ext4_ext_zeroout(inode, &zero_ex);  			if (err)  				goto out; +			split_map.m_lblk = map->m_lblk; +			split_map.m_len = allocated; +		} else if ((map->m_lblk - ee_block + map->m_len < +			   EXT4_EXT_ZERO_LEN) && +			   (EXT4_EXT_MAY_ZEROOUT & split_flag)) { +			/* case 2 */ +			if (map->m_lblk != ee_block) { +				zero_ex.ee_block = ex->ee_block; +				zero_ex.ee_len = cpu_to_le16(map->m_lblk - +							ee_block); +				ext4_ext_store_pblock(&zero_ex, 
+						      ext4_ext_pblock(ex)); +				err = ext4_ext_zeroout(inode, &zero_ex); +				if (err) +					goto out; +			} + +			split_map.m_lblk = ee_block; +			split_map.m_len = map->m_lblk - ee_block + map->m_len; +			allocated = map->m_len;  		}  	} -	/* Mark modified extent as dirty */ -	err = ext4_ext_dirty(handle, inode, path + depth); -	goto out; -insert: -	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); -	if (err == -ENOSPC && may_zeroout) { -		err =  ext4_ext_zeroout(inode, &orig_ex); -		if (err) -			goto fix_extent_len; -		/* update the extent length and mark as initialized */ -		ex->ee_block = orig_ex.ee_block; -		ex->ee_len   = orig_ex.ee_len; -		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -		ext4_ext_dirty(handle, inode, path + depth); -		/* zero out the first half */ -		return allocated; -	} else if (err) -		goto fix_extent_len; + +	allocated = ext4_split_extent(handle, inode, path, +				       &split_map, split_flag, 0); +	if (allocated < 0) +		err = allocated; +  out: -	ext4_ext_show_leaf(inode, path);  	return err ? 
err : allocated; - -fix_extent_len: -	ex->ee_block = orig_ex.ee_block; -	ex->ee_len   = orig_ex.ee_len; -	ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -	ext4_ext_mark_uninitialized(ex); -	ext4_ext_dirty(handle, inode, path + depth); -	return err;  }  /* @@ -2871,15 +3072,11 @@ static int ext4_split_unwritten_extents(handle_t *handle,  					struct ext4_ext_path *path,  					int flags)  { -	struct ext4_extent *ex, newex, orig_ex; -	struct ext4_extent *ex1 = NULL; -	struct ext4_extent *ex2 = NULL; -	struct ext4_extent *ex3 = NULL; -	ext4_lblk_t ee_block, eof_block; -	unsigned int allocated, ee_len, depth; -	ext4_fsblk_t newblock; -	int err = 0; -	int may_zeroout; +	ext4_lblk_t eof_block; +	ext4_lblk_t ee_block; +	struct ext4_extent *ex; +	unsigned int ee_len; +	int split_flag = 0, depth;  	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"  		"block %llu, max_blocks %u\n", inode->i_ino, @@ -2889,156 +3086,22 @@ static int ext4_split_unwritten_extents(handle_t *handle,  		inode->i_sb->s_blocksize_bits;  	if (eof_block < map->m_lblk + map->m_len)  		eof_block = map->m_lblk + map->m_len; - -	depth = ext_depth(inode); -	ex = path[depth].p_ext; -	ee_block = le32_to_cpu(ex->ee_block); -	ee_len = ext4_ext_get_actual_len(ex); -	allocated = ee_len - (map->m_lblk - ee_block); -	newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - -	ex2 = ex; -	orig_ex.ee_block = ex->ee_block; -	orig_ex.ee_len   = cpu_to_le16(ee_len); -	ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); -  	/*  	 * It is safe to convert extent to initialized via explicit  	 * zeroout only if extent is fully insde i_size or new_size.  	 */ -	may_zeroout = ee_block + ee_len <= eof_block; - -	/* - 	 * If the uninitialized extent begins at the same logical - 	 * block where the write begins, and the write completely - 	 * covers the extent, then we don't need to split it. 
- 	 */ -	if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) -		return allocated; - -	err = ext4_ext_get_access(handle, inode, path + depth); -	if (err) -		goto out; -	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */ -	if (map->m_lblk > ee_block) { -		ex1 = ex; -		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); -		ext4_ext_mark_uninitialized(ex1); -		ex2 = &newex; -	} -	/* -	 * for sanity, update the length of the ex2 extent before -	 * we insert ex3, if ex1 is NULL. This is to avoid temporary -	 * overlap of blocks. -	 */ -	if (!ex1 && allocated > map->m_len) -		ex2->ee_len = cpu_to_le16(map->m_len); -	/* ex3: to ee_block + ee_len : uninitialised */ -	if (allocated > map->m_len) { -		unsigned int newdepth; -		ex3 = &newex; -		ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); -		ext4_ext_store_pblock(ex3, newblock + map->m_len); -		ex3->ee_len = cpu_to_le16(allocated - map->m_len); -		ext4_ext_mark_uninitialized(ex3); -		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); -		if (err == -ENOSPC && may_zeroout) { -			err =  ext4_ext_zeroout(inode, &orig_ex); -			if (err) -				goto fix_extent_len; -			/* update the extent length and mark as initialized */ -			ex->ee_block = orig_ex.ee_block; -			ex->ee_len   = orig_ex.ee_len; -			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -			ext4_ext_dirty(handle, inode, path + depth); -			/* zeroed the full extent */ -			/* blocks available from map->m_lblk */ -			return allocated; - -		} else if (err) -			goto fix_extent_len; -		/* -		 * The depth, and hence eh & ex might change -		 * as part of the insert above. 
-		 */ -		newdepth = ext_depth(inode); -		/* -		 * update the extent length after successful insert of the -		 * split extent -		 */ -		ee_len -= ext4_ext_get_actual_len(ex3); -		orig_ex.ee_len = cpu_to_le16(ee_len); -		may_zeroout = ee_block + ee_len <= eof_block; - -		depth = newdepth; -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, map->m_lblk, path); -		if (IS_ERR(path)) { -			err = PTR_ERR(path); -			goto out; -		} -		ex = path[depth].p_ext; -		if (ex2 != &newex) -			ex2 = ex; +	depth = ext_depth(inode); +	ex = path[depth].p_ext; +	ee_block = le32_to_cpu(ex->ee_block); +	ee_len = ext4_ext_get_actual_len(ex); -		err = ext4_ext_get_access(handle, inode, path + depth); -		if (err) -			goto out; +	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; +	split_flag |= EXT4_EXT_MARK_UNINIT2; -		allocated = map->m_len; -	} -	/* -	 * If there was a change of depth as part of the -	 * insertion of ex3 above, we need to update the length -	 * of the ex1 extent again here -	 */ -	if (ex1 && ex1 != ex) { -		ex1 = ex; -		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); -		ext4_ext_mark_uninitialized(ex1); -		ex2 = &newex; -	} -	/* -	 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written -	 * using direct I/O, uninitialised still. 
-	 */ -	ex2->ee_block = cpu_to_le32(map->m_lblk); -	ext4_ext_store_pblock(ex2, newblock); -	ex2->ee_len = cpu_to_le16(allocated); -	ext4_ext_mark_uninitialized(ex2); -	if (ex2 != ex) -		goto insert; -	/* Mark modified extent as dirty */ -	err = ext4_ext_dirty(handle, inode, path + depth); -	ext_debug("out here\n"); -	goto out; -insert: -	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); -	if (err == -ENOSPC && may_zeroout) { -		err =  ext4_ext_zeroout(inode, &orig_ex); -		if (err) -			goto fix_extent_len; -		/* update the extent length and mark as initialized */ -		ex->ee_block = orig_ex.ee_block; -		ex->ee_len   = orig_ex.ee_len; -		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -		ext4_ext_dirty(handle, inode, path + depth); -		/* zero out the first half */ -		return allocated; -	} else if (err) -		goto fix_extent_len; -out: -	ext4_ext_show_leaf(inode, path); -	return err ? err : allocated; - -fix_extent_len: -	ex->ee_block = orig_ex.ee_block; -	ex->ee_len   = orig_ex.ee_len; -	ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); -	ext4_ext_mark_uninitialized(ex); -	ext4_ext_dirty(handle, inode, path + depth); -	return err; +	flags |= EXT4_GET_BLOCKS_PRE_IO; +	return ext4_split_extent(handle, inode, path, map, split_flag, flags);  } +  static int ext4_convert_unwritten_extents_endio(handle_t *handle,  					      struct inode *inode,  					      struct ext4_ext_path *path) @@ -3047,46 +3110,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,  	struct ext4_extent_header *eh;  	int depth;  	int err = 0; -	int ret = 0;  	depth = ext_depth(inode);  	eh = path[depth].p_hdr;  	ex = path[depth].p_ext; +	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" +		"block %llu, max_blocks %u\n", inode->i_ino, +		(unsigned long long)le32_to_cpu(ex->ee_block), +		ext4_ext_get_actual_len(ex)); +  	err = ext4_ext_get_access(handle, inode, path + depth);  	if (err)  		goto out;  	/* first mark the extent as initialized */  	
ext4_ext_mark_initialized(ex); -	/* -	 * We have to see if it can be merged with the extent -	 * on the left. +	/* note: ext4_ext_correct_indexes() isn't needed here because +	 * borders are not changed  	 */ -	if (ex > EXT_FIRST_EXTENT(eh)) { -		/* -		 * To merge left, pass "ex - 1" to try_to_merge(), -		 * since it merges towards right _only_. -		 */ -		ret = ext4_ext_try_to_merge(inode, path, ex - 1); -		if (ret) { -			err = ext4_ext_correct_indexes(handle, inode, path); -			if (err) -				goto out; -			depth = ext_depth(inode); -			ex--; -		} -	} -	/* -	 * Try to Merge towards right. -	 */ -	ret = ext4_ext_try_to_merge(inode, path, ex); -	if (ret) { -		err = ext4_ext_correct_indexes(handle, inode, path); -		if (err) -			goto out; -		depth = ext_depth(inode); -	} +	ext4_ext_try_to_merge(inode, path, ex); +  	/* Mark modified extent as dirty */  	err = ext4_ext_dirty(handle, inode, path + depth);  out: @@ -3302,15 +3346,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	ext4_fsblk_t newblock = 0;  	int err = 0, depth, ret;  	unsigned int allocated = 0; +	unsigned int punched_out = 0; +	unsigned int result = 0;  	struct ext4_allocation_request ar;  	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; +	struct ext4_map_blocks punch_map;  	ext_debug("blocks %u/%u requested for inode %lu\n",  		  map->m_lblk, map->m_len, inode->i_ino);  	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);  	/* check in cache */ -	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { +	if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && +		((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) {  		if (!newex.ee_start_lo && !newex.ee_start_hi) {  			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {  				/* @@ -3375,16 +3423,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  			ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,  				  ee_block, ee_len, newblock); -			/* Do not put uninitialized extent in the cache */ -			if 
(!ext4_ext_is_uninitialized(ex)) { -				ext4_ext_put_in_cache(inode, ee_block, -							ee_len, ee_start); -				goto out; +			if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { +				/* +				 * Do not put uninitialized extent +				 * in the cache +				 */ +				if (!ext4_ext_is_uninitialized(ex)) { +					ext4_ext_put_in_cache(inode, ee_block, +						ee_len, ee_start); +					goto out; +				} +				ret = ext4_ext_handle_uninitialized_extents( +					handle, inode, map, path, flags, +					allocated, newblock); +				return ret;  			} -			ret = ext4_ext_handle_uninitialized_extents(handle, -					inode, map, path, flags, allocated, -					newblock); -			return ret; + +			/* +			 * Punch out the map length, but only to the +			 * end of the extent +			 */ +			punched_out = allocated < map->m_len ? +				allocated : map->m_len; + +			/* +			 * Since extents need to be converted to +			 * uninitialized, they must fit in an +			 * uninitialized extent +			 */ +			if (punched_out > EXT_UNINIT_MAX_LEN) +				punched_out = EXT_UNINIT_MAX_LEN; + +			punch_map.m_lblk = map->m_lblk; +			punch_map.m_pblk = newblock; +			punch_map.m_len = punched_out; +			punch_map.m_flags = 0; + +			/* Check to see if the extent needs to be split */ +			if (punch_map.m_len != ee_len || +				punch_map.m_lblk != ee_block) { + +				ret = ext4_split_extent(handle, inode, +				path, &punch_map, 0, +				EXT4_GET_BLOCKS_PUNCH_OUT_EXT | +				EXT4_GET_BLOCKS_PRE_IO); + +				if (ret < 0) { +					err = ret; +					goto out2; +				} +				/* +				 * find extent for the block at +				 * the start of the hole +				 */ +				ext4_ext_drop_refs(path); +				kfree(path); + +				path = ext4_ext_find_extent(inode, +				map->m_lblk, NULL); +				if (IS_ERR(path)) { +					err = PTR_ERR(path); +					path = NULL; +					goto out2; +				} + +				depth = ext_depth(inode); +				ex = path[depth].p_ext; +				ee_len = ext4_ext_get_actual_len(ex); +				ee_block = le32_to_cpu(ex->ee_block); +				ee_start = ext4_ext_pblock(ex); + +			} + +			
ext4_ext_mark_uninitialized(ex); + +			err = ext4_ext_remove_space(inode, map->m_lblk, +				map->m_lblk + punched_out); + +			goto out2;  		}  	} @@ -3446,6 +3562,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	else  		/* disable in-core preallocation for non-regular files */  		ar.flags = 0; +	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) +		ar.flags |= EXT4_MB_HINT_NOPREALLOC;  	newblock = ext4_mb_new_blocks(handle, &ar, &err);  	if (!newblock)  		goto out2; @@ -3529,7 +3647,11 @@ out2:  	}  	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,  		newblock, map->m_len, err ? err : allocated); -	return err ? err : allocated; + +	result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? +			punched_out : allocated; + +	return err ? err : result;  }  void ext4_ext_truncate(struct inode *inode) @@ -3577,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode)  	last_block = (inode->i_size + sb->s_blocksize - 1)  			>> EXT4_BLOCK_SIZE_BITS(sb); -	err = ext4_ext_remove_space(inode, last_block); +	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK);  	/* In a multi-transaction truncate, we only make the final  	 * transaction synchronous. @@ -3585,8 +3707,9 @@ void ext4_ext_truncate(struct inode *inode)  	if (IS_SYNC(inode))  		ext4_handle_sync(handle); -out_stop:  	up_write(&EXT4_I(inode)->i_data_sem); + +out_stop:  	/*  	 * If this was a simple ftruncate() and the file will remain alive,  	 * then we need to clear up the orphan record which we created above. 
@@ -3651,10 +3774,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)  	struct ext4_map_blocks map;  	unsigned int credits, blkbits = inode->i_blkbits; -	/* We only support the FALLOC_FL_KEEP_SIZE mode */ -	if (mode & ~FALLOC_FL_KEEP_SIZE) -		return -EOPNOTSUPP; -  	/*  	 * currently supporting (pre)allocate mode for extent-based  	 * files _only_ @@ -3662,6 +3781,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)  	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))  		return -EOPNOTSUPP; +	/* Return error if mode is not supported */ +	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) +		return -EOPNOTSUPP; + +	if (mode & FALLOC_FL_PUNCH_HOLE) +		return ext4_punch_hole(file, offset, len); +  	trace_ext4_fallocate_enter(inode, offset, len, mode);  	map.m_lblk = offset >> blkbits;  	/* @@ -3691,7 +3817,8 @@ retry:  			break;  		}  		ret = ext4_map_blocks(handle, inode, &map, -				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); +				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | +				      EXT4_GET_BLOCKS_NO_NORMALIZE);  		if (ret <= 0) {  #ifdef EXT4FS_DEBUG  			WARN_ON(ret <= 0); @@ -3822,6 +3949,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,  		pgoff_t		last_offset;  		pgoff_t		offset;  		pgoff_t		index; +		pgoff_t		start_index = 0;  		struct page	**pages = NULL;  		struct buffer_head *bh = NULL;  		struct buffer_head *head = NULL; @@ -3848,39 +3976,57 @@ out:  				kfree(pages);  				return EXT_CONTINUE;  			} +			index = 0; +next_page:  			/* Try to find the 1st mapped buffer. 
*/ -			end = ((__u64)pages[0]->index << PAGE_SHIFT) >> +			end = ((__u64)pages[index]->index << PAGE_SHIFT) >>  				  blksize_bits; -			if (!page_has_buffers(pages[0])) +			if (!page_has_buffers(pages[index]))  				goto out; -			head = page_buffers(pages[0]); +			head = page_buffers(pages[index]);  			if (!head)  				goto out; +			index++;  			bh = head;  			do { -				if (buffer_mapped(bh)) { +				if (end >= newex->ec_block + +					newex->ec_len) +					/* The buffer is out of +					 * the request range. +					 */ +					goto out; + +				if (buffer_mapped(bh) && +				    end >= newex->ec_block) { +					start_index = index - 1;  					/* get the 1st mapped buffer. */ -					if (end > newex->ec_block + -						newex->ec_len) -						/* The buffer is out of -						 * the request range. -						 */ -						goto out;  					goto found_mapped_buffer;  				} +  				bh = bh->b_this_page;  				end++;  			} while (bh != head); -			/* No mapped buffer found. */ -			goto out; +			/* No mapped buffer in the range found in this page, +			 * We need to look up next page. +			 */ +			if (index >= ret) { +				/* There is no page left, but we need to limit +				 * newex->ec_len. +				 */ +				newex->ec_len = end - newex->ec_block; +				goto out; +			} +			goto next_page;  		} else {  			/*Find contiguous delayed buffers. */  			if (ret > 0 && pages[0]->index == last_offset)  				head = page_buffers(pages[0]);  			bh = head; +			index = 1; +			start_index = 0;  		}  found_mapped_buffer: @@ -3903,7 +4049,7 @@ found_mapped_buffer:  				end++;  			} while (bh != head); -			for (index = 1; index < ret; index++) { +			for (; index < ret; index++) {  				if (!page_has_buffers(pages[index])) {  					bh = NULL;  					break; @@ -3913,8 +4059,10 @@ found_mapped_buffer:  					bh = NULL;  					break;  				} +  				if (pages[index]->index != -					pages[0]->index + index) { +				    pages[start_index]->index + index +				    - start_index) {  					/* Blocks are not contiguous. 
*/  					bh = NULL;  					break; @@ -4006,6 +4154,177 @@ static int ext4_xattr_fiemap(struct inode *inode,  	return (error < 0 ? error : 0);  } +/* + * ext4_ext_punch_hole + * + * Punches a hole of "length" bytes in a file starting + * at byte "offset" + * + * @file:   The file to punch a hole in + * @offset: The starting byte offset of the hole + * @length: The length of the hole + * + * Returns 0 on success or a negative error code + */ +int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) +{ +	struct inode *inode = file->f_path.dentry->d_inode; +	struct super_block *sb = inode->i_sb; +	struct ext4_ext_cache cache_ex; +	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; +	struct address_space *mapping = inode->i_mapping; +	struct ext4_map_blocks map; +	handle_t *handle; +	loff_t first_block_offset, last_block_offset, block_len; +	loff_t first_page, last_page, first_page_offset, last_page_offset; +	int ret, credits, blocks_released, err = 0; + +	first_block = (offset + sb->s_blocksize - 1) >> +		EXT4_BLOCK_SIZE_BITS(sb); +	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); + +	first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); +	last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); + +	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +	last_page = (offset + length) >> PAGE_CACHE_SHIFT; + +	first_page_offset = first_page << PAGE_CACHE_SHIFT; +	last_page_offset = last_page << PAGE_CACHE_SHIFT; + +	/* +	 * Write out all dirty pages to avoid race conditions +	 * Then release them. +	 */ +	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { +		err = filemap_write_and_wait_range(mapping, +			first_page_offset == 0 ? 
0 : first_page_offset-1, +			last_page_offset); + +			if (err) +				return err; +	} + +	/* Now release the pages */ +	if (last_page_offset > first_page_offset) { +		truncate_inode_pages_range(mapping, first_page_offset, +					   last_page_offset-1); +	} + +	/* finish any pending end_io work */ +	ext4_flush_completed_IO(inode); + +	credits = ext4_writepage_trans_blocks(inode); +	handle = ext4_journal_start(inode, credits); +	if (IS_ERR(handle)) +		return PTR_ERR(handle); + +	err = ext4_orphan_add(handle, inode); +	if (err) +		goto out; + +	/* +	 * Now we need to zero out the non-block-aligned data. +	 * If the hole lies within a single block, just +	 * zero out the middle +	 */ +	if (first_block > last_block) +		ext4_block_zero_page_range(handle, mapping, offset, length); +	else { +		/* zero out the head of the hole before the first block */ +		block_len  = first_block_offset - offset; +		if (block_len > 0) +			ext4_block_zero_page_range(handle, mapping, +						   offset, block_len); + +		/* zero out the tail of the hole after the last block */ +		block_len = offset + length - last_block_offset; +		if (block_len > 0) { +			ext4_block_zero_page_range(handle, mapping, +					last_block_offset, block_len); +		} +	} + +	/* If there are no blocks to remove, return now */ +	if (first_block >= last_block) +		goto out; + +	down_write(&EXT4_I(inode)->i_data_sem); +	ext4_ext_invalidate_cache(inode); +	ext4_discard_preallocations(inode); + +	/* +	 * Loop over all the blocks and identify blocks +	 * that need to be punched out +	 */ +	iblock = first_block; +	blocks_released = 0; +	while (iblock < last_block) { +		max_blocks = last_block - iblock; +		num_blocks = 1; +		memset(&map, 0, sizeof(map)); +		map.m_lblk = iblock; +		map.m_len = max_blocks; +		ret = ext4_ext_map_blocks(handle, inode, &map, +			EXT4_GET_BLOCKS_PUNCH_OUT_EXT); + +		if (ret > 0) { +			blocks_released += ret; +			num_blocks = ret; +		} else if (ret == 0) { +			/* +			 * If map blocks could not find the block, +			
 * then it is in a hole.  If the hole was +			 * not already cached, then map blocks should +			 * put it in the cache.  So we can get the hole +			 * out of the cache +			 */ +			memset(&cache_ex, 0, sizeof(cache_ex)); +			if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && +				!cache_ex.ec_start) { + +				/* The hole is cached */ +				num_blocks = cache_ex.ec_block + +				cache_ex.ec_len - iblock; + +			} else { +				/* The block could not be identified */ +				err = -EIO; +				break; +			} +		} else { +			/* Map blocks error */ +			err = ret; +			break; +		} + +		if (num_blocks == 0) { +			/* This condition should never happen */ +			ext_debug("Block lookup failed"); +			err = -EIO; +			break; +		} + +		iblock += num_blocks; +	} + +	if (blocks_released > 0) { +		ext4_ext_invalidate_cache(inode); +		ext4_discard_preallocations(inode); +	} + +	if (IS_SYNC(inode)) +		ext4_handle_sync(handle); + +	up_write(&EXT4_I(inode)->i_data_sem); + +out: +	ext4_orphan_del(handle, inode); +	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	ext4_mark_inode_dirty(handle, inode); +	ext4_journal_stop(handle); +	return err; +}  int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  		__u64 start, __u64 len)  { @@ -4042,4 +4361,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,  	return error;  } -  |