diff options
| author | Theodore Ts'o <tytso@mit.edu> | 2013-05-11 19:07:42 -0400 | 
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2013-05-11 19:07:42 -0400 | 
| commit | a549984b8c95acbecefd1fdd4bfdbea4d29b0588 (patch) | |
| tree | eb25bf90acc6c084de08616ebb26ee091158c46e | |
| parent | e6155736ad76b2070652745f9e54cdea3f0d8567 (diff) | |
| download | olio-linux-3.10-a549984b8c95acbecefd1fdd4bfdbea4d29b0588.tar.xz olio-linux-3.10-a549984b8c95acbecefd1fdd4bfdbea4d29b0588.zip  | |
ext4: revert "ext4: use io_end for multiple bios"
This reverts commit 4eec708d263f0ee10861d69251708a225b64cac7.
Multiple users have reported crashes which is apparently caused by
this commit.  Thanks to Dmitry Monakhov for bisecting it.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
| -rw-r--r-- | fs/ext4/ext4.h | 8 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 85 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 121 | 
3 files changed, 85 insertions, 129 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0aabb344b02..5aae3d12d40 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -209,7 +209,6 @@ typedef struct ext4_io_end {  	ssize_t			size;		/* size of the extent */  	struct kiocb		*iocb;		/* iocb struct for AIO */  	int			result;		/* error value for AIO */ -	atomic_t		count;		/* reference counter */  } ext4_io_end_t;  struct ext4_io_submit { @@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,  /* page-io.c */  extern int __init ext4_init_pageio(void); +extern void ext4_add_complete_io(ext4_io_end_t *io_end);  extern void ext4_exit_pageio(void);  extern void ext4_ioend_shutdown(struct inode *); +extern void ext4_free_io_end(ext4_io_end_t *io);  extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); -extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); -extern int ext4_put_io_end(ext4_io_end_t *io_end); -extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); -extern void ext4_io_submit_init(struct ext4_io_submit *io, -				struct writeback_control *wbc);  extern void ext4_end_io_work(struct work_struct *work);  extern void ext4_io_submit(struct ext4_io_submit *io);  extern int ext4_bio_write_page(struct ext4_io_submit *io, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 793d44b84d7..d6665699235 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1487,10 +1487,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,  	struct ext4_io_submit io_submit;  	BUG_ON(mpd->next_page <= mpd->first_page); -	ext4_io_submit_init(&io_submit, mpd->wbc); -	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); -	if (!io_submit.io_end) -		return -ENOMEM; +	memset(&io_submit, 0, sizeof(io_submit));  	/*  	 * We need to start from the first_page to the next_page - 1  	 * to make sure we also write the mapped dirty buffer_heads. @@ -1578,8 +1575,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,  		pagevec_release(&pvec);  	}  	ext4_io_submit(&io_submit); -	/* Drop io_end reference we got from init */ -	ext4_put_io_end_defer(io_submit.io_end);  	return ret;  } @@ -2238,16 +2233,9 @@ static int ext4_writepage(struct page *page,  		 */  		return __ext4_journalled_writepage(page, len); -	ext4_io_submit_init(&io_submit, wbc); -	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); -	if (!io_submit.io_end) { -		redirty_page_for_writepage(wbc, page); -		return -ENOMEM; -	} +	memset(&io_submit, 0, sizeof(io_submit));  	ret = ext4_bio_write_page(&io_submit, page, len, wbc);  	ext4_io_submit(&io_submit); -	/* Drop io_end reference we got from init */ -	ext4_put_io_end_defer(io_submit.io_end);  	return ret;  } @@ -3078,13 +3066,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,  	struct inode *inode = file_inode(iocb->ki_filp);          ext4_io_end_t *io_end = iocb->private; -	/* if not async direct IO just return */ -	if (!io_end) { -		inode_dio_done(inode); -		if (is_async) -			aio_complete(iocb, ret, 0); -		return; -	} +	/* if not async direct IO or dio with 0 bytes write, just return */ +	if (!io_end || !size) +		goto out;  	ext_debug("ext4_end_io_dio(): io_end 0x%p "  		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", @@ -3092,13 +3076,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,  		  size);  	iocb->private = NULL; + +	/* if not aio dio with unwritten extents, just free io and return */ +	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { +		ext4_free_io_end(io_end); +out: +		inode_dio_done(inode); +		if (is_async) +			aio_complete(iocb, ret, 0); +		return; +	} +  	io_end->offset = offset;  	io_end->size = size;  	if (is_async) {  		io_end->iocb = iocb;  		io_end->result = ret;  	} -	ext4_put_io_end_defer(io_end); + +	ext4_add_complete_io(io_end);  }  /* @@ -3132,7 +3128,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,  	get_block_t *get_block_func = NULL;  	int dio_flags = 0;  	loff_t final_size = offset + count; -	ext4_io_end_t *io_end = NULL;  	/* Use the old path for reads and writes beyond i_size. */  	if (rw != WRITE || final_size > inode->i_size) @@ -3171,16 +3166,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,  	iocb->private = NULL;  	ext4_inode_aio_set(inode, NULL);  	if (!is_sync_kiocb(iocb)) { -		io_end = ext4_init_io_end(inode, GFP_NOFS); +		ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);  		if (!io_end) {  			ret = -ENOMEM;  			goto retake_lock;  		}  		io_end->flag |= EXT4_IO_END_DIRECT; -		/* -		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio() -		 */ -		iocb->private = ext4_get_io_end(io_end); +		iocb->private = io_end;  		/*  		 * we save the io structure for current async direct  		 * IO, so that later ext4_map_blocks() could flag the @@ -3204,27 +3196,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,  				   NULL,  				   dio_flags); +	if (iocb->private) +		ext4_inode_aio_set(inode, NULL);  	/* -	 * Put our reference to io_end. This can free the io_end structure e.g. -	 * in sync IO case or in case of error. It can even perform extent -	 * conversion if all bios we submitted finished before we got here. -	 * Note that in that case iocb->private can be already set to NULL -	 * here. +	 * The io_end structure takes a reference to the inode, that +	 * structure needs to be destroyed and the reference to the +	 * inode need to be dropped, when IO is complete, even with 0 +	 * byte write, or failed. +	 * +	 * In the successful AIO DIO case, the io_end structure will +	 * be destroyed and the reference to the inode will be dropped +	 * after the end_io call back function is called. +	 * +	 * In the case there is 0 byte write, or error case, since VFS +	 * direct IO won't invoke the end_io call back function, we +	 * need to free the end_io structure here.  	 */ -	if (io_end) { -		ext4_inode_aio_set(inode, NULL); -		ext4_put_io_end(io_end); -		/* -		 * In case of error or no write ext4_end_io_dio() was not -		 * called so we have to put iocb's reference. -		 */ -		if (ret <= 0 && ret != -EIOCBQUEUED) { -			WARN_ON(iocb->private != io_end); -			ext4_put_io_end(io_end); -			iocb->private = NULL; -		} -	} -	if (ret > 0 && !overwrite && ext4_test_inode_state(inode, +	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { +		ext4_free_io_end(iocb->private); +		iocb->private = NULL; +	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,  						EXT4_STATE_DIO_UNWRITTEN)) {  		int err;  		/* diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5929cd0baa2..6626aba57eb 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -61,28 +61,15 @@ void ext4_ioend_shutdown(struct inode *inode)  		cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);  } -static void ext4_release_io_end(ext4_io_end_t *io_end) +void ext4_free_io_end(ext4_io_end_t *io)  { -	BUG_ON(!list_empty(&io_end->list)); -	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); +	BUG_ON(!io); +	BUG_ON(!list_empty(&io->list)); +	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); -	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) -		wake_up_all(ext4_ioend_wq(io_end->inode)); -	if (io_end->flag & EXT4_IO_END_DIRECT) -		inode_dio_done(io_end->inode); -	if (io_end->iocb) -		aio_complete(io_end->iocb, io_end->result, 0); -	kmem_cache_free(io_end_cachep, io_end); -} - -static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) -{ -	struct inode *inode = io_end->inode; - -	io_end->flag &= ~EXT4_IO_END_UNWRITTEN; -	/* Wake up anyone waiting on unwritten extent conversion */ -	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) -		wake_up_all(ext4_ioend_wq(inode)); +	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) +		wake_up_all(ext4_ioend_wq(io->inode)); +	kmem_cache_free(io_end_cachep, io);  }  /* check a range of space and convert unwritten extents to written. */ @@ -105,8 +92,13 @@ static int ext4_end_io(ext4_io_end_t *io)  			 "(inode %lu, offset %llu, size %zd, error %d)",  			 inode->i_ino, offset, size, ret);  	} -	ext4_clear_io_unwritten_flag(io); -	ext4_release_io_end(io); +	/* Wake up anyone waiting on unwritten extent conversion */ +	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) +		wake_up_all(ext4_ioend_wq(inode)); +	if (io->flag & EXT4_IO_END_DIRECT) +		inode_dio_done(inode); +	if (io->iocb) +		aio_complete(io->iocb, io->result, 0);  	return ret;  } @@ -137,7 +129,7 @@ static void dump_completed_IO(struct inode *inode)  }  /* Add the io_end to per-inode completed end_io list. */ -static void ext4_add_complete_io(ext4_io_end_t *io_end) +void ext4_add_complete_io(ext4_io_end_t *io_end)  {  	struct ext4_inode_info *ei = EXT4_I(io_end->inode);  	struct workqueue_struct *wq; @@ -174,6 +166,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)  		err = ext4_end_io(io);  		if (unlikely(!ret && err))  			ret = err; +		io->flag &= ~EXT4_IO_END_UNWRITTEN; +		ext4_free_io_end(io);  	}  	return ret;  } @@ -205,43 +199,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)  		atomic_inc(&EXT4_I(inode)->i_ioend_count);  		io->inode = inode;  		INIT_LIST_HEAD(&io->list); -		atomic_set(&io->count, 1);  	}  	return io;  } -void ext4_put_io_end_defer(ext4_io_end_t *io_end) -{ -	if (atomic_dec_and_test(&io_end->count)) { -		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { -			ext4_release_io_end(io_end); -			return; -		} -		ext4_add_complete_io(io_end); -	} -} - -int ext4_put_io_end(ext4_io_end_t *io_end) -{ -	int err = 0; - -	if (atomic_dec_and_test(&io_end->count)) { -		if (io_end->flag & EXT4_IO_END_UNWRITTEN) { -			err = ext4_convert_unwritten_extents(io_end->inode, -						io_end->offset, io_end->size); -			ext4_clear_io_unwritten_flag(io_end); -		} -		ext4_release_io_end(io_end); -	} -	return err; -} - -ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) -{ -	atomic_inc(&io_end->count); -	return io_end; -} -  /*   * Print an buffer I/O error compatible with the fs/buffer.c.  This   * provides compatibility with dmesg scrapers that look for a specific @@ -324,7 +285,12 @@ static void ext4_end_bio(struct bio *bio, int error)  			     bi_sector >> (inode->i_blkbits - 9));  	} -	ext4_put_io_end_defer(io_end); +	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { +		ext4_free_io_end(io_end); +		return; +	} + +	ext4_add_complete_io(io_end);  }  void ext4_io_submit(struct ext4_io_submit *io) @@ -338,37 +304,40 @@ void ext4_io_submit(struct ext4_io_submit *io)  		bio_put(io->io_bio);  	}  	io->io_bio = NULL; -} - -void ext4_io_submit_init(struct ext4_io_submit *io, -			 struct writeback_control *wbc) -{ -	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE); -	io->io_bio = NULL; +	io->io_op = 0;  	io->io_end = NULL;  } -static int io_submit_init_bio(struct ext4_io_submit *io, -			      struct buffer_head *bh) +static int io_submit_init(struct ext4_io_submit *io, +			  struct inode *inode, +			  struct writeback_control *wbc, +			  struct buffer_head *bh)  { +	ext4_io_end_t *io_end; +	struct page *page = bh->b_page;  	int nvecs = bio_get_nr_vecs(bh->b_bdev);  	struct bio *bio; +	io_end = ext4_init_io_end(inode, GFP_NOFS); +	if (!io_end) +		return -ENOMEM;  	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));  	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);  	bio->bi_bdev = bh->b_bdev; +	bio->bi_private = io->io_end = io_end;  	bio->bi_end_io = ext4_end_bio; -	bio->bi_private = ext4_get_io_end(io->io_end); -	if (!io->io_end->size) -		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT) -				     + bh_offset(bh); + +	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); +  	io->io_bio = bio; +	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);  	io->io_next_block = bh->b_blocknr;  	return 0;  }  static int io_submit_add_bh(struct ext4_io_submit *io,  			    struct inode *inode, +			    struct writeback_control *wbc,  			    struct buffer_head *bh)  {  	ext4_io_end_t *io_end; @@ -379,18 +348,18 @@ submit_and_retry:  		ext4_io_submit(io);  	}  	if (io->io_bio == NULL) { -		ret = io_submit_init_bio(io, bh); +		ret = io_submit_init(io, inode, wbc, bh);  		if (ret)  			return ret;  	} -	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); -	if (ret != bh->b_size) -		goto submit_and_retry;  	io_end = io->io_end;  	if (test_clear_buffer_uninit(bh))  		ext4_set_io_unwritten_flag(inode, io_end); -	io_end->size += bh->b_size; +	io->io_end->size += bh->b_size;  	io->io_next_block++; +	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); +	if (ret != bh->b_size) +		goto submit_and_retry;  	return 0;  } @@ -462,7 +431,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,  	do {  		if (!buffer_async_write(bh))  			continue; -		ret = io_submit_add_bh(io, inode, bh); +		ret = io_submit_add_bh(io, inode, wbc, bh);  		if (ret) {  			/*  			 * We only get here on ENOMEM.  Not much else  |