-rw-r--r-- | fs/btrfs/ctree.h     |   2
-rw-r--r-- | fs/btrfs/disk-io.c   |   4
-rw-r--r-- | fs/btrfs/file-item.c |   2
-rw-r--r-- | fs/btrfs/file.c      | 176
-rw-r--r-- | fs/btrfs/inode.c     | 303
5 files changed, 328 insertions(+), 159 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bd648d68e8..1e83ad720b1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1142,6 +1142,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct btrfs_path *path,
                         u64 isize);
 /* inode.c */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_read_locked_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, int wait);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0ebba6f2e4..602b63dc76c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -155,9 +155,9 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
         desc.tfm = tfm;
         desc.flags = 0;
         sg_init_one(&sg, data, len);
-        spin_lock(&root->fs_info->hash_lock);
+        spin_lock_irq(&root->fs_info->hash_lock);
         ret = crypto_hash_digest(&desc, &sg, 1, result);
-        spin_unlock(&root->fs_info->hash_lock);
+        spin_unlock_irq(&root->fs_info->hash_lock);
         if (ret) {
                 printk("digest failed\n");
         }
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 3c140cc1dd0..1068993ab1c 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -228,6 +228,7 @@ found:
                            path->nodes[0]->b_data,
                            root->fs_info->sb->s_blocksize);
         ret = btrfs_csum_data(root, data, len, &item->csum);
+// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum));
         btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
         btrfs_release_path(root, path);
@@ -298,4 +299,3 @@ fail:
         mutex_unlock(&root->fs_info->fs_mutex);
         return ret;
 }
-
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 83836fb3d4e..de8d47b44e1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -103,10 +103,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                 this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
                 /* FIXME, one block at a time */
 
-                mutex_lock(&root->fs_info->fs_mutex);
-                trans = btrfs_start_transaction(root, 1);
-                btrfs_set_trans_block_group(trans, inode);
-
                 bh = page_buffers(pages[i]);
 
                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
@@ -115,6 +111,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                         char *ptr, *kaddr;
                         u32 datasize;
 
+                        mutex_lock(&root->fs_info->fs_mutex);
+                        trans = btrfs_start_transaction(root, 1);
+                        btrfs_set_trans_block_group(trans, inode);
+
                         /* create an inline extent, and copy the data in */
                         path = btrfs_alloc_path();
                         BUG_ON(!path);
@@ -135,24 +135,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                         btrfs_set_file_extent_type(ei,
                                                    BTRFS_FILE_EXTENT_INLINE);
                         ptr = btrfs_file_extent_inline_start(ei);
+
                         kaddr = kmap_atomic(bh->b_page, KM_USER0);
                         btrfs_memcpy(root, path->nodes[0]->b_data,
                                      ptr, kaddr + bh_offset(bh),
                                      offset + write_bytes);
                         kunmap_atomic(kaddr, KM_USER0);
+
                         mark_buffer_dirty(path->nodes[0]);
                         btrfs_free_path(path);
-                } else if (buffer_mapped(bh)) {
-                        /* csum the file data */
-                        btrfs_csum_file_block(trans, root, inode->i_ino,
-                                      pages[i]->index << PAGE_CACHE_SHIFT,
-                                      kmap(pages[i]), PAGE_CACHE_SIZE);
-                        kunmap(pages[i]);
+                        ret = btrfs_end_transaction(trans, root);
+                        BUG_ON(ret);
+                        mutex_unlock(&root->fs_info->fs_mutex);
                 }
-                SetPageChecked(pages[i]);
-                ret = btrfs_end_transaction(trans, root);
-                BUG_ON(ret);
-                mutex_unlock(&root->fs_info->fs_mutex);
 
                 ret = btrfs_commit_write(file, pages[i], offset,
                                          offset + this_write);
@@ -503,7 +498,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
         if ((pos & (PAGE_CACHE_SIZE - 1))) {
                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
                 if (!PageUptodate(pinned[0])) {
-                        ret = mpage_readpage(pinned[0], btrfs_get_block);
+                        ret = btrfs_readpage(NULL, pinned[0]);
                         BUG_ON(ret);
                         wait_on_page_locked(pinned[0]);
                 } else {
@@ -513,7 +508,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
         if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
                 if (!PageUptodate(pinned[1])) {
-                        ret = mpage_readpage(pinned[1], btrfs_get_block);
+                        ret = btrfs_readpage(NULL, pinned[1]);
                         BUG_ON(ret);
                         wait_on_page_locked(pinned[1]);
                 } else {
@@ -633,138 +628,6 @@ out:
         return num_written ? num_written : err;
 }
 
-/*
- * FIXME, do this by stuffing the csum we want in the info hanging off
- * page->private.  For now, verify file csums on read
- */
-static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
-                        unsigned long offset, unsigned long size)
-{
-        char *kaddr;
-        unsigned long left, count = desc->count;
-        struct inode *inode = page->mapping->host;
-
-        if (size > count)
-                size = count;
-
-        if (!PageChecked(page)) {
-                /* FIXME, do it per block */
-                struct btrfs_root *root = BTRFS_I(inode)->root;
-                int ret;
-                struct buffer_head *bh;
-
-                if (page_has_buffers(page)) {
-                        bh = page_buffers(page);
-                        if (!buffer_mapped(bh)) {
-                                SetPageChecked(page);
-                                goto checked;
-                        }
-                }
-
-                ret = btrfs_csum_verify_file_block(root,
-                                  page->mapping->host->i_ino,
-                                  page->index << PAGE_CACHE_SHIFT,
-                                  kmap(page), PAGE_CACHE_SIZE);
-                if (ret) {
-                        if (ret != -ENOENT) {
-                                printk("failed to verify ino %lu page %lu ret %d\n",
-                                       page->mapping->host->i_ino,
-                                       page->index, ret);
-                                memset(page_address(page), 1, PAGE_CACHE_SIZE);
-                                flush_dcache_page(page);
-                        }
-                }
-                SetPageChecked(page);
-                kunmap(page);
-        }
-checked:
-        /*
-         * Faults on the destination of a read are common, so do it before
-         * taking the kmap.
-         */
-        if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-                kaddr = kmap_atomic(page, KM_USER0);
-                left = __copy_to_user_inatomic(desc->arg.buf,
-                                                kaddr + offset, size);
-                kunmap_atomic(kaddr, KM_USER0);
-                if (left == 0)
-                        goto success;
-        }
-
-        /* Do it the slow way */
-        kaddr = kmap(page);
-        left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-        kunmap(page);
-
-        if (left) {
-                size -= left;
-                desc->error = -EFAULT;
-        }
-success:
-        desc->count = count - size;
-        desc->written += size;
-        desc->arg.buf += size;
-        return size;
-}
-
-/**
- * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- */
-static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                                   unsigned long nr_segs, loff_t pos)
-{
-        struct file *filp = iocb->ki_filp;
-        ssize_t retval;
-        unsigned long seg;
-        size_t count;
-        loff_t *ppos = &iocb->ki_pos;
-
-        count = 0;
-        for (seg = 0; seg < nr_segs; seg++) {
-                const struct iovec *iv = &iov[seg];
-
-                /*
-                 * If any segment has a negative length, or the cumulative
-                 * length ever wraps negative then return -EINVAL.
-                 */
-                count += iv->iov_len;
-                if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-                        return -EINVAL;
-                if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-                        continue;
-                if (seg == 0)
-                        return -EFAULT;
-                nr_segs = seg;
-                count -= iv->iov_len;	/* This segment is no good */
-                break;
-        }
-        retval = 0;
-        if (count) {
-                for (seg = 0; seg < nr_segs; seg++) {
-                        read_descriptor_t desc;
-
-                        desc.written = 0;
-                        desc.arg.buf = iov[seg].iov_base;
-                        desc.count = iov[seg].iov_len;
-                        if (desc.count == 0)
-                                continue;
-                        desc.error = 0;
-                        do_generic_file_read(filp, ppos, &desc,
-                                             btrfs_read_actor);
-                        retval += desc.written;
-                        if (desc.error) {
-                                retval = retval ?: desc.error;
-                                break;
-                        }
-                }
-        }
-        return retval;
-}
-
 static int btrfs_sync_file(struct file *file,
                            struct dentry *dentry, int datasync)
 {
@@ -789,12 +652,25 @@ out:
         return ret > 0 ? EIO : ret;
 }
 
+static struct vm_operations_struct btrfs_file_vm_ops = {
+        .nopage         = filemap_nopage,
+        .populate       = filemap_populate,
+        .page_mkwrite   = btrfs_page_mkwrite,
+};
+
+static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+        vma->vm_ops = &btrfs_file_vm_ops;
+        file_accessed(filp);
+        return 0;
+}
+
 struct file_operations btrfs_file_operations = {
         .llseek         = generic_file_llseek,
         .read           = do_sync_read,
-        .aio_read       = btrfs_file_aio_read,
+        .aio_read       = generic_file_aio_read,
         .write          = btrfs_file_write,
-        .mmap           = generic_file_mmap,
+        .mmap           = btrfs_file_mmap,
         .open           = generic_file_open,
         .ioctl          = btrfs_ioctl,
         .fsync          = btrfs_sync_file,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e639cc9e089..def33ac90d7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
+#include <linux/bit_spinlock.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -561,7 +562,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
                 goto out;
 
         if (!PageUptodate(page)) {
-                ret = mpage_readpage(page, btrfs_get_block);
+                ret = btrfs_readpage(NULL, page);
                 lock_page(page);
                 if (!PageUptodate(page)) {
                         ret = -EIO;
@@ -1257,8 +1258,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
         path = btrfs_alloc_path();
         BUG_ON(!path);
         if (create & BTRFS_GET_BLOCK_CREATE) {
-                WARN_ON(1);
-                /* this almost but not quite works */
+                /*
+                 * danger!, this only works if the page is properly up
+                 * to date somehow
+                 */
                 trans = btrfs_start_transaction(root, 1);
                 if (!trans) {
                         err = -ENOMEM;
@@ -1353,7 +1356,6 @@ not_found:
                                                ins.objectid, ins.offset,
                                                ins.offset);
                 BUG_ON(ret);
-                SetPageChecked(result->b_page);
                 btrfs_map_bh_to_logical(root, result, ins.objectid);
         }
 out:
@@ -1374,6 +1376,40 @@ int btrfs_get_block(struct inode *inode, sector_t iblock,
         return err;
 }
 
+int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
+                    struct buffer_head *result, int create)
+{
+        int ret;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct page *page = result->b_page;
+        u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
+        struct btrfs_csum_item *item;
+        struct btrfs_path *path = NULL;
+
+        mutex_lock(&root->fs_info->fs_mutex);
+        ret = btrfs_get_block_lock(inode, iblock, result, create);
+        if (ret)
+                goto out;
+
+        path = btrfs_alloc_path();
+        item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
+        if (IS_ERR(item)) {
+                ret = PTR_ERR(item);
+                /* a csum that isn't present is a preallocated region. */
+                if (ret == -ENOENT || ret == -EFBIG)
+                        ret = 0;
+                result->b_private = 0;
+                goto out;
+        }
+        memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
+printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
+out:
+        if (path)
+                btrfs_free_path(path);
+        mutex_unlock(&root->fs_info->fs_mutex);
+        return ret;
+}
+
 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
                            struct buffer_head *result, int create)
 {
@@ -1395,9 +1431,198 @@ static int btrfs_prepare_write(struct file *file, struct page *page,
         return block_prepare_write(page, from, to, btrfs_get_block);
 }
 
-static int btrfs_readpage(struct file *file, struct page *page)
+static void buffer_io_error(struct buffer_head *bh)
+{
+        char b[BDEVNAME_SIZE];
+
+        printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+                        bdevname(bh->b_bdev, b),
+                        (unsigned long long)bh->b_blocknr);
+}
+
+/*
+ * I/O completion handler for block_read_full_page() - pages
+ * which come unlocked at the end of I/O.
+ */
+static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
-        return mpage_readpage(page, btrfs_get_block);
+        unsigned long flags;
+        struct buffer_head *first;
+        struct buffer_head *tmp;
+        struct page *page;
+        int page_uptodate = 1;
+        struct inode *inode;
+        int ret;
+
+        BUG_ON(!buffer_async_read(bh));
+
+        page = bh->b_page;
+        inode = page->mapping->host;
+        if (uptodate) {
+                void *kaddr;
+                struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+                if (bh->b_private) {
+                        char csum[BTRFS_CRC32_SIZE];
+                        kaddr = kmap_atomic(page, KM_IRQ0);
+                        ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
+                                              bh->b_size, csum);
+                        BUG_ON(ret);
+                        if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
+                                u64 offset;
+                                offset = (page->index << PAGE_CACHE_SHIFT) +
+                                        bh_offset(bh);
+                                printk("btrfs csum failed ino %lu off %llu\n",
+                                       page->mapping->host->i_ino,
+                                       (unsigned long long)offset);
+                                memset(kaddr + bh_offset(bh), 1, bh->b_size);
+                                flush_dcache_page(page);
+printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
+                        }
+                        kunmap_atomic(kaddr, KM_IRQ0);
+                }
+                set_buffer_uptodate(bh);
+        } else {
+                clear_buffer_uptodate(bh);
+                if (printk_ratelimit())
+                        buffer_io_error(bh);
+                SetPageError(page);
+        }
+
+        /*
+         * Be _very_ careful from here on. Bad things can happen if
+         * two buffer heads end IO at almost the same time and both
+         * decide that the page is now completely done.
+         */
+        first = page_buffers(page);
+        local_irq_save(flags);
+        bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+        clear_buffer_async_read(bh);
+        unlock_buffer(bh);
+        tmp = bh;
+        do {
+                if (!buffer_uptodate(tmp))
+                        page_uptodate = 0;
+                if (buffer_async_read(tmp)) {
+                        BUG_ON(!buffer_locked(tmp));
+                        goto still_busy;
+                }
+                tmp = tmp->b_this_page;
+        } while (tmp != bh);
+        bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+        local_irq_restore(flags);
+
+        /*
+         * If none of the buffers had errors and they are all
+         * uptodate then we can set the page uptodate.
+         */
+        if (page_uptodate && !PageError(page))
+                SetPageUptodate(page);
+        unlock_page(page);
+        return;
+
+still_busy:
+        bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+        local_irq_restore(flags);
+        return;
+}
+
+/*
+ * Generic "read page" function for block devices that have the normal
+ * get_block functionality. This is most of the block device filesystems.
+ * Reads the page asynchronously --- the unlock_buffer() and
+ * set/clear_buffer_uptodate() functions propagate buffer state into the
+ * page struct once IO has completed.
+ */
+int btrfs_readpage(struct file *file, struct page *page)
+{
+        struct inode *inode = page->mapping->host;
+        sector_t iblock, lblock;
+        struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+        unsigned int blocksize;
+        int nr, i;
+        int fully_mapped = 1;
+
+        BUG_ON(!PageLocked(page));
+        blocksize = 1 << inode->i_blkbits;
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, blocksize, 0);
+        head = page_buffers(page);
+
+        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+        lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+        bh = head;
+        nr = 0;
+        i = 0;
+
+        do {
+                if (buffer_uptodate(bh))
+                        continue;
+
+                if (!buffer_mapped(bh)) {
+                        int err = 0;
+
+                        fully_mapped = 0;
+                        if (iblock < lblock) {
+                                WARN_ON(bh->b_size != blocksize);
+                                err = btrfs_get_block_csum(inode, iblock,
+                                                           bh, 0);
+                                if (err)
+                                        SetPageError(page);
+                        }
+                        if (!buffer_mapped(bh)) {
+                                void *kaddr = kmap_atomic(page, KM_USER0);
+                                memset(kaddr + i * blocksize, 0, blocksize);
+                                flush_dcache_page(page);
+                                kunmap_atomic(kaddr, KM_USER0);
+                                if (!err)
+                                        set_buffer_uptodate(bh);
+                                continue;
+                        }
+                        /*
+                         * get_block() might have updated the buffer
+                         * synchronously
+                         */
+                        if (buffer_uptodate(bh))
+                                continue;
+                }
+                arr[nr++] = bh;
+        } while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+        if (fully_mapped)
+                SetPageMappedToDisk(page);
+
+        if (!nr) {
+                /*
+                 * All buffers are uptodate - we can set the page uptodate
+                 * as well. But not if get_block() returned an error.
+                 */
+                if (!PageError(page))
+                        SetPageUptodate(page);
+                unlock_page(page);
+                return 0;
+        }
+
+        /* Stage two: lock the buffers */
+        for (i = 0; i < nr; i++) {
+                bh = arr[i];
+                lock_buffer(bh);
+                bh->b_end_io = btrfs_end_buffer_async_read;
+                set_buffer_async_read(bh);
+        }
+
+        /*
+         * Stage 3: start the IO.  Check for uptodateness
+         * inside the buffer lock in case another process reading
+         * the underlying blockdev brought it uptodate (the sct fix).
+         */
+        for (i = 0; i < nr; i++) {
+                bh = arr[i];
+                if (buffer_uptodate(bh))
+                        btrfs_end_buffer_async_read(bh, 1);
+                else
+                        submit_bh(READ, bh);
+        }
+        return 0;
 }
 
 /*
@@ -1424,6 +1649,7 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
         struct buffer_head *bh, *head;
         const unsigned blocksize = 1 << inode->i_blkbits;
         int nr_underway = 0;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
 
         BUG_ON(!PageLocked(page));
 
@@ -1496,6 +1722,24 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
                         continue;
                 }
                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
+                        struct btrfs_trans_handle *trans;
+                        int ret;
+                        u64 off = page->index << PAGE_CACHE_SHIFT;
+                        char *kaddr;
+
+                        off += bh_offset(bh);
+                        mutex_lock(&root->fs_info->fs_mutex);
+                        trans = btrfs_start_transaction(root, 1);
+                        btrfs_set_trans_block_group(trans, inode);
+                        kaddr = kmap(page);
+                        ret = btrfs_csum_file_block(trans, root, inode->i_ino,
+                                                    off, kaddr + bh_offset(bh),
+                                                    bh->b_size);
+                        kunmap(page);
+                        BUG_ON(ret);
+                        ret = btrfs_end_transaction(trans, root);
+                        BUG_ON(ret);
+                        mutex_unlock(&root->fs_info->fs_mutex);
                         mark_buffer_async_write(bh);
                 } else {
                         unlock_buffer(bh);
@@ -1617,6 +1861,53 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
         return __btrfs_write_full_page(inode, page, wbc);
 }
 
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+        unsigned long end;
+        loff_t size;
+        int ret = -EINVAL;
+
+        lock_page(page);
+        wait_on_page_writeback(page);
+printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
+        size = i_size_read(inode);
+        if ((page->mapping != inode->i_mapping) ||
+            ((page->index << PAGE_CACHE_SHIFT) > size)) {
+                /* page got truncated out from underneath us */
+                goto out_unlock;
+        }
+
+        /* page is wholly or partially inside EOF */
+        if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+                end = size & ~PAGE_CACHE_MASK;
+        else
+                end = PAGE_CACHE_SIZE;
+
+        ret = btrfs_prepare_write(NULL, page, 0, end);
+        if (!ret)
+                ret = btrfs_commit_write(NULL, page, 0, end);
+
+out_unlock:
+        unlock_page(page);
+        return ret;
+}
+
 static void btrfs_truncate(struct inode *inode)
 {
         struct btrfs_root *root = BTRFS_I(inode)->root;
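The read-side change above replaces verify-at-copy (the deleted btrfs_read_actor, which mmap reads would bypass entirely) with verify-at-completion: btrfs_get_block_csum() stashes the expected 4-byte csum in bh->b_private while mapping the block, and btrfs_end_buffer_async_read() recomputes the csum over the buffer and memcmp()s it before the page is allowed to go uptodate. A minimal user-space sketch of that stash-then-verify pattern follows, with a toy checksum standing in for the crypto-API digest that btrfs_csum_data() performs; every name in the sketch is hypothetical:

/*
 * Sketch: stash the expected csum at map time, verify at I/O
 * completion.  toy_csum() is a stand-in, not the btrfs algorithm.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fake_bh {
        const void *data;
        size_t      size;
        uint32_t    expected;   /* plays the role of bh->b_private */
};

static uint32_t toy_csum(const void *buf, size_t len)
{
        const uint8_t *p = buf;
        uint32_t sum = 0;

        while (len--)
                sum = sum * 31 + *p++;
        return sum;
}

/* "map time": remember what the block should hash to */
static void map_block(struct fake_bh *bh, const void *data, size_t size)
{
        bh->data = data;
        bh->size = size;
        bh->expected = toy_csum(data, size);
}

/* "completion time": recompute and compare, as the end_io handler does */
static int end_read(struct fake_bh *bh)
{
        uint32_t csum = toy_csum(bh->data, bh->size);

        if (csum != bh->expected) {
                fprintf(stderr, "csum failed: got %x want %x\n",
                        (unsigned)csum, (unsigned)bh->expected);
                return -1;
        }
        return 0;
}

int main(void)
{
        char block[16] = "hello, csums";
        struct fake_bh bh;

        map_block(&bh, block, sizeof(block));
        block[0] ^= 1;          /* simulate corruption in flight */
        return end_read(&bh) ? 1 : 0;
}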
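The EOF clamp in btrfs_page_mkwrite() is worth spelling out: PAGE_CACHE_MASK is ~(PAGE_CACHE_SIZE - 1), so size & ~PAGE_CACHE_MASK reduces to the offset of EOF within its final page. A page wholly inside EOF is prepared and committed for the full PAGE_CACHE_SIZE, while the page straddling EOF is clamped so the fault handler never extends the file. A compile-and-run model of just that arithmetic, assuming 4K pages and a hypothetical helper name:

/* Model of the EOF clamp in btrfs_page_mkwrite() above. */
#include <assert.h>
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
#define PAGE_CACHE_MASK  (~(PAGE_CACHE_SIZE - 1))

static unsigned long mkwrite_end(unsigned long index, unsigned long long size)
{
        /* page is wholly or partially inside EOF */
        if (((unsigned long long)(index + 1) << PAGE_CACHE_SHIFT) > size)
                return size & ~PAGE_CACHE_MASK; /* partial: stop at EOF */
        return PAGE_CACHE_SIZE;                 /* whole page inside EOF */
}

int main(void)
{
        /* i_size = 10000: pages 0 and 1 are fully inside EOF ... */
        assert(mkwrite_end(0, 10000) == PAGE_CACHE_SIZE);
        assert(mkwrite_end(1, 10000) == PAGE_CACHE_SIZE);
        /* ... page 2 only up to byte 10000 - 2*4096 = 1808 */
        assert(mkwrite_end(2, 10000) == 1808);
        printf("all clamps ok\n");
        return 0;
}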
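The completion accounting in btrfs_end_buffer_async_read() is the subtle part: each finishing buffer clears its async-read bit under BH_Uptodate_Lock and walks the b_this_page ring, and only the buffer that finds no async-read peers left may mark the whole page uptodate. A single-threaded toy model of that last-completer-flips-the-page logic, using a plain array for the ring and ordinary function calls in place of the bit spinlock and disabled interrupts; all names here are hypothetical:

/* Toy model: the last buffer to complete marks the page uptodate. */
#include <stdbool.h>
#include <stdio.h>

#define NBH 4

struct toy_bh {
        bool async_read;        /* still has I/O outstanding */
        bool uptodate;          /* read completed successfully */
};

static struct toy_bh page_ring[NBH];
static bool page_uptodate;

static void end_buffer_async_read(int i, bool ok)
{
        bool all_done = true, all_ok = true;

        page_ring[i].uptodate = ok;
        page_ring[i].async_read = false;

        for (int j = 0; j < NBH; j++) {
                if (page_ring[j].async_read)
                        all_done = false;       /* a peer is still busy */
                if (!page_ring[j].uptodate)
                        all_ok = false;
        }
        if (all_done && all_ok)
                page_uptodate = true;           /* last completer flips it */
}

int main(void)
{
        for (int i = 0; i < NBH; i++)
                page_ring[i].async_read = true;

        /* completions can arrive in any order */
        end_buffer_async_read(2, true);
        end_buffer_async_read(0, true);
        end_buffer_async_read(3, true);
        printf("after 3 of 4: page uptodate = %d\n", page_uptodate);
        end_buffer_async_read(1, true);
        printf("after 4 of 4: page uptodate = %d\n", page_uptodate);
        return 0;
}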