Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	176
1 file changed, 127 insertions, 49 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 58e2e7b7737..c017a2dfb90 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -46,8 +46,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-inline void
-init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
+void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 {
 	bh->b_end_io = handler;
 	bh->b_private = private;
@@ -555,7 +554,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping = mapping->assoc_mapping;
+	struct address_space *buffer_mapping = mapping->private_data;
 
 	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
 		return 0;
@@ -588,10 +587,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	struct address_space *buffer_mapping = bh->b_page->mapping;
 
 	mark_buffer_dirty(bh);
-	if (!mapping->assoc_mapping) {
-		mapping->assoc_mapping = buffer_mapping;
+	if (!mapping->private_data) {
+		mapping->private_data = buffer_mapping;
 	} else {
-		BUG_ON(mapping->assoc_mapping != buffer_mapping);
+		BUG_ON(mapping->private_data != buffer_mapping);
 	}
 	if (!bh->b_assoc_map) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +787,7 @@ void invalidate_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list))
@@ -811,7 +810,7 @@ int remove_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list)) {
@@ -850,13 +849,10 @@ try_again:
 		if (!bh)
 			goto no_grow;
 
-		bh->b_bdev = NULL;
 		bh->b_this_page = head;
 		bh->b_blocknr = -1;
 		head = bh;
 
-		bh->b_state = 0;
-		atomic_set(&bh->b_count, 0);
 		bh->b_size = size;
 
 		/* Link the buffer to its page */
@@ -911,6 +907,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
 	attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+	sector_t retval = ~((sector_t)0);
+	loff_t sz = i_size_read(bdev->bd_inode);
+
+	if (sz) {
+		unsigned int sizebits = blksize_bits(size);
+		retval = (sz >> sizebits);
+	}
+	return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
@@ -921,7 +929,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
-	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
 	do {
 		if (!buffer_mapped(bh)) {
@@ -1553,6 +1561,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+	return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+	return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *	Mapped	Uptodate	Meaning
@@ -1589,19 +1619,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
 			WRITE_SYNC : WRITE);
 
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 
 	/*
 	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1637,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
 	bh = head;
+	blocksize = bh->b_size;
+	bbits = block_size_bits(blocksize);
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1833,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 	BUG_ON(to > PAGE_CACHE_SIZE);
 	BUG_ON(from > to);
 
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	bbits = inode->i_blkbits;
 	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1906,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
 
-	blocksize = 1 << inode->i_blkbits;
+	bh = head = page_buffers(page);
+	blocksize = bh->b_size;
 
-	for(bh = head = page_buffers(page), block_start = 0;
-	    bh != head || !block_start;
-	    block_start=block_end, bh = bh->b_this_page) {
+	block_start = 0;
+	do {
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -1895,7 +1920,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			mark_buffer_dirty(bh);
 		}
 		clear_buffer_new(bh);
-	}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
 
 	/*
 	 * If this is a partial write which happened to make all buffers
@@ -2020,7 +2048,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 					unsigned long from)
 {
-	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
 	struct buffer_head *bh, *head;
@@ -2029,13 +2056,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 	if (!page_has_buffers(page))
 		return 0;
 
-	blocksize = 1 << inode->i_blkbits;
+	head = page_buffers(page);
+	blocksize = head->b_size;
 	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
 
-	head = page_buffers(page);
 	bh = head;
 	block_start = 0;
 	do {
@@ -2068,18 +2095,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
+	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -2312,12 +2337,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	loff_t size;
 	int ret;
 
-	/*
-	 * Update file times before taking page lock. We may end up failing the
-	 * fault so this update may be superfluous but who really cares...
-	 */
-	file_update_time(vma->vm_file);
-
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
@@ -2355,6 +2374,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
 
 	sb_start_pagefault(sb);
+
+	/*
+	 * Update file times before taking page lock. We may end up failing the
+	 * fault so this update may be superfluous but who really cares...
+	 */
+	file_update_time(vma->vm_file);
+
 	ret = __block_page_mkwrite(vma, vmf, get_block);
 	sb_end_pagefault(sb);
 	return block_page_mkwrite_return(ret);
@@ -2863,6 +2889,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 	bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
@@ -2899,6 +2974,9 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
 	bio_get(bio);
 	submit_bio(rw, bio);
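
For readers following the blkdev_max_block() change above, the sketch below walks through the same size-to-block arithmetic in plain user-space C. It is illustrative only, not kernel code: max_block is a made-up name, and __builtin_ctz stands in for the kernel's blksize_bits()/ilog2(), which is equivalent here because the block size is a power of two.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative only -- not kernel code.  Given the device size in bytes
 * and the block size (a power of two), return the first block number
 * that no longer fits on the device; blocks 0..N-1 are addressable.
 */
static uint64_t max_block(uint64_t dev_bytes, unsigned int blocksize)
{
	uint64_t retval = ~(uint64_t)0;	/* "no limit" when the size is unknown */

	if (dev_bytes) {
		unsigned int sizebits = __builtin_ctz(blocksize);	/* log2(blocksize) */
		retval = dev_bytes >> sizebits;
	}
	return retval;
}

int main(void)
{
	/* A 512000-byte (1000-sector) device holds 125 4KiB blocks: 0..124. */
	printf("%llu\n", (unsigned long long)max_block(512000, 4096));
	return 0;
}

Note that when the device size is not a multiple of the block size, the trailing partial block falls outside this limit; handling I/O to those "odd last sectors" is exactly what the guard_bh_eod() hunk above addresses.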
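Similarly, here is a minimal user-space sketch of the end-of-device clamping that guard_bh_eod() performs, assuming 512-byte sectors. Again this is only an illustration: clamp_to_device and the example numbers are hypothetical, not part of the patch.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative only -- not kernel code.  Mimics the truncation applied by
 * guard_bh_eod() when an I/O of 'bytes' bytes starting at 512-byte sector
 * 'sector' would run past a device of 'dev_bytes' bytes.  Returns the byte
 * count that should actually be submitted.
 */
static uint32_t clamp_to_device(uint64_t dev_bytes, uint64_t sector, uint32_t bytes)
{
	uint64_t maxsector = dev_bytes >> 9;	/* device size in sectors */

	if (!maxsector || sector >= maxsector)
		return bytes;		/* fully out of range: let the IO layer return EIO */

	maxsector -= sector;
	if ((bytes >> 9) <= maxsector)
		return bytes;		/* fits entirely: nothing to do */

	return maxsector << 9;		/* straddles the end: truncate to the device size */
}

int main(void)
{
	/* A 4KiB buffer head whose last 3KiB hang off a 10KiB (20-sector) device. */
	printf("%u\n", clamp_to_device(10240, 18, 4096));	/* prints 1024 */
	return 0;
}

In the patch itself the truncated length is applied to bio->bi_size and the first bio_vec, and for reads the now-unbacked tail of the buffer is zeroed so callers never see stale data.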