Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	157
1 file changed, 119 insertions(+), 38 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283ed..6e9ed48064f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -555,7 +555,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping = mapping->assoc_mapping;
+	struct address_space *buffer_mapping = mapping->private_data;
 
 	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
 		return 0;
@@ -588,10 +588,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	struct address_space *buffer_mapping = bh->b_page->mapping;
 
 	mark_buffer_dirty(bh);
-	if (!mapping->assoc_mapping) {
-		mapping->assoc_mapping = buffer_mapping;
+	if (!mapping->private_data) {
+		mapping->private_data = buffer_mapping;
 	} else {
-		BUG_ON(mapping->assoc_mapping != buffer_mapping);
+		BUG_ON(mapping->private_data != buffer_mapping);
 	}
 	if (!bh->b_assoc_map) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +788,7 @@ void invalidate_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list))
@@ -811,7 +811,7 @@ int remove_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list)) {
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
 	attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+	sector_t retval = ~((sector_t)0);
+	loff_t sz = i_size_read(bdev->bd_inode);
+
+	if (sz) {
+		unsigned int sizebits = blksize_bits(size);
+		retval = (sz >> sizebits);
+	}
+	return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
-	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
 	do {
 		if (!buffer_mapped(bh)) {
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+	return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+	return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *	Mapped	Uptodate	Meaning
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
 			WRITE_SYNC : WRITE);
 
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 
 	/*
 	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
 	bh = head;
+	blocksize = bh->b_size;
+	bbits = block_size_bits(blocksize);
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 	BUG_ON(to > PAGE_CACHE_SIZE);
 	BUG_ON(from > to);
 
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	bbits = inode->i_blkbits;
 	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
 
-	blocksize = 1 << inode->i_blkbits;
+	bh = head = page_buffers(page);
+	blocksize = bh->b_size;
 
-	for(bh = head = page_buffers(page), block_start = 0;
-	    bh != head || !block_start;
-	    block_start=block_end, bh = bh->b_this_page) {
+	block_start = 0;
+	do {
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			mark_buffer_dirty(bh);
 		}
 		clear_buffer_new(bh);
-	}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
 
 	/*
 	 * If this is a partial write which happened to make all buffers
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 					unsigned long from)
 {
-	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
 	struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 	if (!page_has_buffers(page))
 		return 0;
 
-	blocksize = 1 << inode->i_blkbits;
+	head = page_buffers(page);
+	blocksize = head->b_size;
 	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
 
-	head = page_buffers(page);
 	bh = head;
 	block_start = 0;
 	do {
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
+	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 	bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
 	bio_get(bio);
 	submit_bio(rw, bio);
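
The new blkdev_max_block() above derives the last addressable block from the device's byte size and a per-call block size, instead of a single cached value. A minimal userspace sketch of that arithmetic, with a local stand-in for the kernel's blksize_bits(); the _demo names and the example sizes are illustrative only, not kernel API:

#include <stdio.h>

/* Stand-in for the kernel's blksize_bits(): log2 of a block size >= 512 */
static unsigned int blksize_bits_demo(unsigned int size)
{
	unsigned int bits = 8;

	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

int main(void)
{
	long long device_size = 9 * 512;	/* 4.5KiB device: an odd last sector */
	unsigned int block_size = 4096;

	/* Only one full 4KiB block fits; the trailing 512 bytes do not. */
	printf("max block: %lld\n", device_size >> blksize_bits_demo(block_size));
	return 0;
}

With a 4.5KiB device and 4KiB blocks this prints "max block: 1", which is why the patch must also cope with IO touching the partial block past that point (see guard_bh_eod() below).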
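
Several hunks replace uses of inode->i_blkbits with bbits computed from the page's own buffers (blocksize = head->b_size; bbits = block_size_bits(blocksize)), so each page is processed with the block size it was actually mapped with even if the inode's block size changes concurrently. A userspace sketch of the index-to-block shift those call sites share; PAGE_SHIFT_DEMO and the variable values are assumptions for illustration:

#include <stdio.h>

#define PAGE_SHIFT_DEMO 12	/* assumes 4KiB pages, like PAGE_CACHE_SHIFT here */

int main(void)
{
	unsigned long page_index = 3;	/* fourth page of the file */
	unsigned int bbits = 9;		/* 512-byte blocks: 8 blocks per page */

	/* Mirrors: block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits) */
	unsigned long long block =
		(unsigned long long)page_index << (PAGE_SHIFT_DEMO - bbits);

	printf("page %lu starts at block %llu\n", page_index, block);	/* 24 */
	return 0;
}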
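
guard_bh_eod() clips a bio that starts inside the device but runs past its end, so the final partial block of an oddly-sized device remains usable; for reads, the clipped tail of the buffer is zeroed rather than transferred. A userspace sketch of just the clipping decision, assuming 512-byte sectors; the values and variable names mimic the bio fields but are illustrative only:

#include <stdio.h>

int main(void)
{
	unsigned long long maxsector = 9;	/* device holds 9 sectors (4.5KiB) */
	unsigned long long bi_sector = 8;	/* IO starts at the final sector */
	unsigned int bi_size = 4096;		/* one 4KiB buffer_head */

	if (bi_sector >= maxsector) {
		puts("whole IO past EOD: pass it through, block layer returns EIO");
		return 0;
	}
	maxsector -= bi_sector;
	if ((bi_size >> 9) <= maxsector) {
		puts("IO fits inside the device: nothing to do");
		return 0;
	}
	/* The straddling case: clip the IO to what the device can hold. */
	bi_size = (unsigned int)(maxsector << 9);
	printf("bio truncated to %u bytes; a read would zero the tail\n", bi_size);
	return 0;
}

Here the 4KiB IO at sector 8 is clipped to 512 bytes, matching the "we need to be able to do IO at the final sector" case the comment in the patch describes.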