diff options
Diffstat (limited to 'fs')
64 files changed, 1096 insertions, 435 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index e9bad5093a3..5f95d1ed9c6 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -45,6 +45,14 @@ static int adfs_readpage(struct file *file, struct page *page)  	return block_read_full_page(page, adfs_get_block);  } +static void adfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) +		truncate_pagecache(inode, to, inode->i_size); +} +  static int adfs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -55,11 +63,8 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,  	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,  				adfs_get_block,  				&ADFS_I(mapping->host)->mmu_private); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		adfs_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/affs/file.c b/fs/affs/file.c index 2f4c935cb32..af3261b7810 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -39,7 +39,6 @@ const struct file_operations affs_file_operations = {  };  const struct inode_operations affs_file_inode_operations = { -	.truncate	= affs_truncate,  	.setattr	= affs_notify_change,  }; @@ -402,6 +401,16 @@ static int affs_readpage(struct file *file, struct page *page)  	return block_read_full_page(page, affs_get_block);  } +static void affs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		affs_truncate(inode); +	} +} +  static int affs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -412,11 +421,8 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,  	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,  				affs_get_block,  				&AFFS_I(mapping->host)->mmu_private); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		affs_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 15c48426822..0e092d08680 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -237,9 +237,12 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			return error; + +		truncate_setsize(inode, attr->ia_size); +		affs_truncate(inode);  	}  	setattr_copy(inode, attr); diff --git a/fs/bfs/file.c b/fs/bfs/file.c index f20e8a71062..ad3ea1497cc 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -161,6 +161,14 @@ static int bfs_readpage(struct file *file, struct page *page)  	return block_read_full_page(page, bfs_get_block);  } +static void bfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) +		truncate_pagecache(inode, to, inode->i_size); +} +  static int bfs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -169,11 +177,8 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,  	ret = block_write_begin(mapping, pos, len, flags, pagep,  				bfs_get_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		bfs_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67ed24ae86b..16d9e8e191e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4262,16 +4262,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  	if (dentry->d_name.len > BTRFS_NAME_LEN)  		return ERR_PTR(-ENAMETOOLONG); -	if (unlikely(d_need_lookup(dentry))) { -		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); -		kfree(dentry->d_fsdata); -		dentry->d_fsdata = NULL; -		/* This thing is hashed, drop it for now */ -		d_drop(dentry); -	} else { -		ret = btrfs_inode_by_name(dir, dentry, &location); -	} - +	ret = btrfs_inode_by_name(dir, dentry, &location);  	if (ret < 0)  		return ERR_PTR(ret); @@ -4341,11 +4332,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,  	struct dentry *ret;  	ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); -	if (unlikely(d_need_lookup(dentry))) { -		spin_lock(&dentry->d_lock); -		dentry->d_flags &= ~DCACHE_NEED_LOOKUP; -		spin_unlock(&dentry->d_lock); -	}  	return ret;  } diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 67bef6d0148..746ce532e13 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -41,12 +41,12 @@ static struct fscache_object *cachefiles_alloc_object(  	_enter("{%s},%p,", cache->cache.identifier, cookie); -	lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); +	lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp);  	if (!lookup_data)  		goto nomem_lookup_data;  	/* create a new object record and a temporary leaf image */ -	object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); +	object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp);  	if (!object)  		goto nomem_object; @@ -63,7 +63,7 @@ static struct fscache_object *cachefiles_alloc_object(  	 * - stick the length on the front and leave space on the back for the  	 *   encoder  	 */ -	buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); +	buffer = kmalloc((2 + 512) + 3, cachefiles_gfp);  	if (!buffer)  		goto nomem_buffer; @@ -219,7 +219,7 @@ static void cachefiles_update_object(struct fscache_object *_object)  		return;  	} -	auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); +	auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp);  	if (!auxdata) {  		_leave(" [nomem]");  		return; @@ -441,6 +441,54 @@ truncate_failed:  }  /* + * Invalidate an object + */ +static void cachefiles_invalidate_object(struct fscache_operation *op) +{ +	struct cachefiles_object *object; +	struct cachefiles_cache *cache; +	const struct cred *saved_cred; +	struct path path; +	uint64_t ni_size; +	int ret; + +	object = container_of(op->object, struct cachefiles_object, fscache); +	cache = container_of(object->fscache.cache, +			     struct cachefiles_cache, cache); + +	op->object->cookie->def->get_attr(op->object->cookie->netfs_data, +					  &ni_size); + +	_enter("{OBJ%x},[%llu]", +	       op->object->debug_id, (unsigned long long)ni_size); + +	if (object->backer) { +		ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + +		fscache_set_store_limit(&object->fscache, ni_size); + +		path.dentry = object->backer; +		path.mnt = cache->mnt; + +		cachefiles_begin_secure(cache, &saved_cred); +		ret = vfs_truncate(&path, 0); +		if (ret == 0) +			ret = vfs_truncate(&path, ni_size); +		cachefiles_end_secure(cache, saved_cred); + +		if (ret != 0) { +			fscache_set_store_limit(&object->fscache, 0); +			if (ret == -EIO) +				cachefiles_io_error_obj(object, +							"Invalidate failed"); +		} +	} + +	fscache_op_complete(op, true); +	_leave(""); +} + +/*   * dissociate a cache from all the pages it was backing   */  static void cachefiles_dissociate_pages(struct fscache_cache *cache) @@ -455,6 +503,7 @@ const struct fscache_cache_ops cachefiles_cache_ops = {  	.lookup_complete	= cachefiles_lookup_complete,  	.grab_object		= cachefiles_grab_object,  	.update_object		= cachefiles_update_object, +	.invalidate_object	= cachefiles_invalidate_object,  	.drop_object		= cachefiles_drop_object,  	.put_object		= cachefiles_put_object,  	.sync_cache		= cachefiles_sync_cache, diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index bd6bc1bde2d..49382519907 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -23,6 +23,8 @@ extern unsigned cachefiles_debug;  #define CACHEFILES_DEBUG_KLEAVE	2  #define CACHEFILES_DEBUG_KDEBUG	4 +#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) +  /*   * node records   */ diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 81b8b2b3a67..33b58c60f2d 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -78,7 +78,7 @@ char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)  	_debug("max: %d", max); -	key = kmalloc(max, GFP_KERNEL); +	key = kmalloc(max, cachefiles_gfp);  	if (!key)  		return NULL; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index b0b5f7cdfff..8c01c5fcdf7 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -40,8 +40,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object,  	printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",  	       prefix, fscache_object_states[object->fscache.state],  	       object->fscache.flags, work_busy(&object->fscache.work), -	       object->fscache.events, -	       object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); +	       object->fscache.events, object->fscache.event_mask);  	printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",  	       prefix, object->fscache.n_ops, object->fscache.n_in_progress,  	       object->fscache.n_exclusive); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c994691d944..48099225970 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -77,25 +77,25 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,  	struct page *backpage = monitor->back_page, *backpage2;  	int ret; -	kenter("{ino=%lx},{%lx,%lx}", +	_enter("{ino=%lx},{%lx,%lx}",  	       object->backer->d_inode->i_ino,  	       backpage->index, backpage->flags);  	/* skip if the page was truncated away completely */  	if (backpage->mapping != bmapping) { -		kleave(" = -ENODATA [mapping]"); +		_leave(" = -ENODATA [mapping]");  		return -ENODATA;  	}  	backpage2 = find_get_page(bmapping, backpage->index);  	if (!backpage2) { -		kleave(" = -ENODATA [gone]"); +		_leave(" = -ENODATA [gone]");  		return -ENODATA;  	}  	if (backpage != backpage2) {  		put_page(backpage2); -		kleave(" = -ENODATA [different]"); +		_leave(" = -ENODATA [different]");  		return -ENODATA;  	} @@ -114,7 +114,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,  		if (PageUptodate(backpage))  			goto unlock_discard; -		kdebug("reissue read"); +		_debug("reissue read");  		ret = bmapping->a_ops->readpage(NULL, backpage);  		if (ret < 0)  			goto unlock_discard; @@ -129,7 +129,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,  	}  	/* it'll reappear on the todo list */ -	kleave(" = -EINPROGRESS"); +	_leave(" = -EINPROGRESS");  	return -EINPROGRESS;  unlock_discard: @@ -137,7 +137,7 @@ unlock_discard:  	spin_lock_irq(&object->work_lock);  	list_del(&monitor->op_link);  	spin_unlock_irq(&object->work_lock); -	kleave(" = %d", ret); +	_leave(" = %d", ret);  	return ret;  } @@ -174,11 +174,13 @@ static void cachefiles_read_copier(struct fscache_operation *_op)  		_debug("- copy {%lu}", monitor->back_page->index);  	recheck: -		if (PageUptodate(monitor->back_page)) { +		if (test_bit(FSCACHE_COOKIE_INVALIDATING, +			     &object->fscache.cookie->flags)) { +			error = -ESTALE; +		} else if (PageUptodate(monitor->back_page)) {  			copy_highpage(monitor->netfs_page, monitor->back_page); - -			pagevec_add(&pagevec, monitor->netfs_page); -			fscache_mark_pages_cached(monitor->op, &pagevec); +			fscache_mark_page_cached(monitor->op, +						 monitor->netfs_page);  			error = 0;  		} else if (!PageError(monitor->back_page)) {  			/* the page has probably been truncated */ @@ -198,6 +200,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)  		fscache_end_io(op, monitor->netfs_page, error);  		page_cache_release(monitor->netfs_page); +		fscache_retrieval_complete(op, 1);  		fscache_put_retrieval(op);  		kfree(monitor); @@ -239,7 +242,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,  	_debug("read back %p{%lu,%d}",  	       netpage, netpage->index, page_count(netpage)); -	monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); +	monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);  	if (!monitor)  		goto nomem; @@ -258,13 +261,14 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,  			goto backing_page_already_present;  		if (!newpage) { -			newpage = page_cache_alloc_cold(bmapping); +			newpage = __page_cache_alloc(cachefiles_gfp | +						     __GFP_COLD);  			if (!newpage)  				goto nomem_monitor;  		}  		ret = add_to_page_cache(newpage, bmapping, -					netpage->index, GFP_KERNEL); +					netpage->index, cachefiles_gfp);  		if (ret == 0)  			goto installed_new_backing_page;  		if (ret != -EEXIST) @@ -335,11 +339,11 @@ backing_page_already_present:  backing_page_already_uptodate:  	_debug("- uptodate"); -	pagevec_add(pagevec, netpage); -	fscache_mark_pages_cached(op, pagevec); +	fscache_mark_page_cached(op, netpage);  	copy_highpage(netpage, backpage);  	fscache_end_io(op, netpage, 0); +	fscache_retrieval_complete(op, 1);  success:  	_debug("success"); @@ -357,10 +361,13 @@ out:  read_error:  	_debug("read error %d", ret); -	if (ret == -ENOMEM) +	if (ret == -ENOMEM) { +		fscache_retrieval_complete(op, 1);  		goto out; +	}  io_error:  	cachefiles_io_error_obj(object, "Page read error on backing file"); +	fscache_retrieval_complete(op, 1);  	ret = -ENOBUFS;  	goto out; @@ -370,6 +377,7 @@ nomem_monitor:  	fscache_put_retrieval(monitor->op);  	kfree(monitor);  nomem: +	fscache_retrieval_complete(op, 1);  	_leave(" = -ENOMEM");  	return -ENOMEM;  } @@ -408,7 +416,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,  	_enter("{%p},{%lx},,,", object, page->index);  	if (!object->backer) -		return -ENOBUFS; +		goto enobufs;  	inode = object->backer->d_inode;  	ASSERT(S_ISREG(inode->i_mode)); @@ -417,7 +425,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,  	/* calculate the shift required to use bmap */  	if (inode->i_sb->s_blocksize > PAGE_SIZE) -		return -ENOBUFS; +		goto enobufs;  	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -448,15 +456,20 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,  						       &pagevec);  	} else if (cachefiles_has_space(cache, 0, 1) == 0) {  		/* there's space in the cache we can use */ -		pagevec_add(&pagevec, page); -		fscache_mark_pages_cached(op, &pagevec); +		fscache_mark_page_cached(op, page); +		fscache_retrieval_complete(op, 1);  		ret = -ENODATA;  	} else { -		ret = -ENOBUFS; +		goto enobufs;  	}  	_leave(" = %d", ret);  	return ret; + +enobufs: +	fscache_retrieval_complete(op, 1); +	_leave(" = -ENOBUFS"); +	return -ENOBUFS;  }  /* @@ -465,8 +478,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,   */  static int cachefiles_read_backing_file(struct cachefiles_object *object,  					struct fscache_retrieval *op, -					struct list_head *list, -					struct pagevec *mark_pvec) +					struct list_head *list)  {  	struct cachefiles_one_read *monitor = NULL;  	struct address_space *bmapping = object->backer->d_inode->i_mapping; @@ -485,7 +497,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,  		       netpage, netpage->index, page_count(netpage));  		if (!monitor) { -			monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); +			monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);  			if (!monitor)  				goto nomem; @@ -500,13 +512,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,  				goto backing_page_already_present;  			if (!newpage) { -				newpage = page_cache_alloc_cold(bmapping); +				newpage = __page_cache_alloc(cachefiles_gfp | +							     __GFP_COLD);  				if (!newpage)  					goto nomem;  			}  			ret = add_to_page_cache(newpage, bmapping, -						netpage->index, GFP_KERNEL); +						netpage->index, cachefiles_gfp);  			if (ret == 0)  				goto installed_new_backing_page;  			if (ret != -EEXIST) @@ -536,10 +549,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,  		_debug("- monitor add");  		ret = add_to_page_cache(netpage, op->mapping, netpage->index, -					GFP_KERNEL); +					cachefiles_gfp);  		if (ret < 0) {  			if (ret == -EEXIST) {  				page_cache_release(netpage); +				fscache_retrieval_complete(op, 1);  				continue;  			}  			goto nomem; @@ -612,10 +626,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,  		_debug("- uptodate");  		ret = add_to_page_cache(netpage, op->mapping, netpage->index, -					GFP_KERNEL); +					cachefiles_gfp);  		if (ret < 0) {  			if (ret == -EEXIST) {  				page_cache_release(netpage); +				fscache_retrieval_complete(op, 1);  				continue;  			}  			goto nomem; @@ -626,16 +641,17 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,  		page_cache_release(backpage);  		backpage = NULL; -		if (!pagevec_add(mark_pvec, netpage)) -			fscache_mark_pages_cached(op, mark_pvec); +		fscache_mark_page_cached(op, netpage);  		page_cache_get(netpage);  		if (!pagevec_add(&lru_pvec, netpage))  			__pagevec_lru_add_file(&lru_pvec); +		/* the netpage is unlocked and marked up to date here */  		fscache_end_io(op, netpage, 0);  		page_cache_release(netpage);  		netpage = NULL; +		fscache_retrieval_complete(op, 1);  		continue;  	} @@ -661,6 +677,7 @@ out:  	list_for_each_entry_safe(netpage, _n, list, lru) {  		list_del(&netpage->lru);  		page_cache_release(netpage); +		fscache_retrieval_complete(op, 1);  	}  	_leave(" = %d", ret); @@ -669,15 +686,17 @@ out:  nomem:  	_debug("nomem");  	ret = -ENOMEM; -	goto out; +	goto record_page_complete;  read_error:  	_debug("read error %d", ret);  	if (ret == -ENOMEM) -		goto out; +		goto record_page_complete;  io_error:  	cachefiles_io_error_obj(object, "Page read error on backing file");  	ret = -ENOBUFS; +record_page_complete: +	fscache_retrieval_complete(op, 1);  	goto out;  } @@ -709,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,  	       *nr_pages);  	if (!object->backer) -		return -ENOBUFS; +		goto all_enobufs;  	space = 1;  	if (cachefiles_has_space(cache, 0, *nr_pages) < 0) @@ -722,7 +741,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,  	/* calculate the shift required to use bmap */  	if (inode->i_sb->s_blocksize > PAGE_SIZE) -		return -ENOBUFS; +		goto all_enobufs;  	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -762,7 +781,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,  			nrbackpages++;  		} else if (space && pagevec_add(&pagevec, page) == 0) {  			fscache_mark_pages_cached(op, &pagevec); +			fscache_retrieval_complete(op, 1);  			ret = -ENODATA; +		} else { +			fscache_retrieval_complete(op, 1);  		}  	} @@ -775,18 +797,18 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,  	/* submit the apparently valid pages to the backing fs to be read from  	 * disk */  	if (nrbackpages > 0) { -		ret2 = cachefiles_read_backing_file(object, op, &backpages, -						    &pagevec); +		ret2 = cachefiles_read_backing_file(object, op, &backpages);  		if (ret2 == -ENOMEM || ret2 == -EINTR)  			ret = ret2;  	} -	if (pagevec_count(&pagevec) > 0) -		fscache_mark_pages_cached(op, &pagevec); -  	_leave(" = %d [nr=%u%s]",  	       ret, *nr_pages, list_empty(pages) ? " empty" : "");  	return ret; + +all_enobufs: +	fscache_retrieval_complete(op, *nr_pages); +	return -ENOBUFS;  }  /* @@ -806,7 +828,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,  {  	struct cachefiles_object *object;  	struct cachefiles_cache *cache; -	struct pagevec pagevec;  	int ret;  	object = container_of(op->op.object, @@ -817,14 +838,12 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,  	_enter("%p,{%lx},", object, page->index);  	ret = cachefiles_has_space(cache, 0, 1); -	if (ret == 0) { -		pagevec_init(&pagevec, 0); -		pagevec_add(&pagevec, page); -		fscache_mark_pages_cached(op, &pagevec); -	} else { +	if (ret == 0) +		fscache_mark_page_cached(op, page); +	else  		ret = -ENOBUFS; -	} +	fscache_retrieval_complete(op, 1);  	_leave(" = %d", ret);  	return ret;  } @@ -874,6 +893,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op,  		ret = -ENOBUFS;  	} +	fscache_retrieval_complete(op, *nr_pages);  	_leave(" = %d", ret);  	return ret;  } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index e18b183b47e..73b46288b54 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -174,7 +174,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,  	ASSERT(dentry);  	ASSERT(dentry->d_inode); -	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); +	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp);  	if (!auxbuf) {  		_leave(" = -ENOMEM");  		return -ENOMEM; diff --git a/fs/dcache.c b/fs/dcache.c index 3a463d0c4fe..19153a0a810 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -455,24 +455,6 @@ void d_drop(struct dentry *dentry)  EXPORT_SYMBOL(d_drop);  /* - * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag - * @dentry: dentry to drop - * - * This is called when we do a lookup on a placeholder dentry that needed to be - * looked up.  The dentry should have been hashed in order for it to be found by - * the lookup code, but now needs to be unhashed while we do the actual lookup - * and clear the DCACHE_NEED_LOOKUP flag. - */ -void d_clear_need_lookup(struct dentry *dentry) -{ -	spin_lock(&dentry->d_lock); -	__d_drop(dentry); -	dentry->d_flags &= ~DCACHE_NEED_LOOKUP; -	spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(d_clear_need_lookup); - -/*   * Finish off a dentry we've decided to kill.   * dentry->d_lock must be held, returns with it unlocked.   * If ref is non-zero, then decrement the refcount too. @@ -565,13 +547,7 @@ repeat:   	if (d_unhashed(dentry))  		goto kill_it; -	/* -	 * If this dentry needs lookup, don't set the referenced flag so that it -	 * is more likely to be cleaned up by the dcache shrinker in case of -	 * memory pressure. -	 */ -	if (!d_need_lookup(dentry)) -		dentry->d_flags |= DCACHE_REFERENCED; +	dentry->d_flags |= DCACHE_REFERENCED;  	dentry_lru_add(dentry);  	dentry->d_count--; @@ -1583,7 +1559,7 @@ EXPORT_SYMBOL(d_find_any_alias);   */  struct dentry *d_obtain_alias(struct inode *inode)  { -	static const struct qstr anonstring = { .name = "" }; +	static const struct qstr anonstring = QSTR_INIT("/", 1);  	struct dentry *tmp;  	struct dentry *res; @@ -1737,13 +1713,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,  	}  	/* -	 * We are going to instantiate this dentry, unhash it and clear the -	 * lookup flag so we can do that. -	 */ -	if (unlikely(d_need_lookup(found))) -		d_clear_need_lookup(found); - -	/*  	 * Negative dentry: instantiate it unless the inode is a directory and  	 * already has a dentry.  	 */ diff --git a/fs/file_table.c b/fs/file_table.c index a72bf9ddd0d..de9e9653d61 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -458,8 +458,8 @@ void mark_files_ro(struct super_block *sb)  		spin_unlock(&f->f_lock);  		if (file_check_writeable(f) != 0)  			continue; +		__mnt_drop_write(f->f_path.mnt);  		file_release_write(f); -		mnt_drop_write_file(f);  	} while_file_list_for_each_entry;  	lg_global_unlock(&files_lglock);  } diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 6a3c48abd67..b52aed1dca9 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -314,10 +314,10 @@ EXPORT_SYMBOL(fscache_add_cache);   */  void fscache_io_error(struct fscache_cache *cache)  { -	set_bit(FSCACHE_IOERROR, &cache->flags); - -	printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", -	       cache->ops->name); +	if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags)) +		printk(KERN_ERR "FS-Cache:" +		       " Cache '%s' stopped due to I/O error\n", +		       cache->ops->name);  }  EXPORT_SYMBOL(fscache_io_error); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 990535071a8..8dcb114758e 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -370,6 +370,66 @@ cant_attach_object:  }  /* + * Invalidate an object.  Callable with spinlocks held. + */ +void __fscache_invalidate(struct fscache_cookie *cookie) +{ +	struct fscache_object *object; + +	_enter("{%s}", cookie->def->name); + +	fscache_stat(&fscache_n_invalidates); + +	/* Only permit invalidation of data files.  Invalidating an index will +	 * require the caller to release all its attachments to the tree rooted +	 * there, and if it's doing that, it may as well just retire the +	 * cookie. +	 */ +	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + +	/* We will be updating the cookie too. */ +	BUG_ON(!cookie->def->get_aux); + +	/* If there's an object, we tell the object state machine to handle the +	 * invalidation on our behalf, otherwise there's nothing to do. +	 */ +	if (!hlist_empty(&cookie->backing_objects)) { +		spin_lock(&cookie->lock); + +		if (!hlist_empty(&cookie->backing_objects) && +		    !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, +				      &cookie->flags)) { +			object = hlist_entry(cookie->backing_objects.first, +					     struct fscache_object, +					     cookie_link); +			if (object->state < FSCACHE_OBJECT_DYING) +				fscache_raise_event( +					object, FSCACHE_OBJECT_EV_INVALIDATE); +		} + +		spin_unlock(&cookie->lock); +	} + +	_leave(""); +} +EXPORT_SYMBOL(__fscache_invalidate); + +/* + * Wait for object invalidation to complete. + */ +void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ +	_enter("%p", cookie); + +	wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, +		    fscache_wait_bit_interruptible, +		    TASK_UNINTERRUPTIBLE); + +	_leave(""); +} +EXPORT_SYMBOL(__fscache_wait_on_invalidate); + +/*   * update the index entries backing a cookie   */  void __fscache_update_cookie(struct fscache_cookie *cookie) @@ -442,16 +502,34 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)  	event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; +try_again:  	spin_lock(&cookie->lock);  	/* break links with all the active objects */  	while (!hlist_empty(&cookie->backing_objects)) { +		int n_reads;  		object = hlist_entry(cookie->backing_objects.first,  				     struct fscache_object,  				     cookie_link);  		_debug("RELEASE OBJ%x", object->debug_id); +		set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); +		n_reads = atomic_read(&object->n_reads); +		if (n_reads) { +			int n_ops = object->n_ops; +			int n_in_progress = object->n_in_progress; +			spin_unlock(&cookie->lock); +			printk(KERN_ERR "FS-Cache:" +			       " Cookie '%s' still has %d outstanding reads (%d,%d)\n", +			       cookie->def->name, +			       n_reads, n_ops, n_in_progress); +			wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, +				    fscache_wait_bit, TASK_UNINTERRUPTIBLE); +			printk("Wait finished\n"); +			goto try_again; +		} +  		/* detach each cache object from the object cookie */  		spin_lock(&object->lock);  		hlist_del_init(&object->cookie_link); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f6aad48d38a..ee38fef4be5 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -121,12 +121,19 @@ extern int fscache_submit_exclusive_op(struct fscache_object *,  				       struct fscache_operation *);  extern int fscache_submit_op(struct fscache_object *,  			     struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *); +extern int fscache_cancel_op(struct fscache_operation *, +			     void (*)(struct fscache_operation *)); +extern void fscache_cancel_all_ops(struct fscache_object *);  extern void fscache_abort_object(struct fscache_object *);  extern void fscache_start_operations(struct fscache_object *);  extern void fscache_operation_gc(struct work_struct *);  /* + * page.c + */ +extern void fscache_invalidate_writes(struct fscache_cookie *); + +/*   * proc.c   */  #ifdef CONFIG_PROC_FS @@ -194,6 +201,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing;  extern atomic_t fscache_n_store_vmscan_gone;  extern atomic_t fscache_n_store_vmscan_busy;  extern atomic_t fscache_n_store_vmscan_cancelled; +extern atomic_t fscache_n_store_vmscan_wait;  extern atomic_t fscache_n_marks;  extern atomic_t fscache_n_uncaches; @@ -205,6 +213,9 @@ extern atomic_t fscache_n_acquires_ok;  extern atomic_t fscache_n_acquires_nobufs;  extern atomic_t fscache_n_acquires_oom; +extern atomic_t fscache_n_invalidates; +extern atomic_t fscache_n_invalidates_run; +  extern atomic_t fscache_n_updates;  extern atomic_t fscache_n_updates_null;  extern atomic_t fscache_n_updates_run; @@ -237,6 +248,7 @@ extern atomic_t fscache_n_cop_alloc_object;  extern atomic_t fscache_n_cop_lookup_object;  extern atomic_t fscache_n_cop_lookup_complete;  extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_invalidate_object;  extern atomic_t fscache_n_cop_update_object;  extern atomic_t fscache_n_cop_drop_object;  extern atomic_t fscache_n_cop_put_object; @@ -278,6 +290,7 @@ extern const struct file_operations fscache_stats_fops;  static inline void fscache_raise_event(struct fscache_object *object,  				       unsigned event)  { +	BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS);  	if (!test_and_set_bit(event, &object->events) &&  	    test_bit(event, &object->event_mask))  		fscache_enqueue_object(object); diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index ebe29c58138..f27c89d1788 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -245,7 +245,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)  		   obj->n_in_progress,  		   obj->n_exclusive,  		   atomic_read(&obj->n_reads), -		   obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, +		   obj->event_mask,  		   obj->events,  		   obj->flags,  		   work_busy(&obj->work)); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b6b897c550a..50d41c18021 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,6 +14,7 @@  #define FSCACHE_DEBUG_LEVEL COOKIE  #include <linux/module.h> +#include <linux/slab.h>  #include "internal.h"  const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {  	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",  	[FSCACHE_OBJECT_AVAILABLE]	= "OBJECT_AVAILABLE",  	[FSCACHE_OBJECT_ACTIVE]		= "OBJECT_ACTIVE", +	[FSCACHE_OBJECT_INVALIDATING]	= "OBJECT_INVALIDATING",  	[FSCACHE_OBJECT_UPDATING]	= "OBJECT_UPDATING",  	[FSCACHE_OBJECT_DYING]		= "OBJECT_DYING",  	[FSCACHE_OBJECT_LC_DYING]	= "OBJECT_LC_DYING", @@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {  	[FSCACHE_OBJECT_CREATING]	= "CRTN",  	[FSCACHE_OBJECT_AVAILABLE]	= "AVBL",  	[FSCACHE_OBJECT_ACTIVE]		= "ACTV", +	[FSCACHE_OBJECT_INVALIDATING]	= "INVL",  	[FSCACHE_OBJECT_UPDATING]	= "UPDT",  	[FSCACHE_OBJECT_DYING]		= "DYNG",  	[FSCACHE_OBJECT_LC_DYING]	= "LCDY", @@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *);  static void fscache_initialise_object(struct fscache_object *);  static void fscache_lookup_object(struct fscache_object *);  static void fscache_object_available(struct fscache_object *); +static void fscache_invalidate_object(struct fscache_object *);  static void fscache_release_object(struct fscache_object *);  static void fscache_withdraw_object(struct fscache_object *);  static void fscache_enqueue_dependents(struct fscache_object *); @@ -79,6 +83,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object)  }  /* + * Notify netfs of invalidation completion. + */ +static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) +{ +	if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) +		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); +} + +/*   * process events that have been sent to an object's state machine   * - initiates parent lookup   * - does object lookup @@ -90,6 +103,7 @@ static void fscache_object_state_machine(struct fscache_object *object)  {  	enum fscache_object_state new_state;  	struct fscache_cookie *cookie; +	int event;  	ASSERT(object != NULL); @@ -101,7 +115,8 @@ static void fscache_object_state_machine(struct fscache_object *object)  		/* wait for the parent object to become ready */  	case FSCACHE_OBJECT_INIT:  		object->event_mask = -			ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); +			FSCACHE_OBJECT_EVENTS_MASK & +			~(1 << FSCACHE_OBJECT_EV_CLEARED);  		fscache_initialise_object(object);  		goto done; @@ -125,6 +140,16 @@ static void fscache_object_state_machine(struct fscache_object *object)  	case FSCACHE_OBJECT_ACTIVE:  		goto active_transit; +		/* Invalidate an object on disk */ +	case FSCACHE_OBJECT_INVALIDATING: +		clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); +		fscache_stat(&fscache_n_invalidates_run); +		fscache_stat(&fscache_n_cop_invalidate_object); +		fscache_invalidate_object(object); +		fscache_stat_d(&fscache_n_cop_invalidate_object); +		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); +		goto active_transit; +  		/* update the object metadata on disk */  	case FSCACHE_OBJECT_UPDATING:  		clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); @@ -251,13 +276,17 @@ static void fscache_object_state_machine(struct fscache_object *object)  	/* determine the transition from a lookup state */  lookup_transit: -	switch (fls(object->events & object->event_mask) - 1) { +	event = fls(object->events & object->event_mask) - 1; +	switch (event) {  	case FSCACHE_OBJECT_EV_WITHDRAW:  	case FSCACHE_OBJECT_EV_RETIRE:  	case FSCACHE_OBJECT_EV_RELEASE:  	case FSCACHE_OBJECT_EV_ERROR:  		new_state = FSCACHE_OBJECT_LC_DYING;  		goto change_state; +	case FSCACHE_OBJECT_EV_INVALIDATE: +		new_state = FSCACHE_OBJECT_INVALIDATING; +		goto change_state;  	case FSCACHE_OBJECT_EV_REQUEUE:  		goto done;  	case -1: @@ -268,13 +297,17 @@ lookup_transit:  	/* determine the transition from an active state */  active_transit: -	switch (fls(object->events & object->event_mask) - 1) { +	event = fls(object->events & object->event_mask) - 1; +	switch (event) {  	case FSCACHE_OBJECT_EV_WITHDRAW:  	case FSCACHE_OBJECT_EV_RETIRE:  	case FSCACHE_OBJECT_EV_RELEASE:  	case FSCACHE_OBJECT_EV_ERROR:  		new_state = FSCACHE_OBJECT_DYING;  		goto change_state; +	case FSCACHE_OBJECT_EV_INVALIDATE: +		new_state = FSCACHE_OBJECT_INVALIDATING; +		goto change_state;  	case FSCACHE_OBJECT_EV_UPDATE:  		new_state = FSCACHE_OBJECT_UPDATING;  		goto change_state; @@ -287,7 +320,8 @@ active_transit:  	/* determine the transition from a terminal state */  terminal_transit: -	switch (fls(object->events & object->event_mask) - 1) { +	event = fls(object->events & object->event_mask) - 1; +	switch (event) {  	case FSCACHE_OBJECT_EV_WITHDRAW:  		new_state = FSCACHE_OBJECT_WITHDRAWING;  		goto change_state; @@ -320,8 +354,8 @@ done:  unsupported_event:  	printk(KERN_ERR "FS-Cache:" -	       " Unsupported event %lx [mask %lx] in state %s\n", -	       object->events, object->event_mask, +	       " Unsupported event %d [%lx/%lx] in state %s\n", +	       event, object->events, object->event_mask,  	       fscache_object_states[object->state]);  	BUG();  } @@ -587,8 +621,6 @@ static void fscache_object_available(struct fscache_object *object)  	if (object->n_in_progress == 0) {  		if (object->n_ops > 0) {  			ASSERTCMP(object->n_ops, >=, object->n_obj_ops); -			ASSERTIF(object->n_ops > object->n_obj_ops, -				 !list_empty(&object->pending_ops));  			fscache_start_operations(object);  		} else {  			ASSERT(list_empty(&object->pending_ops)); @@ -681,6 +713,7 @@ static void fscache_withdraw_object(struct fscache_object *object)  		if (object->cookie == cookie) {  			hlist_del_init(&object->cookie_link);  			object->cookie = NULL; +			fscache_invalidation_complete(cookie);  			detached = true;  		}  		spin_unlock(&cookie->lock); @@ -890,3 +923,55 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,  	return result;  }  EXPORT_SYMBOL(fscache_check_aux); + +/* + * Asynchronously invalidate an object. + */ +static void fscache_invalidate_object(struct fscache_object *object) +{ +	struct fscache_operation *op; +	struct fscache_cookie *cookie = object->cookie; + +	_enter("{OBJ%x}", object->debug_id); + +	/* Reject any new read/write ops and abort any that are pending. */ +	fscache_invalidate_writes(cookie); +	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); +	fscache_cancel_all_ops(object); + +	/* Now we have to wait for in-progress reads and writes */ +	op = kzalloc(sizeof(*op), GFP_KERNEL); +	if (!op) { +		fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); +		_leave(" [ENOMEM]"); +		return; +	} + +	fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); +	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + +	spin_lock(&cookie->lock); +	if (fscache_submit_exclusive_op(object, op) < 0) +		goto submit_op_failed; +	spin_unlock(&cookie->lock); +	fscache_put_operation(op); + +	/* Once we've completed the invalidation, we know there will be no data +	 * stored in the cache and thus we can reinstate the data-check-skip +	 * optimisation. +	 */ +	set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + +	/* We can allow read and write requests to come in once again.  They'll +	 * queue up behind our exclusive invalidation operation. +	 */ +	fscache_invalidation_complete(cookie); +	_leave(""); +	return; + +submit_op_failed: +	spin_unlock(&cookie->lock); +	kfree(op); +	fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); +	_leave(" [EIO]"); +} diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 30afdfa7aec..762a9ec4ffa 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op)  	ASSERT(op->processor != NULL);  	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);  	ASSERTCMP(atomic_read(&op->usage), >, 0); +	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);  	fscache_stat(&fscache_n_op_enqueue);  	switch (op->flags & FSCACHE_OP_TYPE) { @@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation);  static void fscache_run_op(struct fscache_object *object,  			   struct fscache_operation *op)  { +	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + +	op->state = FSCACHE_OP_ST_IN_PROGRESS;  	object->n_in_progress++;  	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))  		wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -84,18 +88,21 @@ int fscache_submit_exclusive_op(struct fscache_object *object,  	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); +	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); +	ASSERTCMP(atomic_read(&op->usage), >, 0); +  	spin_lock(&object->lock);  	ASSERTCMP(object->n_ops, >=, object->n_in_progress);  	ASSERTCMP(object->n_ops, >=, object->n_exclusive);  	ASSERT(list_empty(&op->pend_link)); -	ret = -ENOBUFS; +	op->state = FSCACHE_OP_ST_PENDING;  	if (fscache_object_is_active(object)) {  		op->object = object;  		object->n_ops++;  		object->n_exclusive++;	/* reads and writes must wait */ -		if (object->n_ops > 1) { +		if (object->n_in_progress > 0) {  			atomic_inc(&op->usage);  			list_add_tail(&op->pend_link, &object->pending_ops);  			fscache_stat(&fscache_n_op_pend); @@ -121,8 +128,11 @@ int fscache_submit_exclusive_op(struct fscache_object *object,  		fscache_stat(&fscache_n_op_pend);  		ret = 0;  	} else { -		/* not allowed to submit ops in any other state */ -		BUG(); +		/* If we're in any other state, there must have been an I/O +		 * error of some nature. +		 */ +		ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); +		ret = -EIO;  	}  	spin_unlock(&object->lock); @@ -186,6 +196,7 @@ int fscache_submit_op(struct fscache_object *object,  	_enter("{OBJ%x OP%x},{%u}",  	       object->debug_id, op->debug_id, atomic_read(&op->usage)); +	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);  	ASSERTCMP(atomic_read(&op->usage), >, 0);  	spin_lock(&object->lock); @@ -196,6 +207,7 @@ int fscache_submit_op(struct fscache_object *object,  	ostate = object->state;  	smp_rmb(); +	op->state = FSCACHE_OP_ST_PENDING;  	if (fscache_object_is_active(object)) {  		op->object = object;  		object->n_ops++; @@ -225,12 +237,15 @@ int fscache_submit_op(struct fscache_object *object,  		   object->state == FSCACHE_OBJECT_LC_DYING ||  		   object->state == FSCACHE_OBJECT_WITHDRAWING) {  		fscache_stat(&fscache_n_op_rejected); +		op->state = FSCACHE_OP_ST_CANCELLED;  		ret = -ENOBUFS;  	} else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {  		fscache_report_unexpected_submission(object, op, ostate);  		ASSERT(!fscache_object_is_active(object)); +		op->state = FSCACHE_OP_ST_CANCELLED;  		ret = -ENOBUFS;  	} else { +		op->state = FSCACHE_OP_ST_CANCELLED;  		ret = -ENOBUFS;  	} @@ -283,20 +298,28 @@ void fscache_start_operations(struct fscache_object *object)  /*   * cancel an operation that's pending on an object   */ -int fscache_cancel_op(struct fscache_operation *op) +int fscache_cancel_op(struct fscache_operation *op, +		      void (*do_cancel)(struct fscache_operation *))  {  	struct fscache_object *object = op->object;  	int ret;  	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); +	ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); +	ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); +	ASSERTCMP(atomic_read(&op->usage), >, 0); +  	spin_lock(&object->lock);  	ret = -EBUSY; -	if (!list_empty(&op->pend_link)) { +	if (op->state == FSCACHE_OP_ST_PENDING) { +		ASSERT(!list_empty(&op->pend_link));  		fscache_stat(&fscache_n_op_cancelled);  		list_del_init(&op->pend_link); -		object->n_ops--; +		if (do_cancel) +			do_cancel(op); +		op->state = FSCACHE_OP_ST_CANCELLED;  		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))  			object->n_exclusive--;  		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) @@ -311,6 +334,70 @@ int fscache_cancel_op(struct fscache_operation *op)  }  /* + * Cancel all pending operations on an object + */ +void fscache_cancel_all_ops(struct fscache_object *object) +{ +	struct fscache_operation *op; + +	_enter("OBJ%x", object->debug_id); + +	spin_lock(&object->lock); + +	while (!list_empty(&object->pending_ops)) { +		op = list_entry(object->pending_ops.next, +				struct fscache_operation, pend_link); +		fscache_stat(&fscache_n_op_cancelled); +		list_del_init(&op->pend_link); + +		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); +		op->state = FSCACHE_OP_ST_CANCELLED; + +		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) +			object->n_exclusive--; +		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) +			wake_up_bit(&op->flags, FSCACHE_OP_WAITING); +		fscache_put_operation(op); +		cond_resched_lock(&object->lock); +	} + +	spin_unlock(&object->lock); +	_leave(""); +} + +/* + * Record the completion or cancellation of an in-progress operation. + */ +void fscache_op_complete(struct fscache_operation *op, bool cancelled) +{ +	struct fscache_object *object = op->object; + +	_enter("OBJ%x", object->debug_id); + +	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); +	ASSERTCMP(object->n_in_progress, >, 0); +	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), +		    object->n_exclusive, >, 0); +	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), +		    object->n_in_progress, ==, 1); + +	spin_lock(&object->lock); + +	op->state = cancelled ? +		FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; + +	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) +		object->n_exclusive--; +	object->n_in_progress--; +	if (object->n_in_progress == 0) +		fscache_start_operations(object); + +	spin_unlock(&object->lock); +	_leave(""); +} +EXPORT_SYMBOL(fscache_op_complete); + +/*   * release an operation   * - queues pending ops if this is the last in-progress op   */ @@ -328,8 +415,9 @@ void fscache_put_operation(struct fscache_operation *op)  		return;  	_debug("PUT OP"); -	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) -		BUG(); +	ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, +		    op->state, ==, FSCACHE_OP_ST_CANCELLED); +	op->state = FSCACHE_OP_ST_DEAD;  	fscache_stat(&fscache_n_op_release); @@ -340,8 +428,14 @@ void fscache_put_operation(struct fscache_operation *op)  	object = op->object; -	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) -		atomic_dec(&object->n_reads); +	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { +		if (atomic_dec_and_test(&object->n_reads)) { +			clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, +				  &object->cookie->flags); +			wake_up_bit(&object->cookie->flags, +				    FSCACHE_COOKIE_WAITING_ON_READS); +		} +	}  	/* now... we may get called with the object spinlock held, so we  	 * complete the cleanup here only if we can immediately acquire the @@ -359,16 +453,6 @@ void fscache_put_operation(struct fscache_operation *op)  		return;  	} -	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { -		ASSERTCMP(object->n_exclusive, >, 0); -		object->n_exclusive--; -	} - -	ASSERTCMP(object->n_in_progress, >, 0); -	object->n_in_progress--; -	if (object->n_in_progress == 0) -		fscache_start_operations(object); -  	ASSERTCMP(object->n_ops, >, 0);  	object->n_ops--;  	if (object->n_ops == 0) @@ -407,23 +491,14 @@ void fscache_operation_gc(struct work_struct *work)  		spin_unlock(&cache->op_gc_list_lock);  		object = op->object; +		spin_lock(&object->lock);  		_debug("GC DEFERRED REL OBJ%x OP%x",  		       object->debug_id, op->debug_id);  		fscache_stat(&fscache_n_op_gc);  		ASSERTCMP(atomic_read(&op->usage), ==, 0); - -		spin_lock(&object->lock); -		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { -			ASSERTCMP(object->n_exclusive, >, 0); -			object->n_exclusive--; -		} - -		ASSERTCMP(object->n_in_progress, >, 0); -		object->n_in_progress--; -		if (object->n_in_progress == 0) -			fscache_start_operations(object); +		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD);  		ASSERTCMP(object->n_ops, >, 0);  		object->n_ops--; @@ -431,6 +506,7 @@ void fscache_operation_gc(struct work_struct *work)  			fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);  		spin_unlock(&object->lock); +		kfree(op);  	} while (count++ < 20); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3f7a59bfa7a..ff000e52072 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,  	_enter("%p,%p,%x", cookie, page, gfp); +try_again:  	rcu_read_lock();  	val = radix_tree_lookup(&cookie->stores, page->index);  	if (!val) { @@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,  	return true;  page_busy: -	/* we might want to wait here, but that could deadlock the allocator as -	 * the work threads writing to the cache may all end up sleeping -	 * on memory allocation */ -	fscache_stat(&fscache_n_store_vmscan_busy); -	return false; +	/* We will wait here if we're allowed to, but that could deadlock the +	 * allocator as the work threads writing to the cache may all end up +	 * sleeping on memory allocation, so we may need to impose a timeout +	 * too. */ +	if (!(gfp & __GFP_WAIT)) { +		fscache_stat(&fscache_n_store_vmscan_busy); +		return false; +	} + +	fscache_stat(&fscache_n_store_vmscan_wait); +	__fscache_wait_on_page_write(cookie, page); +	gfp &= ~__GFP_WAIT; +	goto try_again;  }  EXPORT_SYMBOL(__fscache_maybe_release_page); @@ -162,6 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op)  			fscache_abort_object(object);  	} +	fscache_op_complete(op, true);  	_leave("");  } @@ -223,6 +233,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)  	_enter("{OP%x}", op->op.debug_id); +	ASSERTCMP(op->n_pages, ==, 0); +  	fscache_hist(fscache_retrieval_histogram, op->start_time);  	if (op->context)  		fscache_put_context(op->op.object->cookie, op->context); @@ -291,6 +303,17 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)  }  /* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ +	struct fscache_retrieval *op = +		container_of(_op, struct fscache_retrieval, op); + +	op->n_pages = 0; +} + +/*   * wait for an object to become active (or dead)   */  static int fscache_wait_for_retrieval_activation(struct fscache_object *object, @@ -307,8 +330,8 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object,  	fscache_stat(stat_op_waits);  	if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,  			fscache_wait_bit_interruptible, -			TASK_INTERRUPTIBLE) < 0) { -		ret = fscache_cancel_op(&op->op); +			TASK_INTERRUPTIBLE) != 0) { +		ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);  		if (ret == 0)  			return -ERESTARTSYS; @@ -320,7 +343,14 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object,  	_debug("<<< GO");  check_if_dead: +	if (op->op.state == FSCACHE_OP_ST_CANCELLED) { +		fscache_stat(stat_object_dead); +		_leave(" = -ENOBUFS [cancelled]"); +		return -ENOBUFS; +	}  	if (unlikely(fscache_object_is_dead(object))) { +		pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state); +		fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);  		fscache_stat(stat_object_dead);  		return -ENOBUFS;  	} @@ -353,6 +383,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,  	if (hlist_empty(&cookie->backing_objects))  		goto nobufs; +	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { +		_leave(" = -ENOBUFS [invalidating]"); +		return -ENOBUFS; +	} +  	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);  	ASSERTCMP(page, !=, NULL); @@ -364,6 +399,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,  		_leave(" = -ENOMEM");  		return -ENOMEM;  	} +	op->n_pages = 1;  	spin_lock(&cookie->lock); @@ -375,10 +411,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,  	ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);  	atomic_inc(&object->n_reads); -	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); +	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);  	if (fscache_submit_op(object, &op->op) < 0) -		goto nobufs_unlock; +		goto nobufs_unlock_dec;  	spin_unlock(&cookie->lock);  	fscache_stat(&fscache_n_retrieval_ops); @@ -425,6 +461,8 @@ error:  	_leave(" = %d", ret);  	return ret; +nobufs_unlock_dec: +	atomic_dec(&object->n_reads);  nobufs_unlock:  	spin_unlock(&cookie->lock);  	kfree(op); @@ -472,6 +510,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,  	if (hlist_empty(&cookie->backing_objects))  		goto nobufs; +	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { +		_leave(" = -ENOBUFS [invalidating]"); +		return -ENOBUFS; +	} +  	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);  	ASSERTCMP(*nr_pages, >, 0);  	ASSERT(!list_empty(pages)); @@ -482,6 +525,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,  	op = fscache_alloc_retrieval(mapping, end_io_func, context);  	if (!op)  		return -ENOMEM; +	op->n_pages = *nr_pages;  	spin_lock(&cookie->lock); @@ -491,10 +535,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,  			     struct fscache_object, cookie_link);  	atomic_inc(&object->n_reads); -	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); +	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);  	if (fscache_submit_op(object, &op->op) < 0) -		goto nobufs_unlock; +		goto nobufs_unlock_dec;  	spin_unlock(&cookie->lock);  	fscache_stat(&fscache_n_retrieval_ops); @@ -541,6 +585,8 @@ error:  	_leave(" = %d", ret);  	return ret; +nobufs_unlock_dec: +	atomic_dec(&object->n_reads);  nobufs_unlock:  	spin_unlock(&cookie->lock);  	kfree(op); @@ -577,12 +623,18 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,  	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);  	ASSERTCMP(page, !=, NULL); +	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { +		_leave(" = -ENOBUFS [invalidating]"); +		return -ENOBUFS; +	} +  	if (fscache_wait_for_deferred_lookup(cookie) < 0)  		return -ERESTARTSYS;  	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);  	if (!op)  		return -ENOMEM; +	op->n_pages = 1;  	spin_lock(&cookie->lock); @@ -658,9 +710,27 @@ static void fscache_write_op(struct fscache_operation *_op)  	spin_lock(&object->lock);  	cookie = object->cookie; -	if (!fscache_object_is_active(object) || !cookie) { +	if (!fscache_object_is_active(object)) { +		/* If we get here, then the on-disk cache object likely longer +		 * exists, so we should just cancel this write operation. +		 */ +		spin_unlock(&object->lock); +		fscache_op_complete(&op->op, false); +		_leave(" [inactive]"); +		return; +	} + +	if (!cookie) { +		/* If we get here, then the cookie belonging to the object was +		 * detached, probably by the cookie being withdrawn due to +		 * memory pressure, which means that the pages we might write +		 * to the cache from no longer exist - therefore, we can just +		 * cancel this write operation. +		 */  		spin_unlock(&object->lock); -		_leave(""); +		fscache_op_complete(&op->op, false); +		_leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", +		       _op->flags, _op->state, object->state, object->flags);  		return;  	} @@ -696,6 +766,7 @@ static void fscache_write_op(struct fscache_operation *_op)  	fscache_end_page_write(object, page);  	if (ret < 0) {  		fscache_abort_object(object); +		fscache_op_complete(&op->op, true);  	} else {  		fscache_enqueue_operation(&op->op);  	} @@ -710,6 +781,38 @@ superseded:  	spin_unlock(&cookie->stores_lock);  	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);  	spin_unlock(&object->lock); +	fscache_op_complete(&op->op, true); +	_leave(""); +} + +/* + * Clear the pages pending writing for invalidation + */ +void fscache_invalidate_writes(struct fscache_cookie *cookie) +{ +	struct page *page; +	void *results[16]; +	int n, i; + +	_enter(""); + +	while (spin_lock(&cookie->stores_lock), +	       n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, +					      ARRAY_SIZE(results), +					      FSCACHE_COOKIE_PENDING_TAG), +	       n > 0) { +		for (i = n - 1; i >= 0; i--) { +			page = results[i]; +			radix_tree_delete(&cookie->stores, page->index); +		} + +		spin_unlock(&cookie->stores_lock); + +		for (i = n - 1; i >= 0; i--) +			page_cache_release(results[i]); +	} + +	spin_unlock(&cookie->stores_lock);  	_leave("");  } @@ -759,7 +862,12 @@ int __fscache_write_page(struct fscache_cookie *cookie,  	fscache_stat(&fscache_n_stores); -	op = kzalloc(sizeof(*op), GFP_NOIO); +	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { +		_leave(" = -ENOBUFS [invalidating]"); +		return -ENOBUFS; +	} + +	op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY);  	if (!op)  		goto nomem; @@ -915,6 +1023,40 @@ done:  EXPORT_SYMBOL(__fscache_uncache_page);  /** + * fscache_mark_page_cached - Mark a page as being cached + * @op: The retrieval op pages are being marked for + * @page: The page to be marked + * + * Mark a netfs page as being cached.  After this is called, the netfs + * must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) +{ +	struct fscache_cookie *cookie = op->op.object->cookie; + +#ifdef CONFIG_FSCACHE_STATS +	atomic_inc(&fscache_n_marks); +#endif + +	_debug("- mark %p{%lx}", page, page->index); +	if (TestSetPageFsCache(page)) { +		static bool once_only; +		if (!once_only) { +			once_only = true; +			printk(KERN_WARNING "FS-Cache:" +			       " Cookie type %s marked page %lx" +			       " multiple times\n", +			       cookie->def->name, page->index); +		} +	} + +	if (cookie->def->mark_page_cached) +		cookie->def->mark_page_cached(cookie->netfs_data, +					      op->mapping, page); +} +EXPORT_SYMBOL(fscache_mark_page_cached); + +/**   * fscache_mark_pages_cached - Mark pages as being cached   * @op: The retrieval op pages are being marked for   * @pagevec: The pages to be marked @@ -925,32 +1067,11 @@ EXPORT_SYMBOL(__fscache_uncache_page);  void fscache_mark_pages_cached(struct fscache_retrieval *op,  			       struct pagevec *pagevec)  { -	struct fscache_cookie *cookie = op->op.object->cookie;  	unsigned long loop; -#ifdef CONFIG_FSCACHE_STATS -	atomic_add(pagevec->nr, &fscache_n_marks); -#endif - -	for (loop = 0; loop < pagevec->nr; loop++) { -		struct page *page = pagevec->pages[loop]; - -		_debug("- mark %p{%lx}", page, page->index); -		if (TestSetPageFsCache(page)) { -			static bool once_only; -			if (!once_only) { -				once_only = true; -				printk(KERN_WARNING "FS-Cache:" -				       " Cookie type %s marked page %lx" -				       " multiple times\n", -				       cookie->def->name, page->index); -			} -		} -	} +	for (loop = 0; loop < pagevec->nr; loop++) +		fscache_mark_page_cached(op, pagevec->pages[loop]); -	if (cookie->def->mark_pages_cached) -		cookie->def->mark_pages_cached(cookie->netfs_data, -					       op->mapping, pagevec);  	pagevec_reinit(pagevec);  }  EXPORT_SYMBOL(fscache_mark_pages_cached); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537..8179e8bc4a3 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing;  atomic_t fscache_n_store_vmscan_gone;  atomic_t fscache_n_store_vmscan_busy;  atomic_t fscache_n_store_vmscan_cancelled; +atomic_t fscache_n_store_vmscan_wait;  atomic_t fscache_n_marks;  atomic_t fscache_n_uncaches; @@ -80,6 +81,9 @@ atomic_t fscache_n_acquires_ok;  atomic_t fscache_n_acquires_nobufs;  atomic_t fscache_n_acquires_oom; +atomic_t fscache_n_invalidates; +atomic_t fscache_n_invalidates_run; +  atomic_t fscache_n_updates;  atomic_t fscache_n_updates_null;  atomic_t fscache_n_updates_run; @@ -112,6 +116,7 @@ atomic_t fscache_n_cop_alloc_object;  atomic_t fscache_n_cop_lookup_object;  atomic_t fscache_n_cop_lookup_complete;  atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_invalidate_object;  atomic_t fscache_n_cop_update_object;  atomic_t fscache_n_cop_drop_object;  atomic_t fscache_n_cop_put_object; @@ -168,6 +173,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)  		   atomic_read(&fscache_n_object_created),  		   atomic_read(&fscache_n_object_lookups_timed_out)); +	seq_printf(m, "Invals : n=%u run=%u\n", +		   atomic_read(&fscache_n_invalidates), +		   atomic_read(&fscache_n_invalidates_run)); +  	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",  		   atomic_read(&fscache_n_updates),  		   atomic_read(&fscache_n_updates_null), @@ -224,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v)  		   atomic_read(&fscache_n_store_radix_deletes),  		   atomic_read(&fscache_n_store_pages_over_limit)); -	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", +	seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n",  		   atomic_read(&fscache_n_store_vmscan_not_storing),  		   atomic_read(&fscache_n_store_vmscan_gone),  		   atomic_read(&fscache_n_store_vmscan_busy), -		   atomic_read(&fscache_n_store_vmscan_cancelled)); +		   atomic_read(&fscache_n_store_vmscan_cancelled), +		   atomic_read(&fscache_n_store_vmscan_wait));  	seq_printf(m, "Ops    : pend=%u run=%u enq=%u can=%u rej=%u\n",  		   atomic_read(&fscache_n_op_pend), @@ -246,7 +256,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)  		   atomic_read(&fscache_n_cop_lookup_object),  		   atomic_read(&fscache_n_cop_lookup_complete),  		   atomic_read(&fscache_n_cop_grab_object)); -	seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", +	seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n", +		   atomic_read(&fscache_n_cop_invalidate_object),  		   atomic_read(&fscache_n_cop_update_object),  		   atomic_read(&fscache_n_cop_drop_object),  		   atomic_read(&fscache_n_cop_put_object), diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 0b35903219b..d47f11658c1 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -35,6 +35,16 @@ static int hfs_readpage(struct file *file, struct page *page)  	return block_read_full_page(page, hfs_get_block);  } +static void hfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		hfs_file_truncate(inode); +	} +} +  static int hfs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -45,11 +55,8 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping,  	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,  				hfs_get_block,  				&HFS_I(mapping->host)->phys_size); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		hfs_write_failed(mapping, pos + len);  	return ret;  } @@ -120,6 +127,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,  		const struct iovec *iov, loff_t offset, unsigned long nr_segs)  {  	struct file *file = iocb->ki_filp; +	struct address_space *mapping = file->f_mapping;  	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;  	ssize_t ret; @@ -135,7 +143,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,  		loff_t end = offset + iov_length(iov, nr_segs);  		if (end > isize) -			vmtruncate(inode, isize); +			hfs_write_failed(mapping, end);  	}  	return ret; @@ -617,9 +625,12 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)  	    attr->ia_size != i_size_read(inode)) {  		inode_dio_wait(inode); -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			return error; + +		truncate_setsize(inode, attr->ia_size); +		hfs_file_truncate(inode);  	}  	setattr_copy(inode, attr); @@ -668,7 +679,6 @@ static const struct file_operations hfs_file_operations = {  static const struct inode_operations hfs_file_inode_operations = {  	.lookup		= hfs_file_lookup, -	.truncate	= hfs_file_truncate,  	.setattr	= hfs_inode_setattr,  	.setxattr	= hfs_setxattr,  	.getxattr	= hfs_getxattr, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2172aa5976f..799b336b59f 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -28,6 +28,16 @@ static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)  	return block_write_full_page(page, hfsplus_get_block, wbc);  } +static void hfsplus_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		hfsplus_file_truncate(inode); +	} +} +  static int hfsplus_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -38,11 +48,8 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,  	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,  				hfsplus_get_block,  				&HFSPLUS_I(mapping->host)->phys_size); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		hfsplus_write_failed(mapping, pos + len);  	return ret;  } @@ -116,6 +123,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,  		const struct iovec *iov, loff_t offset, unsigned long nr_segs)  {  	struct file *file = iocb->ki_filp; +	struct address_space *mapping = file->f_mapping;  	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;  	ssize_t ret; @@ -131,7 +139,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,  		loff_t end = offset + iov_length(iov, nr_segs);  		if (end > isize) -			vmtruncate(inode, isize); +			hfsplus_write_failed(mapping, end);  	}  	return ret; @@ -300,10 +308,8 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) {  		inode_dio_wait(inode); - -		error = vmtruncate(inode, attr->ia_size); -		if (error) -			return error; +		truncate_setsize(inode, attr->ia_size); +		hfsplus_file_truncate(inode);  	}  	setattr_copy(inode, attr); @@ -358,7 +364,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,  static const struct inode_operations hfsplus_file_inode_operations = {  	.lookup		= hfsplus_file_lookup, -	.truncate	= hfsplus_file_truncate,  	.setattr	= hfsplus_setattr,  	.setxattr	= hfsplus_setxattr,  	.getxattr	= hfsplus_getxattr, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 89d2a5803ae..fbfe2df5624 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -50,7 +50,7 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)  	return disk_secno;  } -static void hpfs_truncate(struct inode *i) +void hpfs_truncate(struct inode *i)  {  	if (IS_IMMUTABLE(i)) return /*-EPERM*/;  	hpfs_lock_assert(i->i_sb); @@ -105,6 +105,16 @@ static int hpfs_readpage(struct file *file, struct page *page)  	return block_read_full_page(page,hpfs_get_block);  } +static void hpfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		hpfs_truncate(inode); +	} +} +  static int hpfs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -115,11 +125,8 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,  	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,  				hpfs_get_block,  				&hpfs_i(mapping->host)->mmu_private); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		hpfs_write_failed(mapping, pos + len);  	return ret;  } @@ -166,6 +173,5 @@ const struct file_operations hpfs_file_ops =  const struct inode_operations hpfs_file_iops =  { -	.truncate	= hpfs_truncate,  	.setattr	= hpfs_setattr,  }; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 7102aaecc24..b7ae286646b 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -252,6 +252,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *,  /* file.c */  int hpfs_file_fsync(struct file *, loff_t, loff_t, int); +void hpfs_truncate(struct inode *);  extern const struct file_operations hpfs_file_ops;  extern const struct inode_operations hpfs_file_iops;  extern const struct address_space_operations hpfs_aops; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 804a9a842cb..5dc06c83710 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -277,9 +277,12 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			goto out_unlock; + +		truncate_setsize(inode, attr->ia_size); +		hpfs_truncate(inode);  	}  	setattr_copy(inode, attr); diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 9d3afd157f9..dd7442c5835 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -119,9 +119,12 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)  	    iattr->ia_size != i_size_read(inode)) {  		inode_dio_wait(inode); -		rc = vmtruncate(inode, iattr->ia_size); +		rc = inode_newsize_ok(inode, iattr->ia_size);  		if (rc)  			return rc; + +		truncate_setsize(inode, iattr->ia_size); +		jfs_truncate(inode);  	}  	setattr_copy(inode, iattr); @@ -133,7 +136,6 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)  }  const struct inode_operations jfs_file_inode_operations = { -	.truncate	= jfs_truncate,  	.setxattr	= jfs_setxattr,  	.getxattr	= jfs_getxattr,  	.listxattr	= jfs_listxattr, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 4692bf3ca8c..b7dc47ba675 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -300,6 +300,16 @@ static int jfs_readpages(struct file *file, struct address_space *mapping,  	return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);  } +static void jfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		jfs_truncate(inode); +	} +} +  static int jfs_write_begin(struct file *file, struct address_space *mapping,  				loff_t pos, unsigned len, unsigned flags,  				struct page **pagep, void **fsdata) @@ -308,11 +318,8 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,  	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,  				jfs_get_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		jfs_write_failed(mapping, pos + len);  	return ret;  } @@ -326,6 +333,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,  	const struct iovec *iov, loff_t offset, unsigned long nr_segs)  {  	struct file *file = iocb->ki_filp; +	struct address_space *mapping = file->f_mapping;  	struct inode *inode = file->f_mapping->host;  	ssize_t ret; @@ -341,7 +349,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,  		loff_t end = offset + iov_length(iov, nr_segs);  		if (end > isize) -			vmtruncate(inode, isize); +			jfs_write_failed(mapping, end);  	}  	return ret; diff --git a/fs/libfs.c b/fs/libfs.c index 35fc6e74cd8..916da8c4158 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -369,8 +369,6 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)  	struct inode *inode = dentry->d_inode;  	int error; -	WARN_ON_ONCE(inode->i_op->truncate); -  	error = inode_change_ok(inode, iattr);  	if (error)  		return error; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e1a3b6bf632..9a59cbade2f 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1887,9 +1887,15 @@ int logfs_truncate(struct inode *inode, u64 target)  		logfs_put_wblocks(sb, NULL, 1);  	} -	if (!err) -		err = vmtruncate(inode, target); +	if (!err) { +		err = inode_newsize_ok(inode, target); +		if (err) +			goto out; + +		truncate_setsize(inode, target); +	} + out:  	/* I don't trust error recovery yet. */  	WARN_ON(err);  	return err; diff --git a/fs/minix/file.c b/fs/minix/file.c index 4493ce695ab..adc6f549423 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -34,9 +34,12 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			return error; + +		truncate_setsize(inode, attr->ia_size); +		minix_truncate(inode);  	}  	setattr_copy(inode, attr); @@ -45,7 +48,6 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)  }  const struct inode_operations minix_file_inode_operations = { -	.truncate	= minix_truncate,  	.setattr	= minix_setattr,  	.getattr	= minix_getattr,  }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 4fc5f8ab1c4..99541cceb58 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -395,6 +395,16 @@ int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)  	return __block_write_begin(page, pos, len, minix_get_block);  } +static void minix_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		minix_truncate(inode); +	} +} +  static int minix_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -403,11 +413,8 @@ static int minix_write_begin(struct file *file, struct address_space *mapping,  	ret = block_write_begin(mapping, pos, len, flags, pagep,  				minix_get_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		minix_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/namei.c b/fs/namei.c index 5f4cdf3ad91..43a97ee1d4c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1275,9 +1275,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,  	*need_lookup = false;  	dentry = d_lookup(dir, name);  	if (dentry) { -		if (d_need_lookup(dentry)) { -			*need_lookup = true; -		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { +		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {  			error = d_revalidate(dentry, flags);  			if (unlikely(error <= 0)) {  				if (error < 0) { @@ -1383,8 +1381,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,  			return -ECHILD;  		nd->seq = seq; -		if (unlikely(d_need_lookup(dentry))) -			goto unlazy;  		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {  			status = d_revalidate(dentry, nd->flags);  			if (unlikely(status <= 0)) { @@ -1410,11 +1406,6 @@ unlazy:  	if (unlikely(!dentry))  		goto need_lookup; -	if (unlikely(d_need_lookup(dentry))) { -		dput(dentry); -		goto need_lookup; -	} -  	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)  		status = d_revalidate(dentry, nd->flags);  	if (unlikely(status <= 0)) { @@ -1859,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,  	if (flags & LOOKUP_ROOT) {  		struct inode *inode = nd->root.dentry->d_inode;  		if (*name) { -			if (!inode->i_op->lookup) +			if (!can_lookup(inode))  				return -ENOTDIR;  			retval = inode_permission(inode, MAY_EXEC);  			if (retval) @@ -1903,6 +1894,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,  			get_fs_pwd(current->fs, &nd->path);  		}  	} else { +		/* Caller must check execute permissions on the starting path component */  		struct fd f = fdget_raw(dfd);  		struct dentry *dentry; @@ -1912,16 +1904,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,  		dentry = f.file->f_path.dentry;  		if (*name) { -			if (!S_ISDIR(dentry->d_inode->i_mode)) { +			if (!can_lookup(dentry->d_inode)) {  				fdput(f);  				return -ENOTDIR;  			} - -			retval = inode_permission(dentry->d_inode, MAY_EXEC); -			if (retval) { -				fdput(f); -				return retval; -			}  		}  		nd->path = f.file->f_path; @@ -2189,15 +2175,19 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,   *     path-walking is complete.   */  static struct filename * -user_path_parent(int dfd, const char __user *path, struct nameidata *nd) +user_path_parent(int dfd, const char __user *path, struct nameidata *nd, +		 unsigned int flags)  {  	struct filename *s = getname(path);  	int error; +	/* only LOOKUP_REVAL is allowed in extra flags */ +	flags &= LOOKUP_REVAL; +  	if (IS_ERR(s))  		return s; -	error = filename_lookup(dfd, s, LOOKUP_PARENT, nd); +	error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd);  	if (error) {  		putname(s);  		return ERR_PTR(error); @@ -3044,12 +3034,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,  	return file;  } -struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir) +struct dentry *kern_path_create(int dfd, const char *pathname, +				struct path *path, unsigned int lookup_flags)  {  	struct dentry *dentry = ERR_PTR(-EEXIST);  	struct nameidata nd;  	int err2; -	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); +	int error; +	bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); + +	/* +	 * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any +	 * other flags passed in are ignored! +	 */ +	lookup_flags &= LOOKUP_REVAL; + +	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);  	if (error)  		return ERR_PTR(error); @@ -3113,13 +3113,14 @@ void done_path_create(struct path *path, struct dentry *dentry)  }  EXPORT_SYMBOL(done_path_create); -struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) +struct dentry *user_path_create(int dfd, const char __user *pathname, +				struct path *path, unsigned int lookup_flags)  {  	struct filename *tmp = getname(pathname);  	struct dentry *res;  	if (IS_ERR(tmp))  		return ERR_CAST(tmp); -	res = kern_path_create(dfd, tmp->name, path, is_dir); +	res = kern_path_create(dfd, tmp->name, path, lookup_flags);  	putname(tmp);  	return res;  } @@ -3175,12 +3176,13 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,  	struct dentry *dentry;  	struct path path;  	int error; +	unsigned int lookup_flags = 0;  	error = may_mknod(mode);  	if (error)  		return error; - -	dentry = user_path_create(dfd, filename, &path, 0); +retry: +	dentry = user_path_create(dfd, filename, &path, lookup_flags);  	if (IS_ERR(dentry))  		return PTR_ERR(dentry); @@ -3203,6 +3205,10 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,  	}  out:  	done_path_create(&path, dentry); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -3241,8 +3247,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)  	struct dentry *dentry;  	struct path path;  	int error; +	unsigned int lookup_flags = LOOKUP_DIRECTORY; -	dentry = user_path_create(dfd, pathname, &path, 1); +retry: +	dentry = user_path_create(dfd, pathname, &path, lookup_flags);  	if (IS_ERR(dentry))  		return PTR_ERR(dentry); @@ -3252,6 +3260,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)  	if (!error)  		error = vfs_mkdir(path.dentry->d_inode, dentry, mode);  	done_path_create(&path, dentry); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -3327,8 +3339,9 @@ static long do_rmdir(int dfd, const char __user *pathname)  	struct filename *name;  	struct dentry *dentry;  	struct nameidata nd; - -	name = user_path_parent(dfd, pathname, &nd); +	unsigned int lookup_flags = 0; +retry: +	name = user_path_parent(dfd, pathname, &nd, lookup_flags);  	if (IS_ERR(name))  		return PTR_ERR(name); @@ -3370,6 +3383,10 @@ exit2:  exit1:  	path_put(&nd.path);  	putname(name); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -3423,8 +3440,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)  	struct dentry *dentry;  	struct nameidata nd;  	struct inode *inode = NULL; - -	name = user_path_parent(dfd, pathname, &nd); +	unsigned int lookup_flags = 0; +retry: +	name = user_path_parent(dfd, pathname, &nd, lookup_flags);  	if (IS_ERR(name))  		return PTR_ERR(name); @@ -3462,6 +3480,11 @@ exit2:  exit1:  	path_put(&nd.path);  	putname(name); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		inode = NULL; +		goto retry; +	}  	return error;  slashes: @@ -3513,12 +3536,13 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,  	struct filename *from;  	struct dentry *dentry;  	struct path path; +	unsigned int lookup_flags = 0;  	from = getname(oldname);  	if (IS_ERR(from))  		return PTR_ERR(from); - -	dentry = user_path_create(newdfd, newname, &path, 0); +retry: +	dentry = user_path_create(newdfd, newname, &path, lookup_flags);  	error = PTR_ERR(dentry);  	if (IS_ERR(dentry))  		goto out_putname; @@ -3527,6 +3551,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,  	if (!error)  		error = vfs_symlink(path.dentry->d_inode, dentry, from->name);  	done_path_create(&path, dentry); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out_putname:  	putname(from);  	return error; @@ -3613,12 +3641,13 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,  	if (flags & AT_SYMLINK_FOLLOW)  		how |= LOOKUP_FOLLOW; - +retry:  	error = user_path_at(olddfd, oldname, how, &old_path);  	if (error)  		return error; -	new_dentry = user_path_create(newdfd, newname, &new_path, 0); +	new_dentry = user_path_create(newdfd, newname, &new_path, +					(how & LOOKUP_REVAL));  	error = PTR_ERR(new_dentry);  	if (IS_ERR(new_dentry))  		goto out; @@ -3635,6 +3664,10 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,  	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);  out_dput:  	done_path_create(&new_path, new_dentry); +	if (retry_estale(error, how)) { +		how |= LOOKUP_REVAL; +		goto retry; +	}  out:  	path_put(&old_path); @@ -3807,15 +3840,17 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,  	struct nameidata oldnd, newnd;  	struct filename *from;  	struct filename *to; +	unsigned int lookup_flags = 0; +	bool should_retry = false;  	int error; - -	from = user_path_parent(olddfd, oldname, &oldnd); +retry: +	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);  	if (IS_ERR(from)) {  		error = PTR_ERR(from);  		goto exit;  	} -	to = user_path_parent(newdfd, newname, &newnd); +	to = user_path_parent(newdfd, newname, &newnd, lookup_flags);  	if (IS_ERR(to)) {  		error = PTR_ERR(to);  		goto exit1; @@ -3887,11 +3922,18 @@ exit3:  	unlock_rename(new_dir, old_dir);  	mnt_drop_write(oldnd.path.mnt);  exit2: +	if (retry_estale(error, lookup_flags)) +		should_retry = true;  	path_put(&newnd.path);  	putname(to);  exit1:  	path_put(&oldnd.path);  	putname(from); +	if (should_retry) { +		should_retry = false; +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  exit:  	return error;  } diff --git a/fs/namespace.c b/fs/namespace.c index 398a50ff243..55605c55278 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -313,7 +313,7 @@ int __mnt_want_write(struct vfsmount *m)  	 * incremented count after it has set MNT_WRITE_HOLD.  	 */  	smp_mb(); -	while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) +	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)  		cpu_relax();  	/*  	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d7e9fe77188..1acdad7fcec 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -976,9 +976,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)  			goto out;  		if (attr->ia_size != i_size_read(inode)) { -			result = vmtruncate(inode, attr->ia_size); -			if (result) -				goto out; +			truncate_setsize(inode, attr->ia_size);  			mark_inode_dirty(inode);  		}  	} diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c817787fbdb..24d1d1c5fca 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -307,6 +307,7 @@ void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)  		nfs_fscache_inode_unlock(inode);  	}  } +EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie);  /*   * Replace a per-inode cookie due to revalidation detecting a file having diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index c5b11b53ff3..277b0278289 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -153,6 +153,22 @@ static inline void nfs_readpage_to_fscache(struct inode *inode,  }  /* + * Invalidate the contents of fscache for this inode.  This will not sleep. + */ +static inline void nfs_fscache_invalidate(struct inode *inode) +{ +	fscache_invalidate(NFS_I(inode)->fscache); +} + +/* + * Wait for an object to finish being invalidated. + */ +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) +{ +	fscache_wait_on_invalidate(NFS_I(inode)->fscache); +} + +/*   * indicate the client caching state as readable text   */  static inline const char *nfs_server_fscache_state(struct nfs_server *server) @@ -162,7 +178,6 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server)  	return "no ";  } -  #else /* CONFIG_NFS_FSCACHE */  static inline int nfs_fscache_register(void) { return 0; }  static inline void nfs_fscache_unregister(void) {} @@ -205,6 +220,9 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,  static inline void nfs_readpage_to_fscache(struct inode *inode,  					   struct page *page, int sync) {} + +static inline void nfs_fscache_invalidate(struct inode *inode) {} +  static inline const char *nfs_server_fscache_state(struct nfs_server *server)  {  	return "no "; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2faae14d89f..ebeb94ce1b0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -161,10 +161,12 @@ static void nfs_zap_caches_locked(struct inode *inode)  	nfsi->attrtimeo_timestamp = jiffies;  	memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); -	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) +	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {  		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; -	else +		nfs_fscache_invalidate(inode); +	} else {  		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +	}  }  void nfs_zap_caches(struct inode *inode) @@ -179,6 +181,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)  	if (mapping->nrpages != 0) {  		spin_lock(&inode->i_lock);  		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; +		nfs_fscache_invalidate(inode);  		spin_unlock(&inode->i_lock);  	}  } @@ -881,7 +884,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map  		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));  	spin_unlock(&inode->i_lock);  	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); -	nfs_fscache_reset_inode_cookie(inode); +	nfs_fscache_wait_on_invalidate(inode);  	dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",  			inode->i_sb->s_id, (long long)NFS_FILEID(inode));  	return 0; @@ -957,6 +960,10 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr  		i_size_write(inode, nfs_size_to_loff_t(fattr->size));  		ret |= NFS_INO_INVALID_ATTR;  	} + +	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) +		nfs_fscache_invalidate(inode); +  	return ret;  } @@ -1205,8 +1212,10 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr  	struct nfs_inode *nfsi = NFS_I(inode);  	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; -	if (S_ISDIR(inode->i_mode)) +	if (S_ISDIR(inode->i_mode)) {  		nfsi->cache_validity |= NFS_INO_INVALID_DATA; +		nfs_fscache_invalidate(inode); +	}  	if ((fattr->valid & NFS_ATTR_FATTR) == 0)  		return 0;  	return nfs_refresh_inode_locked(inode, fattr); @@ -1494,6 +1503,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  			(save_cache_validity & NFS_INO_REVAL_FORCED))  		nfsi->cache_validity |= invalid; +	if (invalid & NFS_INO_INVALID_DATA) +		nfs_fscache_invalidate(inode); +  	return 0;   out_err:  	/* diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e7699308364..08ddcccb888 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -5,6 +5,7 @@   */  #include <linux/nfs_fs.h>  #include "internal.h" +#include "fscache.h"  #include "pnfs.h"  #define NFSDBG_FACILITY		NFSDBG_FILE @@ -74,6 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));  	nfs_file_set_open_context(filp, ctx); +	nfs_fscache_set_inode_cookie(inode, filp);  	err = 0;  out_put_ctx: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 493f0f41c55..5d864fb3657 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@  #include "pnfs.h"  #include "netns.h"  #include "nfs4session.h" - +#include "fscache.h"  #define NFSDBG_FACILITY		NFSDBG_PROC @@ -734,6 +734,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)  	if (!cinfo->atomic || cinfo->before != dir->i_version)  		nfs_force_lookup_revalidate(dir);  	dir->i_version = cinfo->after; +	nfs_fscache_invalidate(dir);  	spin_unlock(&dir->i_lock);  } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5209916e122..b673be31590 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1794,7 +1794,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,  	if (PagePrivate(page))  		return -EBUSY; -	nfs_fscache_release_page(page, GFP_KERNEL); +	if (!nfs_fscache_release_page(page, GFP_KERNEL)) +		return -EBUSY;  	return migrate_page(mapping, newpage, page, mode);  } diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 16f35f7423c..61946883025 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -167,7 +167,6 @@ const struct file_operations nilfs_file_operations = {  };  const struct inode_operations nilfs_file_inode_operations = { -	.truncate	= nilfs_truncate,  	.setattr	= nilfs_setattr,  	.permission     = nilfs_permission,  	.fiemap		= nilfs_fiemap, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4d31d2cca7f..6b49f14eac8 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -213,6 +213,16 @@ static int nilfs_set_page_dirty(struct page *page)  	return ret;  } +void nilfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		nilfs_truncate(inode); +	} +} +  static int nilfs_write_begin(struct file *file, struct address_space *mapping,  			     loff_t pos, unsigned len, unsigned flags,  			     struct page **pagep, void **fsdata) @@ -227,10 +237,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,  	err = block_write_begin(mapping, pos, len, flags, pagep,  				nilfs_get_block);  	if (unlikely(err)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); - +		nilfs_write_failed(mapping, pos + len);  		nilfs_transaction_abort(inode->i_sb);  	}  	return err; @@ -259,6 +266,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,  		loff_t offset, unsigned long nr_segs)  {  	struct file *file = iocb->ki_filp; +	struct address_space *mapping = file->f_mapping;  	struct inode *inode = file->f_mapping->host;  	ssize_t size; @@ -278,7 +286,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,  		loff_t end = offset + iov_length(iov, nr_segs);  		if (end > isize) -			vmtruncate(inode, isize); +			nilfs_write_failed(mapping, end);  	}  	return size; @@ -786,10 +794,8 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)  	if ((iattr->ia_valid & ATTR_SIZE) &&  	    iattr->ia_size != i_size_read(inode)) {  		inode_dio_wait(inode); - -		err = vmtruncate(inode, iattr->ia_size); -		if (unlikely(err)) -			goto out_err; +		truncate_setsize(inode, iattr->ia_size); +		nilfs_truncate(inode);  	}  	setattr_copy(inode, iattr); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 74cece80e9a..9bc72dec3fa 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -277,6 +277,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);  extern void nilfs_truncate(struct inode *);  extern void nilfs_evict_inode(struct inode *);  extern int nilfs_setattr(struct dentry *, struct iattr *); +extern void nilfs_write_failed(struct address_space *mapping, loff_t to);  int nilfs_permission(struct inode *inode, int mask);  int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);  extern int nilfs_inode_dirty(struct inode *); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index f1626f5011c..ff00a0b7acb 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -527,7 +527,8 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,  		if (unlikely(err)) {  			loff_t isize = inode->i_size;  			if (pos + blocksize > isize) -				vmtruncate(inode, isize); +				nilfs_write_failed(inode->i_mapping, +							pos + blocksize);  			goto failed_inode;  		} diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1ecf46448f8..5b2d4f0853a 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1762,6 +1762,16 @@ err_out:  	return err;  } +static void ntfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		ntfs_truncate_vfs(inode); +	} +} +  /**   * ntfs_file_buffered_write -   * @@ -2022,8 +2032,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,  				 * allocated space, which is not a disaster.  				 */  				i_size = i_size_read(vi); -				if (pos + bytes > i_size) -					vmtruncate(vi, i_size); +				if (pos + bytes > i_size) { +					ntfs_write_failed(mapping, pos + bytes); +				}  				break;  			}  		} @@ -2227,7 +2238,6 @@ const struct file_operations ntfs_file_ops = {  const struct inode_operations ntfs_file_inode_ops = {  #ifdef NTFS_RW -	.truncate	= ntfs_truncate_vfs,  	.setattr	= ntfs_setattr,  #endif /* NTFS_RW */  }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 1d27331e6fc..d3e118cc6ff 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2866,9 +2866,11 @@ conv_err_out:   *   * See ntfs_truncate() description above for details.   */ +#ifdef NTFS_RW  void ntfs_truncate_vfs(struct inode *vi) {  	ntfs_truncate(vi);  } +#endif  /**   * ntfs_setattr - called from notify_change() when an attribute is being changed @@ -2914,8 +2916,10 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)  						NInoCompressed(ni) ?  						"compressed" : "encrypted");  				err = -EOPNOTSUPP; -			} else -				err = vmtruncate(vi, attr->ia_size); +			} else { +				truncate_setsize(vi, attr->ia_size); +				ntfs_truncate_vfs(vi); +			}  			if (err || ia_valid == ATTR_SIZE)  				goto out;  		} else { diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index db29695f845..76b6cfb579d 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -316,6 +316,10 @@ static inline void ntfs_commit_inode(struct inode *vi)  	return;  } +#else + +static inline void ntfs_truncate_vfs(struct inode *vi) {} +  #endif /* NTFS_RW */  #endif /* _LINUX_NTFS_INODE_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fe492e1a3cf..37d313ede15 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1218,24 +1218,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)  		}  	} -	/* -	 * This will intentionally not wind up calling truncate_setsize(), -	 * since all the work for a size change has been done above. -	 * Otherwise, we could get into problems with truncate as -	 * ip_alloc_sem is used there to protect against i_size -	 * changes. -	 * -	 * XXX: this means the conditional below can probably be removed. -	 */ -	if ((attr->ia_valid & ATTR_SIZE) && -	    attr->ia_size != i_size_read(inode)) { -		status = vmtruncate(inode, attr->ia_size); -		if (status) { -			mlog_errno(status); -			goto bail_commit; -		} -	} -  	setattr_copy(inode, attr);  	mark_inode_dirty(inode); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 77e3cb2962b..e0d9b3e722b 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -306,6 +306,16 @@ omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)  	return mpage_writepages(mapping, wbc, omfs_get_block);  } +static void omfs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		omfs_truncate(inode); +	} +} +  static int omfs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -314,11 +324,8 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping,  	ret = block_write_begin(mapping, pos, len, flags, pagep,  				omfs_get_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		omfs_write_failed(mapping, pos + len);  	return ret;  } @@ -350,9 +357,11 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			return error; +		truncate_setsize(inode, attr->ia_size); +		omfs_truncate(inode);  	}  	setattr_copy(inode, attr); @@ -362,7 +371,6 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)  const struct inode_operations omfs_file_inops = {  	.setattr = omfs_setattr, -	.truncate = omfs_truncate  };  const struct address_space_operations omfs_aops = { diff --git a/fs/open.c b/fs/open.c index 182d8667b7b..9b33c0cbfac 100644 --- a/fs/open.c +++ b/fs/open.c @@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,  	return ret;  } -static long do_sys_truncate(const char __user *pathname, loff_t length) +long vfs_truncate(struct path *path, loff_t length)  { -	struct path path;  	struct inode *inode; -	int error; - -	error = -EINVAL; -	if (length < 0)	/* sorry, but loff_t says... */ -		goto out; +	long error; -	error = user_path(pathname, &path); -	if (error) -		goto out; -	inode = path.dentry->d_inode; +	inode = path->dentry->d_inode;  	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */ -	error = -EISDIR;  	if (S_ISDIR(inode->i_mode)) -		goto dput_and_out; - -	error = -EINVAL; +		return -EISDIR;  	if (!S_ISREG(inode->i_mode)) -		goto dput_and_out; +		return -EINVAL; -	error = mnt_want_write(path.mnt); +	error = mnt_want_write(path->mnt);  	if (error) -		goto dput_and_out; +		goto out;  	error = inode_permission(inode, MAY_WRITE);  	if (error) @@ -111,19 +100,40 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)  	error = locks_verify_truncate(inode, NULL, length);  	if (!error) -		error = security_path_truncate(&path); +		error = security_path_truncate(path);  	if (!error) -		error = do_truncate(path.dentry, length, 0, NULL); +		error = do_truncate(path->dentry, length, 0, NULL);  put_write_and_out:  	put_write_access(inode);  mnt_drop_write_and_out: -	mnt_drop_write(path.mnt); -dput_and_out: -	path_put(&path); +	mnt_drop_write(path->mnt);  out:  	return error;  } +EXPORT_SYMBOL_GPL(vfs_truncate); + +static long do_sys_truncate(const char __user *pathname, loff_t length) +{ +	unsigned int lookup_flags = LOOKUP_FOLLOW; +	struct path path; +	int error; + +	if (length < 0)	/* sorry, but loff_t says... */ +		return -EINVAL; + +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); +	if (!error) { +		error = vfs_truncate(&path, length); +		path_put(&path); +	} +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	} +	return error; +}  SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)  { @@ -306,6 +316,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)  	struct path path;  	struct inode *inode;  	int res; +	unsigned int lookup_flags = LOOKUP_FOLLOW;  	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */  		return -EINVAL; @@ -328,8 +339,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)  	}  	old_cred = override_creds(override_cred); - -	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); +retry: +	res = user_path_at(dfd, filename, lookup_flags, &path);  	if (res)  		goto out; @@ -364,6 +375,10 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)  out_path_release:  	path_put(&path); +	if (retry_estale(res, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out:  	revert_creds(old_cred);  	put_cred(override_cred); @@ -379,8 +394,9 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)  {  	struct path path;  	int error; - -	error = user_path_dir(filename, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: +	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);  	if (error)  		goto out; @@ -392,6 +408,10 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)  dput_and_out:  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out:  	return error;  } @@ -425,8 +445,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)  {  	struct path path;  	int error; - -	error = user_path_dir(filename, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: +	error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);  	if (error)  		goto out; @@ -445,6 +466,10 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)  	error = 0;  dput_and_out:  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out:  	return error;  } @@ -489,11 +514,16 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode  {  	struct path path;  	int error; - -	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: +	error = user_path_at(dfd, filename, lookup_flags, &path);  	if (!error) {  		error = chmod_common(&path, mode);  		path_put(&path); +		if (retry_estale(error, lookup_flags)) { +			lookup_flags |= LOOKUP_REVAL; +			goto retry; +		}  	}  	return error;  } @@ -552,6 +582,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,  	lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;  	if (flag & AT_EMPTY_PATH)  		lookup_flags |= LOOKUP_EMPTY; +retry:  	error = user_path_at(dfd, filename, lookup_flags, &path);  	if (error)  		goto out; @@ -562,6 +593,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,  	mnt_drop_write(path.mnt);  out_release:  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out:  	return error;  } diff --git a/fs/proc/base.c b/fs/proc/base.c index 5a5a0be40e4..9b43ff77a51 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -542,13 +542,6 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)  	if (error)  		return error; -	if ((attr->ia_valid & ATTR_SIZE) && -	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); -		if (error) -			return error; -	} -  	setattr_copy(inode, attr);  	mark_inode_dirty(inode);  	return 0; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7b3ae3cc0ef..2e4ed13b9ee 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -261,16 +261,9 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)  	if (error)  		return error; -	if ((iattr->ia_valid & ATTR_SIZE) && -	    iattr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, iattr->ia_size); -		if (error) -			return error; -	} -  	setattr_copy(inode, iattr);  	mark_inode_dirty(inode); -	 +  	de->uid = inode->i_uid;  	de->gid = inode->i_gid;  	de->mode = inode->i_mode; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 701580ddfcc..1827d88ad58 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -736,13 +736,6 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)  	if (error)  		return error; -	if ((attr->ia_valid & ATTR_SIZE) && -	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); -		if (error) -			return error; -	} -  	setattr_copy(inode, attr);  	mark_inode_dirty(inode);  	return 0; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 8375c922c0d..50302d6f889 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -126,7 +126,7 @@ static int reiserfs_file_open(struct inode *inode, struct file *file)  	return err;  } -static void reiserfs_vfs_truncate_file(struct inode *inode) +void reiserfs_vfs_truncate_file(struct inode *inode)  {  	mutex_lock(&(REISERFS_I(inode)->tailpack));  	reiserfs_truncate_file(inode, 1); @@ -312,7 +312,6 @@ const struct file_operations reiserfs_file_operations = {  };  const struct inode_operations reiserfs_file_inode_operations = { -	.truncate = reiserfs_vfs_truncate_file,  	.setattr = reiserfs_setattr,  	.setxattr = reiserfs_setxattr,  	.getxattr = reiserfs_getxattr, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d83736fbc26..95d7680ead4 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3085,8 +3085,10 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,  		loff_t isize = i_size_read(inode);  		loff_t end = offset + iov_length(iov, nr_segs); -		if (end > isize) -			vmtruncate(inode, isize); +		if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { +			truncate_setsize(inode, isize); +			reiserfs_vfs_truncate_file(inode); +		}  	}  	return ret; @@ -3200,8 +3202,13 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)  	 */  	reiserfs_write_unlock_once(inode->i_sb, depth);  	if ((attr->ia_valid & ATTR_SIZE) && -	    attr->ia_size != i_size_read(inode)) -		error = vmtruncate(inode, attr->ia_size); +	    attr->ia_size != i_size_read(inode)) { +		error = inode_newsize_ok(inode, attr->ia_size); +		if (!error) { +			truncate_setsize(inode, attr->ia_size); +			reiserfs_vfs_truncate_file(inode); +		} +	}  	if (!error) {  		setattr_copy(inode, attr); diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 33215f57ea0..157e474ab30 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2455,6 +2455,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct  								    *,  								    int count);  int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); +void reiserfs_vfs_truncate_file(struct inode *inode);  int reiserfs_commit_page(struct inode *inode, struct page *page,  			 unsigned from, unsigned to);  void reiserfs_flush_old_commits(struct super_block *); diff --git a/fs/stat.c b/fs/stat.c index eae494630a3..14f45459c83 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -74,7 +74,7 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,  {  	struct path path;  	int error = -EINVAL; -	int lookup_flags = 0; +	unsigned int lookup_flags = 0;  	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |  		      AT_EMPTY_PATH)) != 0) @@ -84,13 +84,17 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,  		lookup_flags |= LOOKUP_FOLLOW;  	if (flag & AT_EMPTY_PATH)  		lookup_flags |= LOOKUP_EMPTY; - +retry:  	error = user_path_at(dfd, filename, lookup_flags, &path);  	if (error)  		goto out;  	error = vfs_getattr(path.mnt, path.dentry, stat);  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  out:  	return error;  } @@ -296,11 +300,13 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,  	struct path path;  	int error;  	int empty = 0; +	unsigned int lookup_flags = LOOKUP_EMPTY;  	if (bufsiz <= 0)  		return -EINVAL; -	error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty); +retry: +	error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);  	if (!error) {  		struct inode *inode = path.dentry->d_inode; @@ -314,6 +320,10 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,  			}  		}  		path_put(&path); +		if (retry_estale(error, lookup_flags)) { +			lookup_flags |= LOOKUP_REVAL; +			goto retry; +		}  	}  	return error;  } diff --git a/fs/statfs.c b/fs/statfs.c index f8e832e6f0a..c219e733f55 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -77,10 +77,17 @@ EXPORT_SYMBOL(vfs_statfs);  int user_statfs(const char __user *pathname, struct kstatfs *st)  {  	struct path path; -	int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); +	int error; +	unsigned int lookup_flags = LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (!error) {  		error = vfs_statfs(&path, st);  		path_put(&path); +		if (retry_estale(error, lookup_flags)) { +			lookup_flags |= LOOKUP_REVAL; +			goto retry; +		}  	}  	return error;  } diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 0a65939508e..9d4dc683179 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -41,9 +41,11 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr)  	if ((attr->ia_valid & ATTR_SIZE) &&  	    attr->ia_size != i_size_read(inode)) { -		error = vmtruncate(inode, attr->ia_size); +		error = inode_newsize_ok(inode, attr->ia_size);  		if (error)  			return error; +		truncate_setsize(inode, attr->ia_size); +		sysv_truncate(inode);  	}  	setattr_copy(inode, attr); @@ -52,7 +54,6 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr)  }  const struct inode_operations sysv_file_inode_operations = { -	.truncate	= sysv_truncate,  	.setattr	= sysv_setattr,  	.getattr	= sysv_getattr,  }; diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 90b54b43878..c1a591a4725 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -464,6 +464,16 @@ int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)  	return __block_write_begin(page, pos, len, get_block);  } +static void sysv_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) { +		truncate_pagecache(inode, to, inode->i_size); +		sysv_truncate(inode); +	} +} +  static int sysv_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -471,11 +481,8 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping,  	int ret;  	ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		sysv_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index eb6d0b7dc87..ff24e4449ec 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -526,6 +526,14 @@ int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)  	return __block_write_begin(page, pos, len, ufs_getfrag_block);  } +static void ufs_write_failed(struct address_space *mapping, loff_t to) +{ +	struct inode *inode = mapping->host; + +	if (to > inode->i_size) +		truncate_pagecache(inode, to, inode->i_size); +} +  static int ufs_write_begin(struct file *file, struct address_space *mapping,  			loff_t pos, unsigned len, unsigned flags,  			struct page **pagep, void **fsdata) @@ -534,11 +542,8 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,  	ret = block_write_begin(mapping, pos, len, flags, pagep,  				ufs_getfrag_block); -	if (unlikely(ret)) { -		loff_t isize = mapping->host->i_size; -		if (pos + len > isize) -			vmtruncate(mapping->host, isize); -	} +	if (unlikely(ret)) +		ufs_write_failed(mapping, pos + len);  	return ret;  } diff --git a/fs/utimes.c b/fs/utimes.c index bb0696a4173..f4fb7eca10e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -158,13 +158,17 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,  		if (!(flags & AT_SYMLINK_NOFOLLOW))  			lookup_flags |= LOOKUP_FOLLOW; - +retry:  		error = user_path_at(dfd, filename, lookup_flags, &path);  		if (error)  			goto out;  		error = utimes_common(&path, times);  		path_put(&path); +		if (retry_estale(error, lookup_flags)) { +			lookup_flags |= LOOKUP_REVAL; +			goto retry; +		}  	}  out: diff --git a/fs/xattr.c b/fs/xattr.c index e21c119f4f9..3377dff1840 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -370,8 +370,9 @@ SYSCALL_DEFINE5(setxattr, const char __user *, pathname,  {  	struct path path;  	int error; - -	error = user_path(pathname, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = mnt_want_write(path.mnt); @@ -380,6 +381,10 @@ SYSCALL_DEFINE5(setxattr, const char __user *, pathname,  		mnt_drop_write(path.mnt);  	}  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -389,8 +394,9 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,  {  	struct path path;  	int error; - -	error = user_lpath(pathname, &path); +	unsigned int lookup_flags = 0; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = mnt_want_write(path.mnt); @@ -399,6 +405,10 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,  		mnt_drop_write(path.mnt);  	}  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -476,12 +486,17 @@ SYSCALL_DEFINE4(getxattr, const char __user *, pathname,  {  	struct path path;  	ssize_t error; - -	error = user_path(pathname, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = getxattr(path.dentry, name, value, size);  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -490,12 +505,17 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,  {  	struct path path;  	ssize_t error; - -	error = user_lpath(pathname, &path); +	unsigned int lookup_flags = 0; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = getxattr(path.dentry, name, value, size);  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -556,12 +576,17 @@ SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,  {  	struct path path;  	ssize_t error; - -	error = user_path(pathname, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = listxattr(path.dentry, list, size);  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -570,12 +595,17 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,  {  	struct path path;  	ssize_t error; - -	error = user_lpath(pathname, &path); +	unsigned int lookup_flags = 0; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = listxattr(path.dentry, list, size);  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -615,8 +645,9 @@ SYSCALL_DEFINE2(removexattr, const char __user *, pathname,  {  	struct path path;  	int error; - -	error = user_path(pathname, &path); +	unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = mnt_want_write(path.mnt); @@ -625,6 +656,10 @@ SYSCALL_DEFINE2(removexattr, const char __user *, pathname,  		mnt_drop_write(path.mnt);  	}  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  } @@ -633,8 +668,9 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,  {  	struct path path;  	int error; - -	error = user_lpath(pathname, &path); +	unsigned int lookup_flags = 0; +retry: +	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);  	if (error)  		return error;  	error = mnt_want_write(path.mnt); @@ -643,6 +679,10 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,  		mnt_drop_write(path.mnt);  	}  	path_put(&path); +	if (retry_estale(error, lookup_flags)) { +		lookup_flags |= LOOKUP_REVAL; +		goto retry; +	}  	return error;  }  |