diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-29 10:43:51 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-29 10:43:51 -0700 | 
| commit | 53f2c4a8fd882009a2a75c5b72d6898c0808616e (patch) | |
| tree | 922293a1056c0c2358203cdab832f0e0891e628a /fs/nfs | |
| parent | 8f6576ad476b2a22d05ddafd2ddaee102577a4ed (diff) | |
| parent | cc0a98436820b161b595b8cc1d2329bcf7328107 (diff) | |
| download | olio-linux-3.10-53f2c4a8fd882009a2a75c5b72d6898c0808616e.tar.xz olio-linux-3.10-53f2c4a8fd882009a2a75c5b72d6898c0808616e.zip  | |
Merge tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "New features include:
   - Rewrite the O_DIRECT code so that it can share the same coalescing
     and pNFS functionality as the page cache code.
   - Allow the server to provide hints as to when we should use pNFS,
     and when it is more efficient to read and write through the
     metadata server.
   - NFS cache consistency updates:
     * Use the ctime to emulate a change attribute for NFSv2/v3 so that
       all NFS versions can share the same cache management code.
     * New cache management code will only look at the change attribute
       and size attribute when deciding whether or not our cached data
       is still valid.
     * Don't request NFSv4 post-op attributes on writes in cases such as
       O_DIRECT, where we don't care about data cache consistency, or
       when we have a write delegation, and know that our cache is still
       consistent.
     * Don't request NFSv4 post-op attributes on operations such as
       COMMIT, where there are no expected metadata updates.
     * Don't request NFSv4 directory post-op attributes in cases where
       the operations themselves already return change attribute
       updates: i.e. operations such as OPEN, CREATE, REMOVE, LINK and
       RENAME.
   - Speed up 'ls' and friends by using READDIR rather than READDIRPLUS
     if we detect no attempts to look up filenames.
   - Improve the code sharing between NFSv2/v3 and v4 mounts
   - NFSv4.1 state management efficiency improvements
   - More patches in preparation for NFSv4/v4.1 migration functionality."
Fix trivial conflict in fs/nfs/nfs4proc.c that was due to the dcache
qstr name initialization changes (that made the length/hash a 64-bit
union).
* tag 'nfs-for-3.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (146 commits)
  NFSv4: Add debugging printks to state manager
  NFSv4: Map NFS4ERR_SHARE_DENIED into an EACCES error instead of EIO
  NFSv4: update_changeattr does not need to set NFS_INO_REVAL_PAGECACHE
  NFSv4.1: nfs4_reset_session should use nfs4_handle_reclaim_lease_error
  NFSv4.1: Handle other occurrences of NFS4ERR_CONN_NOT_BOUND_TO_SESSION
  NFSv4.1: Handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION in the state manager
  NFSv4.1: Handle errors in nfs4_bind_conn_to_session
  NFSv4.1: nfs4_bind_conn_to_session should drain the session
  NFSv4.1: Don't clobber the seqid if exchange_id returns a confirmed clientid
  NFSv4.1: Add DESTROY_CLIENTID
  NFSv4.1: Ensure we use the correct credentials for bind_conn_to_session
  NFSv4.1: Ensure we use the correct credentials for session create/destroy
  NFSv4.1: Move NFSPROC4_CLNT_BIND_CONN_TO_SESSION to the end of the operations
  NFSv4.1: Handle NFS4ERR_SEQ_MISORDERED when confirming the lease
  NFSv4: When purging the lease, we must clear NFS4CLNT_LEASE_CONFIRM
  NFSv4: Clean up the error handling for nfs4_reclaim_lease
  NFSv4.1: Exchange ID must use GFP_NOFS allocation mode
  nfs41: Use BIND_CONN_TO_SESSION for CB_PATH_DOWN*
  nfs4.1: add BIND_CONN_TO_SESSION operation
  NFSv4.1 test the mdsthreshold hint parameters
  ...
Diffstat (limited to 'fs/nfs')
39 files changed, 3733 insertions, 2776 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 2a0e6c59914..f90f4f5cd42 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -29,9 +29,20 @@ config NFS_FS  	  If unsure, say N. +config NFS_V2 +	bool "NFS client support for NFS version 2" +	depends on NFS_FS +	default y +	help +	  This option enables support for version 2 of the NFS protocol +	  (RFC 1094) in the kernel's NFS client. + +	  If unsure, say Y. +  config NFS_V3  	bool "NFS client support for NFS version 3"  	depends on NFS_FS +	default y  	help  	  This option enables support for version 3 of the NFS protocol  	  (RFC 1813) in the kernel's NFS client. diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b58613d0abb..7ddd45d9f17 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,11 +4,12 @@  obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ -			   direct.o pagelist.o proc.o read.o symlink.o unlink.o \ +nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \ +			   direct.o pagelist.o read.o symlink.o unlink.o \  			   write.o namespace.o mount_clnt.o \  			   dns_resolve.o cache_lib.o  nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o +nfs-$(CONFIG_NFS_V2)	+= proc.o nfs2xdr.o  nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o  nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o  nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7f6a23f0244..7ae8a608956 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)  	struct parallel_io *par = bio->bi_private;  	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);  	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; -	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;  	do {  		struct page *page = bvec->bv_page; @@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)  			
SetPageUptodate(page);  	} while (bvec >= bio->bi_io_vec);  	if (!uptodate) { -		if (!rdata->pnfs_error) -			rdata->pnfs_error = -EIO; -		pnfs_set_lo_fail(rdata->lseg); +		struct nfs_read_data *rdata = par->data; +		struct nfs_pgio_header *header = rdata->header; + +		if (!header->pnfs_error) +			header->pnfs_error = -EIO; +		pnfs_set_lo_fail(header->lseg);  	}  	bio_put(bio);  	put_parallel(par); @@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)  {  	struct nfs_read_data *rdata = data; -	rdata->task.tk_status = rdata->pnfs_error; +	rdata->task.tk_status = rdata->header->pnfs_error;  	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);  	schedule_work(&rdata->task.u.tk_work);  } @@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)  static enum pnfs_try_status  bl_read_pagelist(struct nfs_read_data *rdata)  { +	struct nfs_pgio_header *header = rdata->header;  	int i, hole;  	struct bio *bio = NULL;  	struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -239,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)  	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;  	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, -	       rdata->npages, f_offset, (unsigned int)rdata->args.count); +	       rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);  	par = alloc_parallel(rdata);  	if (!par) @@ -249,17 +252,17 @@ bl_read_pagelist(struct nfs_read_data *rdata)  	isect = (sector_t) (f_offset >> SECTOR_SHIFT);  	/* Code assumes extents are page-aligned */ -	for (i = pg_index; i < rdata->npages; i++) { +	for (i = pg_index; i < rdata->pages.npages; i++) {  		if (!extent_length) {  			/* We've used up the previous extent */  			bl_put_extent(be);  			bl_put_extent(cow_read);  			bio = bl_submit_bio(READ, bio);  			/* Get the next one */ -			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), +			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),  					     isect, &cow_read);  			if (!be) { -				rdata->pnfs_error = -EIO; 
+				header->pnfs_error = -EIO;  				goto out;  			}  			extent_length = be->be_length - @@ -282,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)  			struct pnfs_block_extent *be_read;  			be_read = (hole && cow_read) ? cow_read : be; -			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, +			bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, +						 READ,  						 isect, pages[i], be_read,  						 bl_end_io_read, par);  			if (IS_ERR(bio)) { -				rdata->pnfs_error = PTR_ERR(bio); +				header->pnfs_error = PTR_ERR(bio);  				bio = NULL;  				goto out;  			} @@ -294,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)  		isect += PAGE_CACHE_SECTORS;  		extent_length -= PAGE_CACHE_SECTORS;  	} -	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { +	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {  		rdata->res.eof = 1; -		rdata->res.count = rdata->inode->i_size - f_offset; +		rdata->res.count = header->inode->i_size - f_offset;  	} else {  		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;  	} @@ -345,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)  	struct parallel_io *par = bio->bi_private;  	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);  	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; -	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;  	do {  		struct page *page = bvec->bv_page; @@ -358,9 +361,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)  	} while (bvec >= bio->bi_io_vec);  	if (unlikely(!uptodate)) { -		if (!wdata->pnfs_error) -			wdata->pnfs_error = -EIO; -		pnfs_set_lo_fail(wdata->lseg); +		struct nfs_write_data *data = par->data; +		struct nfs_pgio_header *header = data->header; + +		if (!header->pnfs_error) +			header->pnfs_error = -EIO; +		pnfs_set_lo_fail(header->lseg);  	}  	bio_put(bio);  	put_parallel(par); @@ -370,12 +376,13 @@ static void bl_end_io_write(struct bio *bio, int err)  {  	struct parallel_io *par = bio->bi_private;  	
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); -	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; +	struct nfs_write_data *data = par->data; +	struct nfs_pgio_header *header = data->header;  	if (!uptodate) { -		if (!wdata->pnfs_error) -			wdata->pnfs_error = -EIO; -		pnfs_set_lo_fail(wdata->lseg); +		if (!header->pnfs_error) +			header->pnfs_error = -EIO; +		pnfs_set_lo_fail(header->lseg);  	}  	bio_put(bio);  	put_parallel(par); @@ -391,9 +398,9 @@ static void bl_write_cleanup(struct work_struct *work)  	dprintk("%s enter\n", __func__);  	task = container_of(work, struct rpc_task, u.tk_work);  	wdata = container_of(task, struct nfs_write_data, task); -	if (likely(!wdata->pnfs_error)) { +	if (likely(!wdata->header->pnfs_error)) {  		/* Marks for LAYOUTCOMMIT */ -		mark_extents_written(BLK_LSEG2EXT(wdata->lseg), +		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),  				     wdata->args.offset, wdata->args.count);  	}  	pnfs_ld_write_done(wdata); @@ -404,12 +411,12 @@ static void bl_end_par_io_write(void *data, int num_se)  {  	struct nfs_write_data *wdata = data; -	if (unlikely(wdata->pnfs_error)) { -		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, +	if (unlikely(wdata->header->pnfs_error)) { +		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,  					num_se);  	} -	wdata->task.tk_status = wdata->pnfs_error; +	wdata->task.tk_status = wdata->header->pnfs_error;  	wdata->verf.committed = NFS_FILE_SYNC;  	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);  	schedule_work(&wdata->task.u.tk_work); @@ -540,6 +547,7 @@ check_page:  static enum pnfs_try_status  bl_write_pagelist(struct nfs_write_data *wdata, int sync)  { +	struct nfs_pgio_header *header = wdata->header;  	int i, ret, npg_zero, pg_index, last = 0;  	struct bio *bio = NULL;  	struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -552,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)  	pgoff_t index;  	u64 temp;  	
int npg_per_block = -	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; +	    NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;  	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);  	/* At this point, wdata->pages is a (sequential) list of nfs_pages. @@ -566,7 +574,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)  	/* At this point, have to be more careful with error handling */  	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); -	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); +	be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);  	if (!be || !is_writable(be, isect)) {  		dprintk("%s no matching extents!\n", __func__);  		goto out_mds; @@ -597,10 +605,10 @@ fill_invalid_ext:  			dprintk("%s zero %dth page: index %lu isect %llu\n",  				__func__, npg_zero, index,  				(unsigned long long)isect); -			page = bl_find_get_zeroing_page(wdata->inode, index, +			page = bl_find_get_zeroing_page(header->inode, index,  							cow_read);  			if (unlikely(IS_ERR(page))) { -				wdata->pnfs_error = PTR_ERR(page); +				header->pnfs_error = PTR_ERR(page);  				goto out;  			} else if (page == NULL)  				goto next_page; @@ -612,7 +620,7 @@ fill_invalid_ext:  					__func__, ret);  				end_page_writeback(page);  				page_cache_release(page); -				wdata->pnfs_error = ret; +				header->pnfs_error = ret;  				goto out;  			}  			if (likely(!bl_push_one_short_extent(be->be_inval))) @@ -620,11 +628,11 @@ fill_invalid_ext:  			else {  				end_page_writeback(page);  				page_cache_release(page); -				wdata->pnfs_error = -ENOMEM; +				header->pnfs_error = -ENOMEM;  				goto out;  			}  			/* FIXME: This should be done in bi_end_io */ -			mark_extents_written(BLK_LSEG2EXT(wdata->lseg), +			mark_extents_written(BLK_LSEG2EXT(header->lseg),  					     page->index << PAGE_CACHE_SHIFT,  					     PAGE_CACHE_SIZE); @@ -632,7 +640,7 @@ fill_invalid_ext:  						 isect, page, be,  						 
bl_end_io_write_zero, par);  			if (IS_ERR(bio)) { -				wdata->pnfs_error = PTR_ERR(bio); +				header->pnfs_error = PTR_ERR(bio);  				bio = NULL;  				goto out;  			} @@ -647,16 +655,16 @@ next_page:  	/* Middle pages */  	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; -	for (i = pg_index; i < wdata->npages; i++) { +	for (i = pg_index; i < wdata->pages.npages; i++) {  		if (!extent_length) {  			/* We've used up the previous extent */  			bl_put_extent(be);  			bio = bl_submit_bio(WRITE, bio);  			/* Get the next one */ -			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), +			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),  					     isect, NULL);  			if (!be || !is_writable(be, isect)) { -				wdata->pnfs_error = -EINVAL; +				header->pnfs_error = -EINVAL;  				goto out;  			}  			if (be->be_state == PNFS_BLOCK_INVALID_DATA) { @@ -664,7 +672,7 @@ next_page:  								be->be_inval)))  					par->bse_count++;  				else { -					wdata->pnfs_error = -ENOMEM; +					header->pnfs_error = -ENOMEM;  					goto out;  				}  			} @@ -677,15 +685,15 @@ next_page:  			if (unlikely(ret)) {  				dprintk("%s bl_mark_sectors_init fail %d\n",  					__func__, ret); -				wdata->pnfs_error = ret; +				header->pnfs_error = ret;  				goto out;  			}  		} -		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, +		bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,  					 isect, pages[i], be,  					 bl_end_io_write, par);  		if (IS_ERR(bio)) { -			wdata->pnfs_error = PTR_ERR(bio); +			header->pnfs_error = PTR_ERR(bio);  			bio = NULL;  			goto out;  		} diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index a5c88a554d9..c96554245cc 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,  	uint8_t *dataptr;  	DECLARE_WAITQUEUE(wq, current);  	int offset, len, i, rc; -	struct net *net = server->nfs_client->net; +	struct net *net = 
server->nfs_client->cl_net;  	struct nfs_net *nn = net_generic(net, nfs_net_id);  	struct bl_dev_msg *reply = &nn->bl_mount_reply; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 60f7e4ec842..7d108753af8 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);  static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)  {  	int ret = 0; -	struct nfs_net *nn = net_generic(clp->net, nfs_net_id); +	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);  	if (clp->rpc_ops->version != 4 || minorversion != 0)  		return ret; @@ -90,7 +90,9 @@ static bool nfs4_disable_idmapping = true;   * RPC cruft for NFS   */  static const struct rpc_version *nfs_version[5] = { +#ifdef CONFIG_NFS_V2  	[2]			= &nfs_version2, +#endif  #ifdef CONFIG_NFS_V3  	[3]			= &nfs_version3,  #endif @@ -129,6 +131,7 @@ const struct rpc_program nfsacl_program = {  #endif  /* CONFIG_NFS_V3_ACL */  struct nfs_client_initdata { +	unsigned long init_flags;  	const char *hostname;  	const struct sockaddr *addr;  	size_t addrlen; @@ -172,7 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_  	clp->cl_rpcclient = ERR_PTR(-EINVAL);  	clp->cl_proto = cl_init->proto; -	clp->net = get_net(cl_init->net); +	clp->cl_net = get_net(cl_init->net);  #ifdef CONFIG_NFS_V4  	err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -182,7 +185,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_  	spin_lock_init(&clp->cl_lock);  	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);  	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); -	clp->cl_boot_time = CURRENT_TIME;  	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;  	clp->cl_minorversion = cl_init->minorversion;  	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; @@ -207,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)  	if (nfs4_has_session(clp)) {  		
nfs4_deviceid_purge_client(clp);  		nfs4_destroy_session(clp->cl_session); +		nfs4_destroy_clientid(clp);  	}  } @@ -235,6 +238,9 @@ static void nfs4_shutdown_client(struct nfs_client *clp)  		nfs_idmap_delete(clp);  	rpc_destroy_wait_queue(&clp->cl_rpcwaitq); +	kfree(clp->cl_serverowner); +	kfree(clp->cl_serverscope); +	kfree(clp->cl_implid);  }  /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ @@ -248,7 +254,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)  /* nfs_client_lock held */  static void nfs_cb_idr_remove_locked(struct nfs_client *clp)  { -	struct nfs_net *nn = net_generic(clp->net, nfs_net_id); +	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);  	if (clp->cl_cb_ident)  		idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); @@ -301,10 +307,8 @@ static void nfs_free_client(struct nfs_client *clp)  	if (clp->cl_machine_cred != NULL)  		put_rpccred(clp->cl_machine_cred); -	put_net(clp->net); +	put_net(clp->cl_net);  	kfree(clp->cl_hostname); -	kfree(clp->server_scope); -	kfree(clp->impl_id);  	kfree(clp);  	dprintk("<-- nfs_free_client()\n"); @@ -321,7 +325,7 @@ void nfs_put_client(struct nfs_client *clp)  		return;  	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); -	nn = net_generic(clp->net, nfs_net_id); +	nn = net_generic(clp->cl_net, nfs_net_id);  	if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {  		list_del(&clp->cl_share_link); @@ -456,6 +460,8 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,  	    clp->cl_cons_state == NFS_CS_SESSION_INITING))  		return false; +	smp_rmb(); +  	/* Match the version and minorversion */  	if (clp->rpc_ops->version != 4 ||  	    clp->cl_minorversion != minorversion) @@ -504,6 +510,47 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat  	return NULL;  } +static bool nfs_client_init_is_complete(const struct nfs_client *clp) +{ +	return clp->cl_cons_state != NFS_CS_INITING; +} + +int 
nfs_wait_client_init_complete(const struct nfs_client *clp) +{ +	return wait_event_killable(nfs_client_active_wq, +			nfs_client_init_is_complete(clp)); +} + +/* + * Found an existing client.  Make sure it's ready before returning. + */ +static struct nfs_client * +nfs_found_client(const struct nfs_client_initdata *cl_init, +		 struct nfs_client *clp) +{ +	int error; + +	error = nfs_wait_client_init_complete(clp); +	if (error < 0) { +		nfs_put_client(clp); +		return ERR_PTR(-ERESTARTSYS); +	} + +	if (clp->cl_cons_state < NFS_CS_READY) { +		error = clp->cl_cons_state; +		nfs_put_client(clp); +		return ERR_PTR(error); +	} + +	smp_rmb(); + +	BUG_ON(clp->cl_cons_state != NFS_CS_READY); + +	dprintk("<-- %s found nfs_client %p for %s\n", +		__func__, clp, cl_init->hostname ?: ""); +	return clp; +} +  /*   * Look up a client by IP address and protocol version   * - creates a new record if one doesn't yet exist @@ -512,11 +559,9 @@ static struct nfs_client *  nfs_get_client(const struct nfs_client_initdata *cl_init,  	       const struct rpc_timeout *timeparms,  	       const char *ip_addr, -	       rpc_authflavor_t authflavour, -	       int noresvport) +	       rpc_authflavor_t authflavour)  {  	struct nfs_client *clp, *new = NULL; -	int error;  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);  	dprintk("--> nfs_get_client(%s,v%u)\n", @@ -527,60 +572,29 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,  		spin_lock(&nn->nfs_client_lock);  		clp = nfs_match_client(cl_init); -		if (clp) -			goto found_client; -		if (new) -			goto install_client; +		if (clp) { +			spin_unlock(&nn->nfs_client_lock); +			if (new) +				nfs_free_client(new); +			return nfs_found_client(cl_init, clp); +		} +		if (new) { +			list_add(&new->cl_share_link, &nn->nfs_client_list); +			spin_unlock(&nn->nfs_client_lock); +			new->cl_flags = cl_init->init_flags; +			return cl_init->rpc_ops->init_client(new, +						timeparms, ip_addr, +						authflavour); +		}  		
spin_unlock(&nn->nfs_client_lock);  		new = nfs_alloc_client(cl_init);  	} while (!IS_ERR(new)); -	dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); +	dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", +		cl_init->hostname ?: "", PTR_ERR(new));  	return new; - -	/* install a new client and return with it unready */ -install_client: -	clp = new; -	list_add(&clp->cl_share_link, &nn->nfs_client_list); -	spin_unlock(&nn->nfs_client_lock); - -	error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, -					      authflavour, noresvport); -	if (error < 0) { -		nfs_put_client(clp); -		return ERR_PTR(error); -	} -	dprintk("--> nfs_get_client() = %p [new]\n", clp); -	return clp; - -	/* found an existing client -	 * - make sure it's ready before returning -	 */ -found_client: -	spin_unlock(&nn->nfs_client_lock); - -	if (new) -		nfs_free_client(new); - -	error = wait_event_killable(nfs_client_active_wq, -				clp->cl_cons_state < NFS_CS_INITING); -	if (error < 0) { -		nfs_put_client(clp); -		return ERR_PTR(-ERESTARTSYS); -	} - -	if (clp->cl_cons_state < NFS_CS_READY) { -		error = clp->cl_cons_state; -		nfs_put_client(clp); -		return ERR_PTR(error); -	} - -	BUG_ON(clp->cl_cons_state != NFS_CS_READY); - -	dprintk("--> nfs_get_client() = %p [share]\n", clp); -	return clp;  }  /* @@ -588,27 +602,12 @@ found_client:   */  void nfs_mark_client_ready(struct nfs_client *clp, int state)  { +	smp_wmb();  	clp->cl_cons_state = state;  	wake_up_all(&nfs_client_active_wq);  }  /* - * With sessions, the client is not marked ready until after a - * successful EXCHANGE_ID and CREATE_SESSION. - * - * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate - * other versions of NFS can be tried. 
- */ -int nfs4_check_client_ready(struct nfs_client *clp) -{ -	if (!nfs4_has_session(clp)) -		return 0; -	if (clp->cl_cons_state < NFS_CS_READY) -		return -EPROTONOSUPPORT; -	return 0; -} - -/*   * Initialise the timeout values for a connection   */  static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, @@ -654,12 +653,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,   */  static int nfs_create_rpc_client(struct nfs_client *clp,  				 const struct rpc_timeout *timeparms, -				 rpc_authflavor_t flavor, -				 int discrtry, int noresvport) +				 rpc_authflavor_t flavor)  {  	struct rpc_clnt		*clnt = NULL;  	struct rpc_create_args args = { -		.net		= clp->net, +		.net		= clp->cl_net,  		.protocol	= clp->cl_proto,  		.address	= (struct sockaddr *)&clp->cl_addr,  		.addrsize	= clp->cl_addrlen, @@ -670,9 +668,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,  		.authflavor	= flavor,  	}; -	if (discrtry) +	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))  		args.flags |= RPC_CLNT_CREATE_DISCRTRY; -	if (noresvport) +	if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))  		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;  	if (!IS_ERR(clp->cl_rpcclient)) @@ -713,7 +711,7 @@ static int nfs_start_lockd(struct nfs_server *server)  		.nfs_version	= clp->rpc_ops->version,  		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?  					1 : 0, -		.net		= clp->net, +		.net		= clp->cl_net,  	};  	if (nlm_init.nfs_version > 3) @@ -805,36 +803,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,  	return 0;  } -/* - * Initialise an NFS2 or NFS3 client +/** + * nfs_init_client - Initialise an NFS2 or NFS3 client + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: IP presentation address (not used) + * @authflavor: authentication flavor for underlying RPC transport + * + * Returns pointer to an NFS client, or an ERR_PTR value.   
*/ -int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, -		    const char *ip_addr, rpc_authflavor_t authflavour, -		    int noresvport) +struct nfs_client *nfs_init_client(struct nfs_client *clp, +		    const struct rpc_timeout *timeparms, +		    const char *ip_addr, rpc_authflavor_t authflavour)  {  	int error;  	if (clp->cl_cons_state == NFS_CS_READY) {  		/* the client is already initialised */  		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); -		return 0; +		return clp;  	}  	/*  	 * Create a client RPC handle for doing FSSTAT with UNIX auth only  	 * - RFC 2623, sec 2.3.2  	 */ -	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, -				      0, noresvport); +	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);  	if (error < 0)  		goto error;  	nfs_mark_client_ready(clp, NFS_CS_READY); -	return 0; +	return clp;  error:  	nfs_mark_client_ready(clp, error); +	nfs_put_client(clp);  	dprintk("<-- nfs_init_client() = xerror %d\n", error); -	return error; +	return ERR_PTR(error);  }  /* @@ -847,7 +852,7 @@ static int nfs_init_server(struct nfs_server *server,  		.hostname = data->nfs_server.hostname,  		.addr = (const struct sockaddr *)&data->nfs_server.address,  		.addrlen = data->nfs_server.addrlen, -		.rpc_ops = &nfs_v2_clientops, +		.rpc_ops = NULL,  		.proto = data->nfs_server.protocol,  		.net = data->net,  	}; @@ -857,17 +862,28 @@ static int nfs_init_server(struct nfs_server *server,  	dprintk("--> nfs_init_server()\n"); +	switch (data->version) { +#ifdef CONFIG_NFS_V2 +	case 2: +		cl_init.rpc_ops = &nfs_v2_clientops; +		break; +#endif  #ifdef CONFIG_NFS_V3 -	if (data->version == 3) +	case 3:  		cl_init.rpc_ops = &nfs_v3_clientops; +		break;  #endif +	default: +		return -EPROTONOSUPPORT; +	}  	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,  			data->timeo, data->retrans); +	if (data->flags & NFS_MOUNT_NORESVPORT) +		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);  	/* Allocate or 
find a client reference we can use */ -	clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, -			     data->flags & NFS_MOUNT_NORESVPORT); +	clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);  	if (IS_ERR(clp)) {  		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));  		return PTR_ERR(clp); @@ -880,7 +896,7 @@ static int nfs_init_server(struct nfs_server *server,  	server->options = data->options;  	server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|  		NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| -		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; +		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;  	if (data->rsize)  		server->rsize = nfs_block_size(data->rsize, NULL); @@ -1048,7 +1064,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve  static void nfs_server_insert_lists(struct nfs_server *server)  {  	struct nfs_client *clp = server->nfs_client; -	struct nfs_net *nn = net_generic(clp->net, nfs_net_id); +	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);  	spin_lock(&nn->nfs_client_lock);  	list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); @@ -1065,7 +1081,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)  	if (clp == NULL)  		return; -	nn = net_generic(clp->net, nfs_net_id); +	nn = net_generic(clp->cl_net, nfs_net_id);  	spin_lock(&nn->nfs_client_lock);  	list_del_rcu(&server->client_link);  	if (list_empty(&clp->cl_superblocks)) @@ -1333,21 +1349,27 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)  		 * so that the client back channel can find the  		 * nfs_client struct  		 */ -		clp->cl_cons_state = NFS_CS_SESSION_INITING; +		nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);  	}  #endif /* CONFIG_NFS_V4_1 */  	return nfs4_init_callback(clp);  } -/* - * Initialise an NFS4 client record +/** + * nfs4_init_client - Initialise an NFS4 client record + * + * @clp: nfs_client to initialise + * 
@timeparms: timeout parameters for underlying RPC transport + * @ip_addr: callback IP address in presentation format + * @authflavor: authentication flavor for underlying RPC transport + * + * Returns pointer to an NFS client, or an ERR_PTR value.   */ -int nfs4_init_client(struct nfs_client *clp, -		     const struct rpc_timeout *timeparms, -		     const char *ip_addr, -		     rpc_authflavor_t authflavour, -		     int noresvport) +struct nfs_client *nfs4_init_client(struct nfs_client *clp, +				    const struct rpc_timeout *timeparms, +				    const char *ip_addr, +				    rpc_authflavor_t authflavour)  {  	char buf[INET6_ADDRSTRLEN + 1];  	int error; @@ -1355,14 +1377,14 @@ int nfs4_init_client(struct nfs_client *clp,  	if (clp->cl_cons_state == NFS_CS_READY) {  		/* the client is initialised already */  		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); -		return 0; +		return clp;  	}  	/* Check NFS protocol revision and initialize RPC op vector */  	clp->rpc_ops = &nfs_v4_clientops; -	error = nfs_create_rpc_client(clp, timeparms, authflavour, -				      1, noresvport); +	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); +	error = nfs_create_rpc_client(clp, timeparms, authflavour);  	if (error < 0)  		goto error; @@ -1395,12 +1417,13 @@ int nfs4_init_client(struct nfs_client *clp,  	if (!nfs4_has_session(clp))  		nfs_mark_client_ready(clp, NFS_CS_READY); -	return 0; +	return clp;  error:  	nfs_mark_client_ready(clp, error); +	nfs_put_client(clp);  	dprintk("<-- nfs4_init_client() = xerror %d\n", error); -	return error; +	return ERR_PTR(error);  }  /* @@ -1429,9 +1452,11 @@ static int nfs4_set_client(struct nfs_server *server,  	dprintk("--> nfs4_set_client()\n"); +	if (server->flags & NFS_MOUNT_NORESVPORT) +		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); +  	/* Allocate or find a client reference we can use */ -	clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, -			     server->flags & NFS_MOUNT_NORESVPORT); +	clp = nfs_get_client(&cl_init, 
timeparms, ip_addr, authflavour);  	if (IS_ERR(clp)) {  		error = PTR_ERR(clp);  		goto error; @@ -1465,8 +1490,8 @@ error:   * the MDS.   */  struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, -		const struct sockaddr *ds_addr, -		int ds_addrlen, int ds_proto) +		const struct sockaddr *ds_addr, int ds_addrlen, +		int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)  {  	struct nfs_client_initdata cl_init = {  		.addr = ds_addr, @@ -1474,14 +1499,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,  		.rpc_ops = &nfs_v4_clientops,  		.proto = ds_proto,  		.minorversion = mds_clp->cl_minorversion, -		.net = mds_clp->net, -	}; -	struct rpc_timeout ds_timeout = { -		.to_initval = 15 * HZ, -		.to_maxval = 15 * HZ, -		.to_retries = 1, -		.to_exponential = 1, +		.net = mds_clp->cl_net,  	}; +	struct rpc_timeout ds_timeout;  	struct nfs_client *clp;  	/* @@ -1489,8 +1509,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,  	 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS  	 * (section 13.1 RFC 5661).  	 
*/ +	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);  	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, -			     mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); +			     mds_clp->cl_rpcclient->cl_auth->au_flavor);  	dprintk("<-- %s %p\n", __func__, clp);  	return clp; @@ -1701,7 +1722,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,  				rpc_protocol(parent_server->client),  				parent_server->client->cl_timeout,  				parent_client->cl_mvops->minor_version, -				parent_client->net); +				parent_client->cl_net);  	if (error < 0)  		goto error; @@ -1805,6 +1826,7 @@ void nfs_clients_init(struct net *net)  	idr_init(&nn->cb_ident_idr);  #endif  	spin_lock_init(&nn->nfs_client_lock); +	nn->boot_time = CURRENT_TIME;  }  #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 89af1d26927..bd3a9601d32 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -316,6 +316,10 @@ out:   * nfs_client_return_marked_delegations - return previously marked delegations   * @clp: nfs_client to process   * + * Note that this function is designed to be called by the state + * manager thread. For this reason, it cannot flush the dirty data, + * since that could deadlock in case of a state recovery error. + *   * Returns zero on success, or a negative errno value.   
*/  int nfs_client_return_marked_delegations(struct nfs_client *clp) @@ -340,11 +344,9 @@ restart:  								server);  			rcu_read_unlock(); -			if (delegation != NULL) { -				filemap_flush(inode->i_mapping); +			if (delegation != NULL)  				err = __nfs_inode_return_delegation(inode,  								delegation, 0); -			}  			iput(inode);  			if (!err)  				goto restart; @@ -380,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)   * nfs_inode_return_delegation - synchronously return a delegation   * @inode: inode to process   * + * This routine will always flush any dirty data to disk on the + * assumption that if we need to return the delegation, then + * we should stop caching. + *   * Returns zero on success, or a negative errno value.   */  int nfs_inode_return_delegation(struct inode *inode) @@ -389,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)  	struct nfs_delegation *delegation;  	int err = 0; +	nfs_wb_all(inode);  	if (rcu_access_pointer(nfsi->delegation) != NULL) {  		delegation = nfs_detach_delegation(nfsi, server);  		if (delegation != NULL) { -			nfs_wb_all(inode);  			err = __nfs_inode_return_delegation(inode, delegation, 1);  		}  	} @@ -538,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode,  	struct nfs_client *clp = server->nfs_client;  	struct nfs_delegation *delegation; +	filemap_flush(inode->i_mapping); +  	rcu_read_lock();  	delegation = rcu_dereference(NFS_I(inode)->delegation); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index cd6a7a8dada..72709c4193f 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)  static inline int nfs_inode_return_delegation(struct inode *inode)  { +	nfs_wb_all(inode);  	return 0;  }  #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index eedd24d0ad2..0989a209968 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -475,6 +475,29 @@ different:  }  static +bool 
nfs_use_readdirplus(struct inode *dir, struct file *filp) +{ +	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) +		return false; +	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) +		return true; +	if (filp->f_pos == 0) +		return true; +	return false; +} + +/* + * This function is called by the lookup code to request the use of + * readdirplus to accelerate any future lookups in the same + * directory. + */ +static +void nfs_advise_use_readdirplus(struct inode *dir) +{ +	set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); +} + +static  void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)  {  	struct qstr filename = QSTR_INIT(entry->name, entry->len); @@ -871,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)  	desc->file = filp;  	desc->dir_cookie = &dir_ctx->dir_cookie;  	desc->decode = NFS_PROTO(inode)->decode_dirent; -	desc->plus = NFS_USE_READDIRPLUS(inode); +	desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;  	nfs_block_sillyrename(dentry);  	res = nfs_revalidate_mapping(inode, filp->f_mapping); @@ -1111,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)  	if (!inode) {  		if (nfs_neg_need_reval(dir, dentry, nd))  			goto out_bad; -		goto out_valid; +		goto out_valid_noent;  	}  	if (is_bad_inode(inode)) { @@ -1140,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)  	if (fhandle == NULL || fattr == NULL)  		goto out_error; -	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); +	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);  	if (error)  		goto out_bad;  	if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1153,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)  out_set_verifier:  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));   out_valid: +	/* Success: notify readdir to use READDIRPLUS */ +	
nfs_advise_use_readdirplus(dir); + out_valid_noent:  	dput(parent);  	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",  			__func__, dentry->d_parent->d_name.name, @@ -1296,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru  	parent = dentry->d_parent;  	/* Protect against concurrent sillydeletes */  	nfs_block_sillyrename(parent); -	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); +	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);  	if (error == -ENOENT)  		goto no_entry;  	if (error < 0) { @@ -1308,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru  	if (IS_ERR(res))  		goto out_unblock_sillyrename; +	/* Success: notify readdir to use READDIRPLUS */ +	nfs_advise_use_readdirplus(dir); +  no_entry:  	res = d_materialise_unique(dentry, inode);  	if (res != NULL) { @@ -1643,7 +1672,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,  	if (dentry->d_inode)  		goto out;  	if (fhandle->size == 0) { -		error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); +		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);  		if (error)  			goto out_error;  	} diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 481be7f7bdd..23d170bc44f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@  #include "internal.h"  #include "iostat.h" +#include "pnfs.h"  #define NFSDBG_FACILITY		NFSDBG_VFS @@ -81,16 +82,19 @@ struct nfs_direct_req {  	struct completion	completion;	/* wait for i/o completion */  	/* commit state */ -	struct list_head	rewrite_list;	/* saved nfs_write_data structs */ -	struct nfs_write_data *	commit_data;	/* special write_data for commits */ +	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */ +	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */ +	struct work_struct	work;  	int			flags;  #define 
NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */  #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */  	struct nfs_writeverf	verf;		/* unstable write verifier */  }; +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;  static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); -static const struct rpc_call_ops nfs_write_direct_ops; +static void nfs_direct_write_schedule_work(struct work_struct *work);  static inline void get_dreq(struct nfs_direct_req *dreq)  { @@ -124,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_  	return -EINVAL;  } -static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) -{ -	unsigned int npages; -	unsigned int i; - -	if (count == 0) -		return; -	pages += (pgbase >> PAGE_SHIFT); -	npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; -	for (i = 0; i < npages; i++) { -		struct page *page = pages[i]; -		if (!PageCompound(page)) -			set_page_dirty(page); -	} -} -  static void nfs_direct_release_pages(struct page **pages, unsigned int npages)  {  	unsigned int i; @@ -147,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)  		page_cache_release(pages[i]);  } +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, +			      struct nfs_direct_req *dreq) +{ +	cinfo->lock = &dreq->lock; +	cinfo->mds = &dreq->mds_cinfo; +	cinfo->ds = &dreq->ds_cinfo; +	cinfo->dreq = dreq; +	cinfo->completion_ops = &nfs_direct_commit_completion_ops; +} +  static inline struct nfs_direct_req *nfs_direct_req_alloc(void)  {  	struct nfs_direct_req *dreq; -	dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); +	dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);  	if (!dreq)  		return NULL;  	kref_init(&dreq->kref);  	kref_get(&dreq->kref);  	
init_completion(&dreq->completion); -	INIT_LIST_HEAD(&dreq->rewrite_list); -	dreq->iocb = NULL; -	dreq->ctx = NULL; -	dreq->l_ctx = NULL; +	INIT_LIST_HEAD(&dreq->mds_cinfo.list); +	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);  	spin_lock_init(&dreq->lock); -	atomic_set(&dreq->io_count, 0); -	dreq->count = 0; -	dreq->error = 0; -	dreq->flags = 0;  	return dreq;  } @@ -226,47 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)  	nfs_direct_req_release(dreq);  } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete.  This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - */ -static void nfs_direct_read_result(struct rpc_task *task, void *calldata) +static void nfs_direct_readpage_release(struct nfs_page *req)  { -	struct nfs_read_data *data = calldata; - -	nfs_readpage_result(task, data); +	dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", +		req->wb_context->dentry->d_inode->i_sb->s_id, +		(long long)NFS_FILEID(req->wb_context->dentry->d_inode), +		req->wb_bytes, +		(long long)req_offset(req)); +	nfs_release_request(req);  } -static void nfs_direct_read_release(void *calldata) +static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)  { +	unsigned long bytes = 0; +	struct nfs_direct_req *dreq = hdr->dreq; -	struct nfs_read_data *data = calldata; -	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; -	int status = data->task.tk_status; +	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) +		goto out_put;  	spin_lock(&dreq->lock); -	if (unlikely(status < 0)) { -		dreq->error = status; -		spin_unlock(&dreq->lock); -	} else { -		dreq->count += data->res.count; -		spin_unlock(&dreq->lock); -		nfs_direct_dirty_pages(data->pagevec, -				data->args.pgbase, -				data->res.count); -	} -	nfs_direct_release_pages(data->pagevec, data->npages); +	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 
0)) +		dreq->error = hdr->error; +	else +		dreq->count += hdr->good_bytes; +	spin_unlock(&dreq->lock); + +	while (!list_empty(&hdr->pages)) { +		struct nfs_page *req = nfs_list_entry(hdr->pages.next); +		struct page *page = req->wb_page; +		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { +			if (bytes > hdr->good_bytes) +				zero_user(page, 0, PAGE_SIZE); +			else if (hdr->good_bytes - bytes < PAGE_SIZE) +				zero_user_segment(page, +					hdr->good_bytes & ~PAGE_MASK, +					PAGE_SIZE); +		} +		if (!PageCompound(page)) { +			if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { +				if (bytes < hdr->good_bytes) +					set_page_dirty(page); +			} else +				set_page_dirty(page); +		} +		bytes += req->wb_bytes; +		nfs_list_remove_request(req); +		nfs_direct_readpage_release(req); +	} +out_put:  	if (put_dreq(dreq))  		nfs_direct_complete(dreq); -	nfs_readdata_free(data); +	hdr->release(hdr); +} + +static void nfs_read_sync_pgio_error(struct list_head *head) +{ +	struct nfs_page *req; + +	while (!list_empty(head)) { +		req = nfs_list_entry(head->next); +		nfs_list_remove_request(req); +		nfs_release_request(req); +	} +} + +static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) +{ +	get_dreq(hdr->dreq);  } -static const struct rpc_call_ops nfs_read_direct_ops = { -	.rpc_call_prepare = nfs_read_prepare, -	.rpc_call_done = nfs_direct_read_result, -	.rpc_release = nfs_direct_read_release, +static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { +	.error_cleanup = nfs_read_sync_pgio_error, +	.init_hdr = nfs_direct_pgio_init, +	.completion = nfs_direct_read_completion,  };  /* @@ -276,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {   * handled automatically by nfs_direct_read_result().  Otherwise, if   * no requests have been sent, just return an error.   
*/ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,  						const struct iovec *iov,  						loff_t pos)  { +	struct nfs_direct_req *dreq = desc->pg_dreq;  	struct nfs_open_context *ctx = dreq->ctx;  	struct inode *inode = ctx->dentry->d_inode;  	unsigned long user_addr = (unsigned long)iov->iov_base;  	size_t count = iov->iov_len;  	size_t rsize = NFS_SERVER(inode)->rsize; -	struct rpc_task *task; -	struct rpc_message msg = { -		.rpc_cred = ctx->cred, -	}; -	struct rpc_task_setup task_setup_data = { -		.rpc_client = NFS_CLIENT(inode), -		.rpc_message = &msg, -		.callback_ops = &nfs_read_direct_ops, -		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, -	};  	unsigned int pgbase;  	int result;  	ssize_t started = 0; +	struct page **pagevec = NULL; +	unsigned int npages;  	do { -		struct nfs_read_data *data;  		size_t bytes; +		int i;  		pgbase = user_addr & ~PAGE_MASK; -		bytes = min(rsize,count); +		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);  		result = -ENOMEM; -		data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); -		if (unlikely(!data)) +		npages = nfs_page_array_len(pgbase, bytes); +		if (!pagevec) +			pagevec = kmalloc(npages * sizeof(struct page *), +					  GFP_KERNEL); +		if (!pagevec)  			break; -  		down_read(¤t->mm->mmap_sem);  		result = get_user_pages(current, current->mm, user_addr, -					data->npages, 1, 0, data->pagevec, NULL); +					npages, 1, 0, pagevec, NULL);  		up_read(¤t->mm->mmap_sem); -		if (result < 0) { -			nfs_readdata_free(data); +		if (result < 0)  			break; -		} -		if ((unsigned)result < data->npages) { +		if ((unsigned)result < npages) {  			bytes = result * PAGE_SIZE;  			if (bytes <= pgbase) { -				nfs_direct_release_pages(data->pagevec, result); -				nfs_readdata_free(data); +				nfs_direct_release_pages(pagevec, result);  				break;  			}  			bytes -= pgbase; -			data->npages = result; +			
npages = result;  		} -		get_dreq(dreq); - -		data->req = (struct nfs_page *) dreq; -		data->inode = inode; -		data->cred = msg.rpc_cred; -		data->args.fh = NFS_FH(inode); -		data->args.context = ctx; -		data->args.lock_context = dreq->l_ctx; -		data->args.offset = pos; -		data->args.pgbase = pgbase; -		data->args.pages = data->pagevec; -		data->args.count = bytes; -		data->res.fattr = &data->fattr; -		data->res.eof = 0; -		data->res.count = bytes; -		nfs_fattr_init(&data->fattr); -		msg.rpc_argp = &data->args; -		msg.rpc_resp = &data->res; - -		task_setup_data.task = &data->task; -		task_setup_data.callback_data = data; -		NFS_PROTO(inode)->read_setup(data, &msg); - -		task = rpc_run_task(&task_setup_data); -		if (IS_ERR(task)) -			break; -		rpc_put_task(task); - -		dprintk("NFS: %5u initiated direct read call " -			"(req %s/%Ld, %zu bytes @ offset %Lu)\n", -				data->task.tk_pid, -				inode->i_sb->s_id, -				(long long)NFS_FILEID(inode), -				bytes, -				(unsigned long long)data->args.offset); - -		started += bytes; -		user_addr += bytes; -		pos += bytes; -		/* FIXME: Remove this unnecessary math from final patch */ -		pgbase += bytes; -		pgbase &= ~PAGE_MASK; -		BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); +		for (i = 0; i < npages; i++) { +			struct nfs_page *req; +			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); +			/* XXX do we need to do the eof zeroing found in async_filler? 
*/ +			req = nfs_create_request(dreq->ctx, dreq->inode, +						 pagevec[i], +						 pgbase, req_len); +			if (IS_ERR(req)) { +				result = PTR_ERR(req); +				break; +			} +			req->wb_index = pos >> PAGE_SHIFT; +			req->wb_offset = pos & ~PAGE_MASK; +			if (!nfs_pageio_add_request(desc, req)) { +				result = desc->pg_error; +				nfs_release_request(req); +				break; +			} +			pgbase = 0; +			bytes -= req_len; +			started += req_len; +			user_addr += req_len; +			pos += req_len; +			count -= req_len; +		} +		/* The nfs_page now hold references to these pages */ +		nfs_direct_release_pages(pagevec, npages); +	} while (count != 0 && result >= 0); -		count -= bytes; -	} while (count != 0); +	kfree(pagevec);  	if (started)  		return started; @@ -388,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,  					      unsigned long nr_segs,  					      loff_t pos)  { +	struct nfs_pageio_descriptor desc;  	ssize_t result = -EINVAL;  	size_t requested_bytes = 0;  	unsigned long seg; +	nfs_pageio_init_read(&desc, dreq->inode, +			     &nfs_direct_read_completion_ops);  	get_dreq(dreq); +	desc.pg_dreq = dreq;  	for (seg = 0; seg < nr_segs; seg++) {  		const struct iovec *vec = &iov[seg]; -		result = nfs_direct_read_schedule_segment(dreq, vec, pos); +		result = nfs_direct_read_schedule_segment(&desc, vec, pos);  		if (result < 0)  			break;  		requested_bytes += result; @@ -405,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,  		pos += vec->iov_len;  	} +	nfs_pageio_complete(&desc); +  	/*  	 * If no bytes were started, return the error, and let the  	 * generic layer handle the completion. 
@@ -441,104 +447,64 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,  	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);  	if (!result)  		result = nfs_direct_wait(dreq); +	NFS_I(inode)->read_io += result;  out_release:  	nfs_direct_req_release(dreq);  out:  	return result;  } -static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) -{ -	while (!list_empty(&dreq->rewrite_list)) { -		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); -		list_del(&data->pages); -		nfs_direct_release_pages(data->pagevec, data->npages); -		nfs_writedata_free(data); -	} -} -  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)  static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)  { -	struct inode *inode = dreq->inode; -	struct list_head *p; -	struct nfs_write_data *data; -	struct rpc_task *task; -	struct rpc_message msg = { -		.rpc_cred = dreq->ctx->cred, -	}; -	struct rpc_task_setup task_setup_data = { -		.rpc_client = NFS_CLIENT(inode), -		.rpc_message = &msg, -		.callback_ops = &nfs_write_direct_ops, -		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, -	}; +	struct nfs_pageio_descriptor desc; +	struct nfs_page *req, *tmp; +	LIST_HEAD(reqs); +	struct nfs_commit_info cinfo; +	LIST_HEAD(failed); + +	nfs_init_cinfo_from_dreq(&cinfo, dreq); +	pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); +	spin_lock(cinfo.lock); +	nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); +	spin_unlock(cinfo.lock);  	dreq->count = 0;  	get_dreq(dreq); -	list_for_each(p, &dreq->rewrite_list) { -		data = list_entry(p, struct nfs_write_data, pages); - -		get_dreq(dreq); - -		/* Use stable writes */ -		data->args.stable = NFS_FILE_SYNC; - -		/* -		 * Reset data->res. 
-		 */ -		nfs_fattr_init(&data->fattr); -		data->res.count = data->args.count; -		memset(&data->verf, 0, sizeof(data->verf)); - -		/* -		 * Reuse data->task; data->args should not have changed -		 * since the original request was sent. -		 */ -		task_setup_data.task = &data->task; -		task_setup_data.callback_data = data; -		msg.rpc_argp = &data->args; -		msg.rpc_resp = &data->res; -		NFS_PROTO(inode)->write_setup(data, &msg); - -		/* -		 * We're called via an RPC callback, so BKL is already held. -		 */ -		task = rpc_run_task(&task_setup_data); -		if (!IS_ERR(task)) -			rpc_put_task(task); +	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, +			      &nfs_direct_write_completion_ops); +	desc.pg_dreq = dreq; -		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", -				data->task.tk_pid, -				inode->i_sb->s_id, -				(long long)NFS_FILEID(inode), -				data->args.count, -				(unsigned long long)data->args.offset); +	list_for_each_entry_safe(req, tmp, &reqs, wb_list) { +		if (!nfs_pageio_add_request(&desc, req)) { +			nfs_list_add_request(req, &failed); +			spin_lock(cinfo.lock); +			dreq->flags = 0; +			dreq->error = -EIO; +			spin_unlock(cinfo.lock); +		}  	} +	nfs_pageio_complete(&desc); -	if (put_dreq(dreq)) -		nfs_direct_write_complete(dreq, inode); -} - -static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) -{ -	struct nfs_write_data *data = calldata; +	while (!list_empty(&failed)) +		nfs_unlock_and_release_request(req); -	/* Call the NFS version-specific code */ -	NFS_PROTO(data->inode)->commit_done(task, data); +	if (put_dreq(dreq)) +		nfs_direct_write_complete(dreq, dreq->inode);  } -static void nfs_direct_commit_release(void *calldata) +static void nfs_direct_commit_complete(struct nfs_commit_data *data)  { -	struct nfs_write_data *data = calldata; -	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; +	struct nfs_direct_req *dreq = data->dreq; +	struct nfs_commit_info cinfo; +	
struct nfs_page *req;  	int status = data->task.tk_status; +	nfs_init_cinfo_from_dreq(&cinfo, dreq);  	if (status < 0) {  		dprintk("NFS: %5u commit failed with error %d.\n", -				data->task.tk_pid, status); +			data->task.tk_pid, status);  		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;  	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {  		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); @@ -546,62 +512,47 @@ static void nfs_direct_commit_release(void *calldata)  	}  	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); -	nfs_direct_write_complete(dreq, data->inode); -	nfs_commit_free(data); +	while (!list_empty(&data->pages)) { +		req = nfs_list_entry(data->pages.next); +		nfs_list_remove_request(req); +		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { +			/* Note the rewrite will go through mds */ +			kref_get(&req->wb_kref); +			nfs_mark_request_commit(req, NULL, &cinfo); +		} +		nfs_unlock_and_release_request(req); +	} + +	if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) +		nfs_direct_write_complete(dreq, data->inode); +} + +static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) +{ +	/* There is no lock to clear */  } -static const struct rpc_call_ops nfs_commit_direct_ops = { -	.rpc_call_prepare = nfs_write_prepare, -	.rpc_call_done = nfs_direct_commit_result, -	.rpc_release = nfs_direct_commit_release, +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { +	.completion = nfs_direct_commit_complete, +	.error_cleanup = nfs_direct_error_cleanup,  };  static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)  { -	struct nfs_write_data *data = dreq->commit_data; -	struct rpc_task *task; -	struct rpc_message msg = { -		.rpc_argp = &data->args, -		.rpc_resp = &data->res, -		.rpc_cred = dreq->ctx->cred, -	}; -	struct rpc_task_setup task_setup_data = { -		.task = &data->task, -		.rpc_client = NFS_CLIENT(dreq->inode), -		.rpc_message = &msg, -		.callback_ops = 
&nfs_commit_direct_ops, -		.callback_data = data, -		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, -	}; - -	data->inode = dreq->inode; -	data->cred = msg.rpc_cred; - -	data->args.fh = NFS_FH(data->inode); -	data->args.offset = 0; -	data->args.count = 0; -	data->args.context = dreq->ctx; -	data->args.lock_context = dreq->l_ctx; -	data->res.count = 0; -	data->res.fattr = &data->fattr; -	data->res.verf = &data->verf; -	nfs_fattr_init(&data->fattr); - -	NFS_PROTO(data->inode)->commit_setup(data, &msg); - -	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */ -	dreq->commit_data = NULL; +	int res; +	struct nfs_commit_info cinfo; +	LIST_HEAD(mds_list); -	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); - -	task = rpc_run_task(&task_setup_data); -	if (!IS_ERR(task)) -		rpc_put_task(task); +	nfs_init_cinfo_from_dreq(&cinfo, dreq); +	nfs_scan_commit(dreq->inode, &mds_list, &cinfo); +	res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); +	if (res < 0) /* res == -ENOMEM */ +		nfs_direct_write_reschedule(dreq);  } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_schedule_work(struct work_struct *work)  { +	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);  	int flags = dreq->flags;  	dreq->flags = 0; @@ -613,89 +564,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode  			nfs_direct_write_reschedule(dreq);  			break;  		default: -			if (dreq->commit_data != NULL) -				nfs_commit_free(dreq->commit_data); -			nfs_direct_free_writedata(dreq); -			nfs_zap_mapping(inode, inode->i_mapping); +			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);  			nfs_direct_complete(dreq);  	}  } -static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)  { -	dreq->commit_data = nfs_commitdata_alloc(); -	if 
(dreq->commit_data != NULL) -		dreq->commit_data->req = (struct nfs_page *) dreq; +	schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */  } +  #else -static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_schedule_work(struct work_struct *work)  { -	dreq->commit_data = NULL;  }  static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)  { -	nfs_direct_free_writedata(dreq);  	nfs_zap_mapping(inode, inode->i_mapping);  	nfs_direct_complete(dreq);  }  #endif -static void nfs_direct_write_result(struct rpc_task *task, void *calldata) -{ -	struct nfs_write_data *data = calldata; - -	nfs_writeback_done(task, data); -} -  /*   * NB: Return the value of the first error return code.  Subsequent   *     errors after the first one are ignored.   */ -static void nfs_direct_write_release(void *calldata) -{ -	struct nfs_write_data *data = calldata; -	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; -	int status = data->task.tk_status; - -	spin_lock(&dreq->lock); - -	if (unlikely(status < 0)) { -		/* An error has occurred, so we should not commit */ -		dreq->flags = 0; -		dreq->error = status; -	} -	if (unlikely(dreq->error != 0)) -		goto out_unlock; - -	dreq->count += data->res.count; - -	if (data->res.verf->committed != NFS_FILE_SYNC) { -		switch (dreq->flags) { -			case 0: -				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); -				dreq->flags = NFS_ODIRECT_DO_COMMIT; -				break; -			case NFS_ODIRECT_DO_COMMIT: -				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { -					dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); -					dreq->flags = NFS_ODIRECT_RESCHED_WRITES; -				} -		} -	} -out_unlock: -	spin_unlock(&dreq->lock); - -	if (put_dreq(dreq)) -		nfs_direct_write_complete(dreq, data->inode); -} - -static const struct rpc_call_ops nfs_write_direct_ops = { -	.rpc_call_prepare = nfs_write_prepare, -	.rpc_call_done = nfs_direct_write_result, -	
.rpc_release = nfs_direct_write_release, -}; -  /*   * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE   * operation.  If nfs_writedata_alloc() or get_user_pages() fails, @@ -703,132 +597,187 @@ static const struct rpc_call_ops nfs_write_direct_ops = {   * handled automatically by nfs_direct_write_result().  Otherwise, if   * no requests have been sent, just return an error.   */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,  						 const struct iovec *iov, -						 loff_t pos, int sync) +						 loff_t pos)  { +	struct nfs_direct_req *dreq = desc->pg_dreq;  	struct nfs_open_context *ctx = dreq->ctx;  	struct inode *inode = ctx->dentry->d_inode;  	unsigned long user_addr = (unsigned long)iov->iov_base;  	size_t count = iov->iov_len; -	struct rpc_task *task; -	struct rpc_message msg = { -		.rpc_cred = ctx->cred, -	}; -	struct rpc_task_setup task_setup_data = { -		.rpc_client = NFS_CLIENT(inode), -		.rpc_message = &msg, -		.callback_ops = &nfs_write_direct_ops, -		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, -	};  	size_t wsize = NFS_SERVER(inode)->wsize;  	unsigned int pgbase;  	int result;  	ssize_t started = 0; +	struct page **pagevec = NULL; +	unsigned int npages;  	do { -		struct nfs_write_data *data;  		size_t bytes; +		int i;  		pgbase = user_addr & ~PAGE_MASK; -		bytes = min(wsize,count); +		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);  		result = -ENOMEM; -		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); -		if (unlikely(!data)) +		npages = nfs_page_array_len(pgbase, bytes); +		if (!pagevec) +			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); +		if (!pagevec)  			break;  		down_read(¤t->mm->mmap_sem);  		result = get_user_pages(current, current->mm, user_addr, -					data->npages, 0, 0, data->pagevec, NULL); +					npages, 0, 0, pagevec, NULL);  		up_read(¤t->mm->mmap_sem); -	
	if (result < 0) { -			nfs_writedata_free(data); +		if (result < 0)  			break; -		} -		if ((unsigned)result < data->npages) { + +		if ((unsigned)result < npages) {  			bytes = result * PAGE_SIZE;  			if (bytes <= pgbase) { -				nfs_direct_release_pages(data->pagevec, result); -				nfs_writedata_free(data); +				nfs_direct_release_pages(pagevec, result);  				break;  			}  			bytes -= pgbase; -			data->npages = result; +			npages = result;  		} -		get_dreq(dreq); +		for (i = 0; i < npages; i++) { +			struct nfs_page *req; +			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); -		list_move_tail(&data->pages, &dreq->rewrite_list); +			req = nfs_create_request(dreq->ctx, dreq->inode, +						 pagevec[i], +						 pgbase, req_len); +			if (IS_ERR(req)) { +				result = PTR_ERR(req); +				break; +			} +			nfs_lock_request(req); +			req->wb_index = pos >> PAGE_SHIFT; +			req->wb_offset = pos & ~PAGE_MASK; +			if (!nfs_pageio_add_request(desc, req)) { +				result = desc->pg_error; +				nfs_unlock_and_release_request(req); +				break; +			} +			pgbase = 0; +			bytes -= req_len; +			started += req_len; +			user_addr += req_len; +			pos += req_len; +			count -= req_len; +		} +		/* The nfs_page now hold references to these pages */ +		nfs_direct_release_pages(pagevec, npages); +	} while (count != 0 && result >= 0); -		data->req = (struct nfs_page *) dreq; -		data->inode = inode; -		data->cred = msg.rpc_cred; -		data->args.fh = NFS_FH(inode); -		data->args.context = ctx; -		data->args.lock_context = dreq->l_ctx; -		data->args.offset = pos; -		data->args.pgbase = pgbase; -		data->args.pages = data->pagevec; -		data->args.count = bytes; -		data->args.stable = sync; -		data->res.fattr = &data->fattr; -		data->res.count = bytes; -		data->res.verf = &data->verf; -		nfs_fattr_init(&data->fattr); +	kfree(pagevec); -		task_setup_data.task = &data->task; -		task_setup_data.callback_data = data; -		msg.rpc_argp = &data->args; -		msg.rpc_resp = &data->res; -		
NFS_PROTO(inode)->write_setup(data, &msg); +	if (started) +		return started; +	return result < 0 ? (ssize_t) result : -EFAULT; +} -		task = rpc_run_task(&task_setup_data); -		if (IS_ERR(task)) -			break; -		rpc_put_task(task); +static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) +{ +	struct nfs_direct_req *dreq = hdr->dreq; +	struct nfs_commit_info cinfo; +	int bit = -1; +	struct nfs_page *req = nfs_list_entry(hdr->pages.next); -		dprintk("NFS: %5u initiated direct write call " -			"(req %s/%Ld, %zu bytes @ offset %Lu)\n", -				data->task.tk_pid, -				inode->i_sb->s_id, -				(long long)NFS_FILEID(inode), -				bytes, -				(unsigned long long)data->args.offset); +	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) +		goto out_put; -		started += bytes; -		user_addr += bytes; -		pos += bytes; +	nfs_init_cinfo_from_dreq(&cinfo, dreq); -		/* FIXME: Remove this useless math from the final patch */ -		pgbase += bytes; -		pgbase &= ~PAGE_MASK; -		BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); +	spin_lock(&dreq->lock); -		count -= bytes; -	} while (count != 0); +	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { +		dreq->flags = 0; +		dreq->error = hdr->error; +	} +	if (dreq->error != 0) +		bit = NFS_IOHDR_ERROR; +	else { +		dreq->count += hdr->good_bytes; +		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { +			dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +			bit = NFS_IOHDR_NEED_RESCHED; +		} else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { +			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) +				bit = NFS_IOHDR_NEED_RESCHED; +			else if (dreq->flags == 0) { +				memcpy(&dreq->verf, &req->wb_verf, +				       sizeof(dreq->verf)); +				bit = NFS_IOHDR_NEED_COMMIT; +				dreq->flags = NFS_ODIRECT_DO_COMMIT; +			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { +				if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { +					dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +					bit = NFS_IOHDR_NEED_RESCHED; +				} else +					bit = NFS_IOHDR_NEED_COMMIT; +			} +		} 
+	} +	spin_unlock(&dreq->lock); -	if (started) -		return started; -	return result < 0 ? (ssize_t) result : -EFAULT; +	while (!list_empty(&hdr->pages)) { +		req = nfs_list_entry(hdr->pages.next); +		nfs_list_remove_request(req); +		switch (bit) { +		case NFS_IOHDR_NEED_RESCHED: +		case NFS_IOHDR_NEED_COMMIT: +			kref_get(&req->wb_kref); +			nfs_mark_request_commit(req, hdr->lseg, &cinfo); +		} +		nfs_unlock_and_release_request(req); +	} + +out_put: +	if (put_dreq(dreq)) +		nfs_direct_write_complete(dreq, hdr->inode); +	hdr->release(hdr); +} + +static void nfs_write_sync_pgio_error(struct list_head *head) +{ +	struct nfs_page *req; + +	while (!list_empty(head)) { +		req = nfs_list_entry(head->next); +		nfs_list_remove_request(req); +		nfs_unlock_and_release_request(req); +	}  } +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { +	.error_cleanup = nfs_write_sync_pgio_error, +	.init_hdr = nfs_direct_pgio_init, +	.completion = nfs_direct_write_completion, +}; +  static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,  					       const struct iovec *iov,  					       unsigned long nr_segs, -					       loff_t pos, int sync) +					       loff_t pos)  { +	struct nfs_pageio_descriptor desc;  	ssize_t result = 0;  	size_t requested_bytes = 0;  	unsigned long seg; +	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE, +			      &nfs_direct_write_completion_ops); +	desc.pg_dreq = dreq;  	get_dreq(dreq);  	for (seg = 0; seg < nr_segs; seg++) {  		const struct iovec *vec = &iov[seg]; -		result = nfs_direct_write_schedule_segment(dreq, vec, -							   pos, sync); +		result = nfs_direct_write_schedule_segment(&desc, vec, pos);  		if (result < 0)  			break;  		requested_bytes += result; @@ -836,6 +785,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,  			break;  		pos += vec->iov_len;  	} +	nfs_pageio_complete(&desc); +	NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;  	/*  	 * If no 
bytes were started, return the error, and let the @@ -858,16 +809,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,  	ssize_t result = -ENOMEM;  	struct inode *inode = iocb->ki_filp->f_mapping->host;  	struct nfs_direct_req *dreq; -	size_t wsize = NFS_SERVER(inode)->wsize; -	int sync = NFS_UNSTABLE;  	dreq = nfs_direct_req_alloc();  	if (!dreq)  		goto out; -	nfs_alloc_commit_data(dreq); - -	if (dreq->commit_data == NULL || count <= wsize) -		sync = NFS_FILE_SYNC;  	dreq->inode = inode;  	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); @@ -877,7 +822,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,  	if (!is_sync_kiocb(iocb))  		dreq->iocb = iocb; -	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); +	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);  	if (!result)  		result = nfs_direct_wait(dreq);  out_release: @@ -997,10 +942,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,  	task_io_account_write(count);  	retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); +	if (retval > 0) { +		struct inode *inode = mapping->host; -	if (retval > 0)  		iocb->ki_pos = pos + retval; - +		spin_lock(&inode->i_lock); +		if (i_size_read(inode) < iocb->ki_pos) +			i_size_write(inode, iocb->ki_pos); +		spin_unlock(&inode->i_lock); +	}  out:  	return retval;  } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index aa9b709fd32..56311ca5f9f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)  	if ((file->f_mode & FMODE_WRITE) == 0)  		return 0; +	/* +	 * If we're holding a write delegation, then just start the i/o +	 * but don't wait for completion (or send a commit). 
+	 */ +	if (nfs_have_delegation(inode, FMODE_WRITE)) +		return filemap_fdatawrite(file->f_mapping); +  	/* Flush writes to the server and return any errors */  	return vfs_fsync(file, 0);  } @@ -417,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,  	if (status < 0)  		return status; +	NFS_I(mapping->host)->write_io += copied;  	return copied;  } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ae65c16b367..c817787fbdb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)   * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent   * superblock across an automount point of some nature.   */ -void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, -				  struct nfs_clone_mount *mntdata) +void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)  {  	struct nfs_fscache_key *key, *xkey;  	struct nfs_server *nfss = NFS_SB(sb);  	struct rb_node **p, *parent; -	int diff, ulen; - -	if (uniq) { -		ulen = strlen(uniq); -	} else if (mntdata) { -		struct nfs_server *mnt_s = NFS_SB(mntdata->sb); -		if (mnt_s->fscache_key) { -			uniq = mnt_s->fscache_key->key.uniquifier; -			ulen = mnt_s->fscache_key->key.uniq_len; -		} -	} +	int diff;  	if (!uniq) {  		uniq = ""; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index b9c572d0679..c5b11b53ff3 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);  extern void nfs_fscache_get_client_cookie(struct nfs_client *);  extern void nfs_fscache_release_client_cookie(struct nfs_client *); -extern void nfs_fscache_get_super_cookie(struct super_block *, -					 const char *, -					 struct nfs_clone_mount *); +extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);  extern void nfs_fscache_release_super_cookie(struct super_block *);  extern void 
nfs_fscache_init_inode_cookie(struct inode *); @@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}  static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}  static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} -static inline void nfs_fscache_get_super_cookie( -	struct super_block *sb, -	const char *uniq, -	struct nfs_clone_mount *mntdata) -{ -}  static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}  static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 4ca6f5c8038..8abfb19bd3a 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -150,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)  		goto out;  	/* Start by getting the root filehandle from the server */ -	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); +	ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);  	if (ret < 0) {  		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);  		goto out; @@ -178,87 +178,4 @@ out:  	return ret;  } -/* - * get an NFS4 root dentry from the root filehandle - */ -struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh, -			     const char *devname) -{ -	struct nfs_server *server = NFS_SB(sb); -	struct nfs_fattr *fattr = NULL; -	struct dentry *ret; -	struct inode *inode; -	void *name = kstrdup(devname, GFP_KERNEL); -	int error; - -	dprintk("--> nfs4_get_root()\n"); - -	if (!name) -		return ERR_PTR(-ENOMEM); - -	/* get the info about the server and filesystem */ -	error = nfs4_server_capabilities(server, mntfh); -	if (error < 0) { -		dprintk("nfs_get_root: getcaps error = %d\n", -			-error); -		kfree(name); -		return ERR_PTR(error); -	} - -	fattr = nfs_alloc_fattr(); -	if (fattr == NULL) { -		kfree(name); -		return ERR_PTR(-ENOMEM); -	} - -	/* get the actual root for this mount */ -	error = server->nfs_client->rpc_ops->getattr(server, mntfh, 
fattr); -	if (error < 0) { -		dprintk("nfs_get_root: getattr error = %d\n", -error); -		ret = ERR_PTR(error); -		goto out; -	} - -	if (fattr->valid & NFS_ATTR_FATTR_FSID && -	    !nfs_fsid_equal(&server->fsid, &fattr->fsid)) -		memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); - -	inode = nfs_fhget(sb, mntfh, fattr); -	if (IS_ERR(inode)) { -		dprintk("nfs_get_root: get root inode failed\n"); -		ret = ERR_CAST(inode); -		goto out; -	} - -	error = nfs_superblock_set_dummy_root(sb, inode); -	if (error != 0) { -		ret = ERR_PTR(error); -		goto out; -	} - -	/* root dentries normally start off anonymous and get spliced in later -	 * if the dentry tree reaches them; however if the dentry already -	 * exists, we'll pick it up at this point and use it as the root -	 */ -	ret = d_obtain_alias(inode); -	if (IS_ERR(ret)) { -		dprintk("nfs_get_root: get root dentry failed\n"); -		goto out; -	} - -	security_d_instantiate(ret, inode); -	spin_lock(&ret->d_lock); -	if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { -		ret->d_fsdata = name; -		name = NULL; -	} -	spin_unlock(&ret->d_lock); -out: -	if (name) -		kfree(name); -	nfs_free_fattr(fattr); -	dprintk("<-- nfs4_get_root()\n"); -	return ret; -} -  #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ba3019f5934..b5b86a05059 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,  static void nfs_idmap_unregister(struct nfs_client *clp,  				      struct rpc_pipe *pipe)  { -	struct net *net = clp->net; +	struct net *net = clp->cl_net;  	struct super_block *pipefs_sb;  	pipefs_sb = rpc_get_sb_net(net); @@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,  				   struct idmap *idmap,  				   struct rpc_pipe *pipe)  { -	struct net *net = clp->net; +	struct net *net = clp->cl_net;  	struct super_block *pipefs_sb;  	int err = 0; @@ -530,9 +530,25 @@ static struct nfs_client *nfs_get_client_for_event(struct 
net *net, int event)  	struct nfs_net *nn = net_generic(net, nfs_net_id);  	struct dentry *cl_dentry;  	struct nfs_client *clp; +	int err; +restart:  	spin_lock(&nn->nfs_client_lock);  	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { +		/* Wait for initialisation to finish */ +		if (clp->cl_cons_state == NFS_CS_INITING) { +			atomic_inc(&clp->cl_count); +			spin_unlock(&nn->nfs_client_lock); +			err = nfs_wait_client_init_complete(clp); +			nfs_put_client(clp); +			if (err) +				return NULL; +			goto restart; +		} +		/* Skip nfs_clients that failed to initialise */ +		if (clp->cl_cons_state < 0) +			continue; +		smp_rmb();  		if (clp->rpc_ops != &nfs_v4_clientops)  			continue;  		cl_dentry = clp->cl_idmap->idmap_pipe->dentry; @@ -640,20 +656,16 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,  	struct idmap_msg *im;  	struct idmap *idmap = (struct idmap *)aux;  	struct key *key = cons->key; -	int ret; +	int ret = -ENOMEM;  	/* msg and im are freed in idmap_pipe_destroy_msg */  	msg = kmalloc(sizeof(*msg), GFP_KERNEL); -	if (IS_ERR(msg)) { -		ret = PTR_ERR(msg); +	if (!msg)  		goto out0; -	}  	im = kmalloc(sizeof(*im), GFP_KERNEL); -	if (IS_ERR(im)) { -		ret = PTR_ERR(im); +	if (!im)  		goto out1; -	}  	ret = nfs_idmap_prepare_message(key->description, im, msg);  	if (ret < 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c6073139b40..2f6f78c4b42 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)  		inode->i_mode = fattr->mode;  		if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0  				&& nfs_server_capable(inode, NFS_CAP_MODE)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		/* Why so? Because we want revalidate for devices/FIFOs, and  		 * that's precisely what we have in nfs_file_inode_operations.  		 
*/ @@ -300,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)  			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;  			inode->i_fop = &nfs_dir_operations;  			inode->i_data.a_ops = &nfs_dir_aops; -			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) -				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);  			/* Deal with crossing mountpoints */  			if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||  					fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { @@ -327,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)  		inode->i_gid = -2;  		inode->i_blocks = 0;  		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); +		nfsi->write_io = 0; +		nfsi->read_io = 0;  		nfsi->read_cache_jiffies = fattr->time_start;  		nfsi->attr_gencount = fattr->gencount; @@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)  		if (fattr->valid & NFS_ATTR_FATTR_MTIME)  			inode->i_mtime = fattr->mtime;  		else if (nfs_server_capable(inode, NFS_CAP_MTIME)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_DATA; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		if (fattr->valid & NFS_ATTR_FATTR_CTIME)  			inode->i_ctime = fattr->ctime;  		else if (nfs_server_capable(inode, NFS_CAP_CTIME)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		if (fattr->valid & NFS_ATTR_FATTR_CHANGE)  			inode->i_version = fattr->change_attr;  		else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_DATA; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		if (fattr->valid & NFS_ATTR_FATTR_SIZE)  			inode->i_size = nfs_size_to_loff_t(fattr->size);  		else  			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_DATA  				| NFS_INO_REVAL_PAGECACHE;  		if 
(fattr->valid & NFS_ATTR_FATTR_NLINK)  			set_nlink(inode, fattr->nlink); @@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)  		if (fattr->valid & NFS_ATTR_FATTR_OWNER)  			inode->i_uid = fattr->uid;  		else if (nfs_server_capable(inode, NFS_CAP_OWNER)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		if (fattr->valid & NFS_ATTR_FATTR_GROUP)  			inode->i_gid = fattr->gid;  		else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) -			nfsi->cache_validity |= NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL; +			nfsi->cache_validity |= NFS_INO_INVALID_ATTR;  		if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)  			inode->i_blocks = fattr->du.nfs2.blocks;  		if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -654,6 +643,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f  	nfs_init_lock_context(&ctx->lock_context);  	ctx->lock_context.open_context = ctx;  	INIT_LIST_HEAD(&ctx->list); +	ctx->mdsthreshold = NULL;  	return ctx;  } @@ -682,6 +672,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)  		put_rpccred(ctx->cred);  	dput(ctx->dentry);  	nfs_sb_deactive(sb); +	kfree(ctx->mdsthreshold);  	kfree(ctx);  } @@ -870,6 +861,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map  	return 0;  } +static bool nfs_mapping_need_revalidate_inode(struct inode *inode) +{ +	if (nfs_have_delegated_attributes(inode)) +		return false; +	return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) +		|| nfs_attribute_timeout(inode) +		|| NFS_STALE(inode); +} +  /**   * nfs_revalidate_mapping - Revalidate the pagecache   * @inode - pointer to host inode @@ -880,9 +880,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)  	struct nfs_inode *nfsi = NFS_I(inode);  	int ret = 
0; -	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) -			|| nfs_attribute_cache_expired(inode) -			|| NFS_STALE(inode)) { +	if (nfs_mapping_need_revalidate_inode(inode)) {  		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);  		if (ret < 0)  			goto out; @@ -948,6 +946,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat  	unsigned long invalid = 0; +	if (nfs_have_delegated_attributes(inode)) +		return 0;  	/* Has the inode gone and changed behind our back? */  	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)  		return -EIO; @@ -960,7 +960,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat  	/* Verify a few of the more important attributes */  	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) -		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; +		invalid |= NFS_INO_INVALID_ATTR;  	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {  		cur_size = i_size_read(inode); @@ -1279,14 +1279,26 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  			nfs_display_fhandle_hash(NFS_FH(inode)),  			atomic_read(&inode->i_count), fattr->valid); -	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) -		goto out_fileid; +	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) { +		printk(KERN_ERR "NFS: server %s error: fileid changed\n" +			"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", +			NFS_SERVER(inode)->nfs_client->cl_hostname, +			inode->i_sb->s_id, (long long)nfsi->fileid, +			(long long)fattr->fileid); +		goto out_err; +	}  	/*  	 * Make sure the inode's type hasn't changed.  	 */ -	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) -		goto out_changed; +	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { +		/* +		* Big trouble! 
The inode has become a different object. +		*/ +		printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", +				__func__, inode->i_ino, inode->i_mode, fattr->mode); +		goto out_err; +	}  	server = NFS_SERVER(inode);  	/* Update the fsid? */ @@ -1314,7 +1326,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  		if (inode->i_version != fattr->change_attr) {  			dprintk("NFS: change_attr change on server for file %s/%ld\n",  					inode->i_sb->s_id, inode->i_ino); -			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +			invalid |= NFS_INO_INVALID_ATTR +				| NFS_INO_INVALID_DATA +				| NFS_INO_INVALID_ACCESS +				| NFS_INO_INVALID_ACL +				| NFS_INO_REVAL_PAGECACHE;  			if (S_ISDIR(inode->i_mode))  				nfs_force_lookup_revalidate(inode);  			inode->i_version = fattr->change_attr; @@ -1323,38 +1339,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  		invalid |= save_cache_validity;  	if (fattr->valid & NFS_ATTR_FATTR_MTIME) { -		/* NFSv2/v3: Check if the mtime agrees */ -		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { -			dprintk("NFS: mtime change on server for file %s/%ld\n", -					inode->i_sb->s_id, inode->i_ino); -			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; -			if (S_ISDIR(inode->i_mode)) -				nfs_force_lookup_revalidate(inode); -			memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -		} +		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));  	} else if (server->caps & NFS_CAP_MTIME)  		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_DATA -				| NFS_INO_REVAL_PAGECACHE  				| NFS_INO_REVAL_FORCED);  	if (fattr->valid & NFS_ATTR_FATTR_CTIME) { -		/* If ctime has changed we should definitely clear access+acl caches */ -		if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { -			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; -			/* and probably 
clear data for a directory too as utimes can cause -			 * havoc with our cache. -			 */ -			if (S_ISDIR(inode->i_mode)) { -				invalid |= NFS_INO_INVALID_DATA; -				nfs_force_lookup_revalidate(inode); -			} -			memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); -		} +		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));  	} else if (server->caps & NFS_CAP_CTIME)  		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR -				| NFS_INO_INVALID_ACCESS -				| NFS_INO_INVALID_ACL  				| NFS_INO_REVAL_FORCED);  	/* Check if our cached file size is stale */ @@ -1466,12 +1459,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  		nfsi->cache_validity |= invalid;  	return 0; - out_changed: -	/* -	 * Big trouble! The inode has become a different object. -	 */ -	printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", -			__func__, inode->i_ino, inode->i_mode, fattr->mode);   out_err:  	/*  	 * No need to worry about unhashing the dentry, as the @@ -1480,13 +1467,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)  	 */  	nfs_invalidate_inode(inode);  	return -ESTALE; - - out_fileid: -	printk(KERN_ERR "NFS: server %s error: fileid changed\n" -		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", -		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, -		(long long)nfsi->fileid, (long long)fattr->fileid); -	goto out_err;  } @@ -1547,7 +1527,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)  	nfsi->delegation_state = 0;  	init_rwsem(&nfsi->rwsem);  	nfsi->layout = NULL; -	atomic_set(&nfsi->commits_outstanding, 0); +	atomic_set(&nfsi->commit_info.rpcs_out, 0);  #endif  } @@ -1559,9 +1539,9 @@ static void init_once(void *foo)  	INIT_LIST_HEAD(&nfsi->open_files);  	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);  	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); -	INIT_LIST_HEAD(&nfsi->commit_list); +	INIT_LIST_HEAD(&nfsi->commit_info.list);  	nfsi->npages = 0; -	nfsi->ncommit 
= 0; +	nfsi->commit_info.ncommit = 0;  	atomic_set(&nfsi->silly_count, 1);  	INIT_HLIST_HEAD(&nfsi->silly_list);  	init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b777bdaba4c..1848a727559 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {  	unsigned int		version;  	unsigned int		minorversion;  	char			*fscache_uniq; +	bool			need_mount;  	struct {  		struct sockaddr_storage	address; @@ -167,11 +168,13 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,  					   struct nfs_fh *,  					   struct nfs_fattr *,  					   rpc_authflavor_t); +extern int nfs_wait_client_init_complete(const struct nfs_client *clp);  extern void nfs_mark_client_ready(struct nfs_client *clp, int state); -extern int nfs4_check_client_ready(struct nfs_client *clp);  extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,  					     const struct sockaddr *ds_addr, -					     int ds_addrlen, int ds_proto); +					     int ds_addrlen, int ds_proto, +					     unsigned int ds_timeo, +					     unsigned int ds_retrans);  #ifdef CONFIG_PROC_FS  extern int __init nfs_fs_proc_init(void);  extern void nfs_fs_proc_exit(void); @@ -185,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)  }  #endif -/* nfs4namespace.c */ -#ifdef CONFIG_NFS_V4 -extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry); -#else -static inline -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) -{ -	return ERR_PTR(-ENOENT); -} -#endif -  /* callback_xdr.c */  extern struct svc_version nfs4_callback_version1;  extern struct svc_version nfs4_callback_version4; +struct nfs_pageio_descriptor;  /* pagelist.c */  extern int __init nfs_init_nfspagecache(void);  extern void nfs_destroy_nfspagecache(void); @@ -210,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);  extern int __init nfs_init_directcache(void);  extern void 
nfs_destroy_directcache(void); +extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); +extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, +			      struct nfs_pgio_header *hdr, +			      void (*release)(struct nfs_pgio_header *hdr)); +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);  /* nfs2xdr.c */ -extern int nfs_stat_to_errno(enum nfs_stat);  extern struct rpc_procinfo nfs_procedures[];  extern int nfs2_decode_dirent(struct xdr_stream *,  				struct nfs_entry *, int); @@ -237,14 +234,13 @@ extern const u32 nfs41_maxwrite_overhead;  extern struct rpc_procinfo nfs4_procedures[];  #endif -extern int nfs4_init_ds_session(struct nfs_client *clp); +extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);  /* proc.c */  void nfs_close_context(struct nfs_open_context *ctx, int is_sync); -extern int nfs_init_client(struct nfs_client *clp, +extern struct nfs_client *nfs_init_client(struct nfs_client *clp,  			   const struct rpc_timeout *timeparms, -			   const char *ip_addr, rpc_authflavor_t authflavour, -			   int noresvport); +			   const char *ip_addr, rpc_authflavor_t authflavour);  /* dir.c */  extern int nfs_access_cache_shrinker(struct shrinker *shrink, @@ -280,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);  extern char *nfs_path(char **p, struct dentry *dentry,  		      char *buffer, ssize_t buflen);  extern struct vfsmount *nfs_d_automount(struct path *path); -#ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); -#endif +struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, +			      struct nfs_fh *, struct nfs_fattr *); +struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, +				 struct nfs_fattr *, rpc_authflavor_t);  /* getroot.c */  extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, @@ -294,46 +291,73 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh 
*,  extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);  #endif -struct nfs_pageio_descriptor; +struct nfs_pgio_completion_ops;  /* read.c */ -extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, -			     const struct rpc_call_ops *call_ops); +extern struct nfs_read_header *nfs_readhdr_alloc(void); +extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); +extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, +			struct inode *inode, +			const struct nfs_pgio_completion_ops *compl_ops); +extern int nfs_initiate_read(struct rpc_clnt *clnt, +			     struct nfs_read_data *data, +			     const struct rpc_call_ops *call_ops, int flags);  extern void nfs_read_prepare(struct rpc_task *task, void *calldata);  extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, -		struct list_head *head); - +			      struct nfs_pgio_header *hdr);  extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, -		struct inode *inode); +			struct inode *inode, +			const struct nfs_pgio_completion_ops *compl_ops);  extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);  extern void nfs_readdata_release(struct nfs_read_data *rdata);  /* write.c */ +extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +			struct inode *inode, int ioflags, +			const struct nfs_pgio_completion_ops *compl_ops); +extern struct nfs_write_header *nfs_writehdr_alloc(void); +extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);  extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, -		struct list_head *head); +			     struct nfs_pgio_header *hdr);  extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, -				  struct inode *inode, int ioflags); +			struct inode *inode, int ioflags, +			const struct nfs_pgio_completion_ops *compl_ops);  extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);  extern void nfs_writedata_release(struct 
nfs_write_data *wdata); -extern void nfs_commit_free(struct nfs_write_data *p); -extern int nfs_initiate_write(struct nfs_write_data *data, -			      struct rpc_clnt *clnt, +extern void nfs_commit_free(struct nfs_commit_data *p); +extern int nfs_initiate_write(struct rpc_clnt *clnt, +			      struct nfs_write_data *data,  			      const struct rpc_call_ops *call_ops, -			      int how); +			      int how, int flags);  extern void nfs_write_prepare(struct rpc_task *task, void *calldata); -extern int nfs_initiate_commit(struct nfs_write_data *data, -			       struct rpc_clnt *clnt, +extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); +extern int nfs_initiate_commit(struct rpc_clnt *clnt, +			       struct nfs_commit_data *data,  			       const struct rpc_call_ops *call_ops, -			       int how); -extern void nfs_init_commit(struct nfs_write_data *data, +			       int how, int flags); +extern void nfs_init_commit(struct nfs_commit_data *data,  			    struct list_head *head, -			    struct pnfs_layout_segment *lseg); +			    struct pnfs_layout_segment *lseg, +			    struct nfs_commit_info *cinfo); +int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, +			 struct nfs_commit_info *cinfo, int max); +int nfs_scan_commit(struct inode *inode, struct list_head *dst, +		    struct nfs_commit_info *cinfo); +void nfs_mark_request_commit(struct nfs_page *req, +			     struct pnfs_layout_segment *lseg, +			     struct nfs_commit_info *cinfo); +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, +			    int how, struct nfs_commit_info *cinfo);  void nfs_retry_commit(struct list_head *page_list, -		      struct pnfs_layout_segment *lseg); -void nfs_commit_clear_lock(struct nfs_inode *nfsi); -void nfs_commitdata_release(void *data); -void nfs_commit_release_pages(struct nfs_write_data *data); -void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); -void nfs_request_remove_commit_list(struct nfs_page 
*req); +		      struct pnfs_layout_segment *lseg, +		      struct nfs_commit_info *cinfo); +void nfs_commitdata_release(struct nfs_commit_data *data); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, +				 struct nfs_commit_info *cinfo); +void nfs_request_remove_commit_list(struct nfs_page *req, +				    struct nfs_commit_info *cinfo); +void nfs_init_cinfo(struct nfs_commit_info *cinfo, +		    struct inode *inode, +		    struct nfs_direct_req *dreq);  #ifdef CONFIG_MIGRATION  extern int nfs_migrate_page(struct address_space *, @@ -342,15 +366,16 @@ extern int nfs_migrate_page(struct address_space *,  #define nfs_migrate_page NULL  #endif +/* direct.c */ +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, +			      struct nfs_direct_req *dreq); +  /* nfs4proc.c */  extern void __nfs4_read_done_cb(struct nfs_read_data *); -extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); -extern int nfs4_init_client(struct nfs_client *clp, +extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,  			    const struct rpc_timeout *timeparms,  			    const char *ip_addr, -			    rpc_authflavor_t authflavour, -			    int noresvport); -extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); +			    rpc_authflavor_t authflavour);  extern int _nfs4_call_sync(struct rpc_clnt *clnt,  			   struct nfs_server *server,  			   struct rpc_message *msg, @@ -466,3 +491,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)  		PAGE_SIZE - 1) >> PAGE_SHIFT;  } +/* + * Convert a struct timespec into a 64-bit change attribute + * + * This does approximately the same thing as timespec_to_ns(), + * but for calculation efficiency, we multiply the seconds by + * 1024*1024*1024. 
+ */ +static inline +u64 nfs_timespec_to_change_attr(const struct timespec *ts) +{ +	return ((u64)ts->tv_sec << 30) + ts->tv_nsec; +} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d51868e5683..08b9c93675d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);  static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);  int nfs_mountpoint_expiry_timeout = 500 * HZ; -static struct vfsmount *nfs_do_submount(struct dentry *dentry, -					struct nfs_fh *fh, -					struct nfs_fattr *fattr, -					rpc_authflavor_t authflavor); -  /*   * nfs_path - reconstruct the path given an arbitrary dentry   * @base - used to return pointer to the end of devname part of path @@ -118,64 +113,6 @@ Elong:  	return ERR_PTR(-ENAMETOOLONG);  } -#ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) -{ -	struct gss_api_mech *mech; -	struct xdr_netobj oid; -	int i; -	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; - -	for (i = 0; i < flavors->num_flavors; i++) { -		struct nfs4_secinfo_flavor *flavor; -		flavor = &flavors->flavors[i]; - -		if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { -			pseudoflavor = flavor->flavor; -			break; -		} else if (flavor->flavor == RPC_AUTH_GSS) { -			oid.len  = flavor->gss.sec_oid4.len; -			oid.data = flavor->gss.sec_oid4.data; -			mech = gss_mech_get_by_OID(&oid); -			if (!mech) -				continue; -			pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); -			gss_mech_put(mech); -			break; -		} -	} - -	return pseudoflavor; -} - -static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, -					      struct qstr *name, -					      struct nfs_fh *fh, -					      struct nfs_fattr *fattr) -{ -	int err; - -	if (NFS_PROTO(dir)->version == 4) -		return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); - -	err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); -	if (err) -		return 
ERR_PTR(err); -	return rpc_clone_client(NFS_SERVER(dir)->client); -} -#else /* CONFIG_NFS_V4 */ -static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, -						     struct qstr *name, -						     struct nfs_fh *fh, -						     struct nfs_fattr *fattr) -{ -	int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); -	if (err) -		return ERR_PTR(err); -	return rpc_clone_client(NFS_SERVER(dir)->client); -} -#endif /* CONFIG_NFS_V4 */ -  /*   * nfs_d_automount - Handle crossing a mountpoint on the server   * @path - The mountpoint @@ -191,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,  struct vfsmount *nfs_d_automount(struct path *path)  {  	struct vfsmount *mnt; -	struct dentry *parent; +	struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);  	struct nfs_fh *fh = NULL;  	struct nfs_fattr *fattr = NULL; -	struct rpc_clnt *client;  	dprintk("--> nfs_d_automount()\n"); @@ -210,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)  	dprintk("%s: enter\n", __func__); -	/* Look it up again to get its attributes */ -	parent = dget_parent(path->dentry); -	client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr); -	dput(parent); -	if (IS_ERR(client)) { -		mnt = ERR_CAST(client); -		goto out; -	} - -	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) -		mnt = nfs_do_refmount(client, path->dentry); -	else -		mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor); -	rpc_shutdown_client(client); - +	mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);  	if (IS_ERR(mnt))  		goto out; @@ -297,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,   * @authflavor - security flavor to use when performing the mount   *   */ -static struct vfsmount *nfs_do_submount(struct dentry *dentry, -					struct nfs_fh *fh, -					struct nfs_fattr *fattr, -					rpc_authflavor_t authflavor) +struct vfsmount 
*nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, +				 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)  {  	struct nfs_clone_mount mountdata = {  		.sb = dentry->d_sb, @@ -333,3 +253,18 @@ out:  	dprintk("<-- nfs_do_submount() = %p\n", mnt);  	return mnt;  } + +struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, +			      struct nfs_fh *fh, struct nfs_fattr *fattr) +{ +	int err; +	struct dentry *parent = dget_parent(dentry); + +	/* Look it up again to get its attributes */ +	err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); +	dput(parent); +	if (err != 0) +		return ERR_PTR(err); + +	return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); +} diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index aa14ec303e9..8a6394edb8b 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -1,3 +1,7 @@ +/* + * NFS-private data for each "struct net".  Accessed with net_generic(). + */ +  #ifndef __NFS_NETNS_H__  #define __NFS_NETNS_H__ @@ -20,6 +24,7 @@ struct nfs_net {  	struct idr cb_ident_idr; /* Protected by nfs_client_lock */  #endif  	spinlock_t nfs_client_lock; +	struct timespec boot_time;  };  extern int nfs_net_id; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1f56000fabb..baf759bccd0 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -61,6 +61,7 @@  #define NFS_readdirres_sz	(1)  #define NFS_statfsres_sz	(1+NFS_info_sz) +static int nfs_stat_to_errno(enum nfs_stat);  /*   * While encoding arguments, set up the reply buffer in advance to @@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)  	p = xdr_decode_time(p, &fattr->atime);  	p = xdr_decode_time(p, &fattr->mtime);  	xdr_decode_time(p, &fattr->ctime); +	fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); +  	return 0;  out_overflow:  	print_overflow_msg(__func__, xdr); @@ -1109,7 +1112,7 @@ static const struct {   * Returns a local errno value, or -EIO if 
the NFS status code is   * not recognized.  This function is used jointly by NFSv2 and NFSv3.   */ -int nfs_stat_to_errno(enum nfs_stat status) +static int nfs_stat_to_errno(enum nfs_stat status)  {  	int i; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 75c68299358..2292a0fd2bf 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  }  static int -nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct qstr *name,  		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)  {  	struct nfs3_diropargs	arg = { @@ -810,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,  static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)  { -	if (nfs3_async_handle_jukebox(task, data->inode)) +	struct inode *inode = data->header->inode; + +	if (nfs3_async_handle_jukebox(task, inode))  		return -EAGAIN; -	nfs_invalidate_atime(data->inode); -	nfs_refresh_inode(data->inode, &data->fattr); +	nfs_invalidate_atime(inode); +	nfs_refresh_inode(inode, &data->fattr);  	return 0;  } @@ -830,10 +832,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da  static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)  { -	if (nfs3_async_handle_jukebox(task, data->inode)) +	struct inode *inode = data->header->inode; + +	if (nfs3_async_handle_jukebox(task, inode))  		return -EAGAIN;  	if (task->tk_status >= 0) -		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); +		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);  	return 0;  } @@ -847,7 +851,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_  	rpc_call_start(task);  } -static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct 
nfs_commit_data *data) +{ +	rpc_call_start(task); +} + +static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)  {  	if (nfs3_async_handle_jukebox(task, data->inode))  		return -EAGAIN; @@ -855,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)  	return 0;  } -static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)  {  	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];  } @@ -875,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {  	.file_inode_ops	= &nfs3_file_inode_operations,  	.file_ops	= &nfs_file_operations,  	.getroot	= nfs3_proc_get_root, +	.submount	= nfs_submount,  	.getattr	= nfs3_proc_getattr,  	.setattr	= nfs3_proc_setattr,  	.lookup		= nfs3_proc_lookup, @@ -906,6 +916,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {  	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,  	.write_done	= nfs3_write_done,  	.commit_setup	= nfs3_proc_commit_setup, +	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,  	.commit_done	= nfs3_commit_done,  	.lock		= nfs3_proc_lock,  	.clear_acl_cache = nfs3_forget_cached_acls, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a77cc9a3ce5..902de489ec9 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -86,6 +86,8 @@  				XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))  #define ACL3_setaclres_sz	(1+NFS3_post_op_attr_sz) +static int nfs3_stat_to_errno(enum nfs_stat); +  /*   * Map file type to S_IFMT bits   */ @@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)  	p = xdr_decode_nfstime3(p, &fattr->atime);  	p = xdr_decode_nfstime3(p, &fattr->mtime);  	xdr_decode_nfstime3(p, &fattr->ctime); +	fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);  	fattr->valid |= NFS_ATTR_FATTR_V3;  	return 0; @@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr 
*fattr)  		goto out_overflow;  	fattr->valid |= NFS_ATTR_FATTR_PRESIZE +		| NFS_ATTR_FATTR_PRECHANGE  		| NFS_ATTR_FATTR_PREMTIME  		| NFS_ATTR_FATTR_PRECTIME;  	p = xdr_decode_size3(p, &fattr->pre_size);  	p = xdr_decode_nfstime3(p, &fattr->pre_mtime);  	xdr_decode_nfstime3(p, &fattr->pre_ctime); +	fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);  	return 0;  out_overflow: @@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,   *	};   */  static void encode_commit3args(struct xdr_stream *xdr, -			       const struct nfs_writeargs *args) +			       const struct nfs_commitargs *args)  {  	__be32 *p; @@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,  static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,  				     struct xdr_stream *xdr, -				     const struct nfs_writeargs *args) +				     const struct nfs_commitargs *args)  {  	encode_commit3args(xdr, args);  } @@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,  out:  	return error;  out_default: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1472,7 +1477,7 @@ out_default:  	error = decode_post_op_attr(xdr, result->dir_attr);  	if (unlikely(error))  		goto out; -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,  out:  	return error;  out_default: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,  out:  	return error;  out_default: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1636,7 +1641,7 @@ static int 
nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1770,7 +1775,7 @@ out_default:  	error = decode_wcc_data(xdr, result->dir_attr);  	if (unlikely(error))  		goto out; -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /** @@ -2088,7 +2093,7 @@ out_default:  	error = decode_post_op_attr(xdr, result->dir_attr);  	if (unlikely(error))  		goto out; -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  /* @@ 
-2319,7 +2324,7 @@ out_status:   */  static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,  				   struct xdr_stream *xdr, -				   struct nfs_writeres *result) +				   struct nfs_commitres *result)  {  	enum nfs_stat status;  	int error; @@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,  out:  	return error;  out_status: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  #ifdef CONFIG_NFS_V3_ACL @@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,  out:  	return error;  out_default: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, @@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,  out:  	return error;  out_default: -	return nfs_stat_to_errno(status); +	return nfs3_stat_to_errno(status);  }  #endif  /* CONFIG_NFS_V3_ACL */ + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. 
+ */ +static const struct { +	int stat; +	int errno; +} nfs_errtbl[] = { +	{ NFS_OK,		0		}, +	{ NFSERR_PERM,		-EPERM		}, +	{ NFSERR_NOENT,		-ENOENT		}, +	{ NFSERR_IO,		-errno_NFSERR_IO}, +	{ NFSERR_NXIO,		-ENXIO		}, +/*	{ NFSERR_EAGAIN,	-EAGAIN		}, */ +	{ NFSERR_ACCES,		-EACCES		}, +	{ NFSERR_EXIST,		-EEXIST		}, +	{ NFSERR_XDEV,		-EXDEV		}, +	{ NFSERR_NODEV,		-ENODEV		}, +	{ NFSERR_NOTDIR,	-ENOTDIR	}, +	{ NFSERR_ISDIR,		-EISDIR		}, +	{ NFSERR_INVAL,		-EINVAL		}, +	{ NFSERR_FBIG,		-EFBIG		}, +	{ NFSERR_NOSPC,		-ENOSPC		}, +	{ NFSERR_ROFS,		-EROFS		}, +	{ NFSERR_MLINK,		-EMLINK		}, +	{ NFSERR_NAMETOOLONG,	-ENAMETOOLONG	}, +	{ NFSERR_NOTEMPTY,	-ENOTEMPTY	}, +	{ NFSERR_DQUOT,		-EDQUOT		}, +	{ NFSERR_STALE,		-ESTALE		}, +	{ NFSERR_REMOTE,	-EREMOTE	}, +#ifdef EWFLUSH +	{ NFSERR_WFLUSH,	-EWFLUSH	}, +#endif +	{ NFSERR_BADHANDLE,	-EBADHANDLE	}, +	{ NFSERR_NOT_SYNC,	-ENOTSYNC	}, +	{ NFSERR_BAD_COOKIE,	-EBADCOOKIE	}, +	{ NFSERR_NOTSUPP,	-ENOTSUPP	}, +	{ NFSERR_TOOSMALL,	-ETOOSMALL	}, +	{ NFSERR_SERVERFAULT,	-EREMOTEIO	}, +	{ NFSERR_BADTYPE,	-EBADTYPE	}, +	{ NFSERR_JUKEBOX,	-EJUKEBOX	}, +	{ -1,			-EIO		} +}; + +/** + * nfs3_stat_to_errno - convert an NFS status code to a local errno + * @status: NFS status code to convert + * + * Returns a local errno value, or -EIO if the NFS status code is + * not recognized.  This function is used jointly by NFSv2 and NFSv3. 
+ */ +static int nfs3_stat_to_errno(enum nfs_stat status) +{ +	int i; + +	for (i = 0; nfs_errtbl[i].stat != -1; i++) { +		if (nfs_errtbl[i].stat == (int)status) +			return nfs_errtbl[i].errno; +	} +	dprintk("NFS: Unrecognized nfs status value: %u\n", status); +	return nfs_errtbl[i].errno; +} + +  #define PROC(proc, argtype, restype, timer)				\  [NFS3PROC_##proc] = {							\  	.p_proc      = NFS3PROC_##proc,					\ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8d75021020b..c6827f93ab5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -24,6 +24,8 @@ enum nfs4_client_state {  	NFS4CLNT_RECALL_SLOT,  	NFS4CLNT_LEASE_CONFIRM,  	NFS4CLNT_SERVER_SCOPE_MISMATCH, +	NFS4CLNT_PURGE_STATE, +	NFS4CLNT_BIND_CONN_TO_SESSION,  };  enum nfs4_session_state { @@ -52,11 +54,6 @@ struct nfs4_minor_version_ops {  	const struct nfs4_state_maintenance_ops *state_renewal_ops;  }; -struct nfs_unique_id { -	struct rb_node rb_node; -	__u64 id; -}; -  #define NFS_SEQID_CONFIRMED 1  struct nfs_seqid_counter {  	ktime_t create_time; @@ -206,12 +203,18 @@ extern const struct dentry_operations nfs4_dentry_operations;  extern const struct inode_operations nfs4_dir_inode_operations;  /* nfs4namespace.c */ +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);  struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); +struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, +			       struct nfs_fh *, struct nfs_fattr *);  /* nfs4proc.c */  extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);  extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);  extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct 
rpc_cred *cred); +extern int nfs4_destroy_clientid(struct nfs_client *clp);  extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);  extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);  extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); @@ -239,8 +242,8 @@ extern int nfs41_setup_sequence(struct nfs4_session *session,  		struct rpc_task *task);  extern void nfs4_destroy_session(struct nfs4_session *session);  extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); -extern int nfs4_proc_create_session(struct nfs_client *); -extern int nfs4_proc_destroy_session(struct nfs4_session *); +extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);  extern int nfs4_init_session(struct nfs_server *server);  extern int nfs4_proc_get_lease_time(struct nfs_client *clp,  		struct nfs_fsinfo *fsinfo); @@ -310,9 +313,9 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);  #if defined(CONFIG_NFS_V4_1)  struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);  struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); -extern void nfs4_schedule_session_recovery(struct nfs4_session *); +extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);  #else -static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) +static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)  {  }  #endif /* CONFIG_NFS_V4_1 */ @@ -334,7 +337,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs  extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);  extern void nfs41_handle_recall_slot(struct nfs_client *clp);  extern void nfs41_handle_server_scope(struct nfs_client *, -				      struct server_scope **); +				      struct nfs41_server_scope **);  extern void 
nfs4_put_lock_state(struct nfs4_lock_state *lsp);  extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);  extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5acfd9ea8a3..e1340293872 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -82,29 +82,76 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)  	BUG();  } +static void filelayout_reset_write(struct nfs_write_data *data) +{ +	struct nfs_pgio_header *hdr = data->header; +	struct rpc_task *task = &data->task; + +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { +		dprintk("%s Reset task %5u for i/o through MDS " +			"(req %s/%lld, %u bytes @ offset %llu)\n", __func__, +			data->task.tk_pid, +			hdr->inode->i_sb->s_id, +			(long long)NFS_FILEID(hdr->inode), +			data->args.count, +			(unsigned long long)data->args.offset); + +		task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, +							&hdr->pages, +							hdr->completion_ops); +	} +} + +static void filelayout_reset_read(struct nfs_read_data *data) +{ +	struct nfs_pgio_header *hdr = data->header; +	struct rpc_task *task = &data->task; + +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { +		dprintk("%s Reset task %5u for i/o through MDS " +			"(req %s/%lld, %u bytes @ offset %llu)\n", __func__, +			data->task.tk_pid, +			hdr->inode->i_sb->s_id, +			(long long)NFS_FILEID(hdr->inode), +			data->args.count, +			(unsigned long long)data->args.offset); + +		task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, +							&hdr->pages, +							hdr->completion_ops); +	} +} +  static int filelayout_async_handle_error(struct rpc_task *task,  					 struct nfs4_state *state,  					 struct nfs_client *clp, -					 int *reset) +					 struct pnfs_layout_segment *lseg)  { -	struct nfs_server *mds_server = NFS_SERVER(state->inode); +	struct inode *inode = lseg->pls_layout->plh_inode; +	struct nfs_server 
*mds_server = NFS_SERVER(inode); +	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);  	struct nfs_client *mds_client = mds_server->nfs_client; +	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;  	if (task->tk_status >= 0)  		return 0; -	*reset = 0;  	switch (task->tk_status) {  	/* MDS state errors */  	case -NFS4ERR_DELEG_REVOKED:  	case -NFS4ERR_ADMIN_REVOKED:  	case -NFS4ERR_BAD_STATEID: +		if (state == NULL) +			break;  		nfs_remove_bad_delegation(state->inode);  	case -NFS4ERR_OPENMODE: +		if (state == NULL) +			break;  		nfs4_schedule_stateid_recovery(mds_server, state);  		goto wait_on_recovery;  	case -NFS4ERR_EXPIRED: -		nfs4_schedule_stateid_recovery(mds_server, state); +		if (state != NULL) +			nfs4_schedule_stateid_recovery(mds_server, state);  		nfs4_schedule_lease_recovery(mds_client);  		goto wait_on_recovery;  	/* DS session errors */ @@ -118,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,  		dprintk("%s ERROR %d, Reset session. Exchangeid "  			"flags 0x%x\n", __func__, task->tk_status,  			clp->cl_exchange_flags); -		nfs4_schedule_session_recovery(clp->cl_session); +		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);  		break;  	case -NFS4ERR_DELAY:  	case -NFS4ERR_GRACE: @@ -127,11 +174,48 @@ static int filelayout_async_handle_error(struct rpc_task *task,  		break;  	case -NFS4ERR_RETRY_UNCACHED_REP:  		break; +	/* Invalidate Layout errors */ +	case -NFS4ERR_PNFS_NO_LAYOUT: +	case -ESTALE:           /* mapped NFS4ERR_STALE */ +	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */ +	case -EISDIR:           /* mapped NFS4ERR_ISDIR */ +	case -NFS4ERR_FHEXPIRED: +	case -NFS4ERR_WRONG_TYPE: +		dprintk("%s Invalid layout error %d\n", __func__, +			task->tk_status); +		/* +		 * Destroy layout so new i/o will get a new layout. +		 * Layout will not be destroyed until all current lseg +		 * references are put. 
Mark layout as invalid to resend failed +		 * i/o and all i/o waiting on the slot table to the MDS until +		 * layout is destroyed and a new valid layout is obtained. +		 */ +		set_bit(NFS_LAYOUT_INVALID, +				&NFS_I(inode)->layout->plh_flags); +		pnfs_destroy_layout(NFS_I(inode)); +		rpc_wake_up(&tbl->slot_tbl_waitq); +		goto reset; +	/* RPC connection errors */ +	case -ECONNREFUSED: +	case -EHOSTDOWN: +	case -EHOSTUNREACH: +	case -ENETUNREACH: +	case -EIO: +	case -ETIMEDOUT: +	case -EPIPE: +		dprintk("%s DS connection error %d\n", __func__, +			task->tk_status); +		if (!filelayout_test_devid_invalid(devid)) +			_pnfs_return_layout(inode); +		filelayout_mark_devid_invalid(devid); +		rpc_wake_up(&tbl->slot_tbl_waitq); +		nfs4_ds_disconnect(clp); +		/* fall through */  	default: -		dprintk("%s DS error. Retry through MDS %d\n", __func__, +reset: +		dprintk("%s Retry through MDS. Error %d\n", __func__,  			task->tk_status); -		*reset = 1; -		break; +		return -NFS4ERR_RESET_TO_MDS;  	}  out:  	task->tk_status = 0; @@ -148,18 +232,17 @@ wait_on_recovery:  static int filelayout_read_done_cb(struct rpc_task *task,  				struct nfs_read_data *data)  { -	int reset = 0; +	struct nfs_pgio_header *hdr = data->header; +	int err; -	dprintk("%s DS read\n", __func__); +	err = filelayout_async_handle_error(task, data->args.context->state, +					    data->ds_clp, hdr->lseg); -	if (filelayout_async_handle_error(task, data->args.context->state, -					  data->ds_clp, &reset) == -EAGAIN) { -		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", -			__func__, data->ds_clp, data->ds_clp->cl_session); -		if (reset) { -			pnfs_set_lo_fail(data->lseg); -			nfs4_reset_read(task, data); -		} +	switch (err) { +	case -NFS4ERR_RESET_TO_MDS: +		filelayout_reset_read(data); +		return task->tk_status; +	case -EAGAIN:  		rpc_restart_call_prepare(task);  		return -EAGAIN;  	} @@ -175,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,  static void  
filelayout_set_layoutcommit(struct nfs_write_data *wdata)  { -	if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || +	struct nfs_pgio_header *hdr = wdata->header; + +	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||  	    wdata->res.verf->committed == NFS_FILE_SYNC)  		return;  	pnfs_set_layoutcommit(wdata); -	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, -		(unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); +	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, +		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);  }  /* @@ -191,8 +276,14 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)   */  static void filelayout_read_prepare(struct rpc_task *task, void *data)  { -	struct nfs_read_data *rdata = (struct nfs_read_data *)data; +	struct nfs_read_data *rdata = data; +	if (filelayout_reset_to_mds(rdata->header->lseg)) { +		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); +		filelayout_reset_read(rdata); +		rpc_exit(task, 0); +		return; +	}  	rdata->read_done_cb = filelayout_read_done_cb;  	if (nfs41_setup_sequence(rdata->ds_clp->cl_session, @@ -205,42 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)  static void filelayout_read_call_done(struct rpc_task *task, void *data)  { -	struct nfs_read_data *rdata = (struct nfs_read_data *)data; +	struct nfs_read_data *rdata = data;  	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); +	if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && +	    task->tk_status == 0) +		return; +  	/* Note this may cause RPC to be resent */ -	rdata->mds_ops->rpc_call_done(task, data); +	rdata->header->mds_ops->rpc_call_done(task, data);  }  static void filelayout_read_count_stats(struct rpc_task *task, void *data)  { -	struct nfs_read_data *rdata = (struct nfs_read_data *)data; +	struct nfs_read_data *rdata = data; -	rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); +	
rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);  }  static void filelayout_read_release(void *data)  { -	struct nfs_read_data *rdata = (struct nfs_read_data *)data; +	struct nfs_read_data *rdata = data; -	put_lseg(rdata->lseg); -	rdata->mds_ops->rpc_release(data); +	nfs_put_client(rdata->ds_clp); +	rdata->header->mds_ops->rpc_release(data);  }  static int filelayout_write_done_cb(struct rpc_task *task,  				struct nfs_write_data *data)  { -	int reset = 0; +	struct nfs_pgio_header *hdr = data->header; +	int err; -	if (filelayout_async_handle_error(task, data->args.context->state, -					  data->ds_clp, &reset) == -EAGAIN) { -		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", -			__func__, data->ds_clp, data->ds_clp->cl_session); -		if (reset) { -			pnfs_set_lo_fail(data->lseg); -			nfs4_reset_write(task, data); -		} +	err = filelayout_async_handle_error(task, data->args.context->state, +					    data->ds_clp, hdr->lseg); + +	switch (err) { +	case -NFS4ERR_RESET_TO_MDS: +		filelayout_reset_write(data); +		return task->tk_status; +	case -EAGAIN:  		rpc_restart_call_prepare(task);  		return -EAGAIN;  	} @@ -250,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,  }  /* Fake up some data that will cause nfs_commit_release to retry the writes. 
*/ -static void prepare_to_resend_writes(struct nfs_write_data *data) +static void prepare_to_resend_writes(struct nfs_commit_data *data)  {  	struct nfs_page *first = nfs_list_entry(data->pages.next); @@ -261,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)  }  static int filelayout_commit_done_cb(struct rpc_task *task, -				     struct nfs_write_data *data) +				     struct nfs_commit_data *data)  { -	int reset = 0; +	int err; -	if (filelayout_async_handle_error(task, data->args.context->state, -					  data->ds_clp, &reset) == -EAGAIN) { -		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", -			__func__, data->ds_clp, data->ds_clp->cl_session); -		if (reset) { -			prepare_to_resend_writes(data); -			pnfs_set_lo_fail(data->lseg); -		} else -			rpc_restart_call_prepare(task); +	err = filelayout_async_handle_error(task, NULL, data->ds_clp, +					    data->lseg); + +	switch (err) { +	case -NFS4ERR_RESET_TO_MDS: +		prepare_to_resend_writes(data); +		return -EAGAIN; +	case -EAGAIN: +		rpc_restart_call_prepare(task);  		return -EAGAIN;  	} @@ -282,8 +378,14 @@ static int filelayout_commit_done_cb(struct rpc_task *task,  static void filelayout_write_prepare(struct rpc_task *task, void *data)  { -	struct nfs_write_data *wdata = (struct nfs_write_data *)data; +	struct nfs_write_data *wdata = data; +	if (filelayout_reset_to_mds(wdata->header->lseg)) { +		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); +		filelayout_reset_write(wdata); +		rpc_exit(task, 0); +		return; +	}  	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,  				&wdata->args.seq_args, &wdata->res.seq_res,  				task)) @@ -294,36 +396,66 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)  static void filelayout_write_call_done(struct rpc_task *task, void *data)  { -	struct nfs_write_data *wdata = (struct nfs_write_data *)data; +	struct nfs_write_data *wdata = data; + +	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && +	 
   task->tk_status == 0) +		return;  	/* Note this may cause RPC to be resent */ -	wdata->mds_ops->rpc_call_done(task, data); +	wdata->header->mds_ops->rpc_call_done(task, data);  }  static void filelayout_write_count_stats(struct rpc_task *task, void *data)  { -	struct nfs_write_data *wdata = (struct nfs_write_data *)data; +	struct nfs_write_data *wdata = data; -	rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); +	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);  }  static void filelayout_write_release(void *data)  { -	struct nfs_write_data *wdata = (struct nfs_write_data *)data; +	struct nfs_write_data *wdata = data; + +	nfs_put_client(wdata->ds_clp); +	wdata->header->mds_ops->rpc_release(data); +} + +static void filelayout_commit_prepare(struct rpc_task *task, void *data) +{ +	struct nfs_commit_data *wdata = data; + +	if (nfs41_setup_sequence(wdata->ds_clp->cl_session, +				&wdata->args.seq_args, &wdata->res.seq_res, +				task)) +		return; + +	rpc_call_start(task); +} + +static void filelayout_write_commit_done(struct rpc_task *task, void *data) +{ +	struct nfs_commit_data *wdata = data; + +	/* Note this may cause RPC to be resent */ +	wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_commit_count_stats(struct rpc_task *task, void *data) +{ +	struct nfs_commit_data *cdata = data; -	put_lseg(wdata->lseg); -	wdata->mds_ops->rpc_release(data); +	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);  } -static void filelayout_commit_release(void *data) +static void filelayout_commit_release(void *calldata)  { -	struct nfs_write_data *wdata = (struct nfs_write_data *)data; +	struct nfs_commit_data *data = calldata; -	nfs_commit_release_pages(wdata); -	if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) -		nfs_commit_clear_lock(NFS_I(wdata->inode)); -	put_lseg(wdata->lseg); -	nfs_commitdata_release(wdata); +	data->completion_ops->completion(data); +	
put_lseg(data->lseg); +	nfs_put_client(data->ds_clp); +	nfs_commitdata_release(data);  }  static const struct rpc_call_ops filelayout_read_call_ops = { @@ -341,16 +473,17 @@ static const struct rpc_call_ops filelayout_write_call_ops = {  };  static const struct rpc_call_ops filelayout_commit_call_ops = { -	.rpc_call_prepare = filelayout_write_prepare, -	.rpc_call_done = filelayout_write_call_done, -	.rpc_count_stats = filelayout_write_count_stats, +	.rpc_call_prepare = filelayout_commit_prepare, +	.rpc_call_done = filelayout_write_commit_done, +	.rpc_count_stats = filelayout_commit_count_stats,  	.rpc_release = filelayout_commit_release,  };  static enum pnfs_try_status  filelayout_read_pagelist(struct nfs_read_data *data)  { -	struct pnfs_layout_segment *lseg = data->lseg; +	struct nfs_pgio_header *hdr = data->header; +	struct pnfs_layout_segment *lseg = hdr->lseg;  	struct nfs4_pnfs_ds *ds;  	loff_t offset = data->args.offset;  	u32 j, idx; @@ -358,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)  	int status;  	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", -		__func__, data->inode->i_ino, +		__func__, hdr->inode->i_ino,  		data->args.pgbase, (size_t)data->args.count, offset); -	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) -		return PNFS_NOT_ATTEMPTED; -  	/* Retrieve the correct rpc_client for the byte range */  	j = nfs4_fl_calc_j_index(lseg, offset);  	idx = nfs4_fl_calc_ds_index(lseg, j);  	ds = nfs4_fl_prepare_ds(lseg, idx); -	if (!ds) { -		/* Either layout fh index faulty, or ds connect failed */ -		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); -		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); +	if (!ds)  		return PNFS_NOT_ATTEMPTED; -	} -	dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); +	dprintk("%s USE DS: %s cl_count %d\n", __func__, +		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));  	/* No multipath support. 
Use first DS */ +	atomic_inc(&ds->ds_clp->cl_count);  	data->ds_clp = ds->ds_clp;  	fh = nfs4_fl_select_ds_fh(lseg, j);  	if (fh) @@ -386,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)  	data->mds_offset = offset;  	/* Perform an asynchronous read to ds */ -	status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, -				   &filelayout_read_call_ops); +	status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, +				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);  	BUG_ON(status != 0);  	return PNFS_ATTEMPTED;  } @@ -396,32 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)  static enum pnfs_try_status  filelayout_write_pagelist(struct nfs_write_data *data, int sync)  { -	struct pnfs_layout_segment *lseg = data->lseg; +	struct nfs_pgio_header *hdr = data->header; +	struct pnfs_layout_segment *lseg = hdr->lseg;  	struct nfs4_pnfs_ds *ds;  	loff_t offset = data->args.offset;  	u32 j, idx;  	struct nfs_fh *fh;  	int status; -	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) -		return PNFS_NOT_ATTEMPTED; -  	/* Retrieve the correct rpc_client for the byte range */  	j = nfs4_fl_calc_j_index(lseg, offset);  	idx = nfs4_fl_calc_ds_index(lseg, j);  	ds = nfs4_fl_prepare_ds(lseg, idx); -	if (!ds) { -		printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", -			__func__); -		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); -		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); +	if (!ds)  		return PNFS_NOT_ATTEMPTED; -	} -	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, -		data->inode->i_ino, sync, (size_t) data->args.count, offset, -		ds->ds_remotestr); +	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", +		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count, +		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));  	data->write_done_cb = filelayout_write_done_cb; +	atomic_inc(&ds->ds_clp->cl_count);  	data->ds_clp = ds->ds_clp;  	fh = 
nfs4_fl_select_ds_fh(lseg, j);  	if (fh) @@ -433,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)  	data->args.offset = filelayout_get_dserver_offset(lseg, offset);  	/* Perform an asynchronous write */ -	status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, -				    &filelayout_write_call_ops, sync); +	status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, +				    &filelayout_write_call_ops, sync, +				    RPC_TASK_SOFTCONN);  	BUG_ON(status != 0);  	return PNFS_ATTEMPTED;  } @@ -650,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)  	dprintk("--> %s\n", __func__);  	nfs4_fl_put_deviceid(fl->dsaddr); -	kfree(fl->commit_buckets); +	/* This assumes a single RW lseg */ +	if (lseg->pls_range.iomode == IOMODE_RW) { +		struct nfs4_filelayout *flo; + +		flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); +		flo->commit_info.nbuckets = 0; +		kfree(flo->commit_info.buckets); +		flo->commit_info.buckets = NULL; +	}  	_filelayout_free_lseg(fl);  } +static int +filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, +			     struct nfs_commit_info *cinfo, +			     gfp_t gfp_flags) +{ +	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); +	struct pnfs_commit_bucket *buckets; +	int size; + +	if (fl->commit_through_mds) +		return 0; +	if (cinfo->ds->nbuckets != 0) { +		/* This assumes there is only one IOMODE_RW lseg.  What +		 * we really want to do is have a layout_hdr level +		 * dictionary of <multipath_list4, fh> keys, each +		 * associated with a struct list_head, populated by calls +		 * to filelayout_write_pagelist(). +		 * */ +		return 0; +	} + +	size = (fl->stripe_type == STRIPE_SPARSE) ? 
+		fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + +	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), +			  gfp_flags); +	if (!buckets) +		return -ENOMEM; +	else { +		int i; + +		spin_lock(cinfo->lock); +		if (cinfo->ds->nbuckets != 0) +			kfree(buckets); +		else { +			cinfo->ds->buckets = buckets; +			cinfo->ds->nbuckets = size; +			for (i = 0; i < size; i++) { +				INIT_LIST_HEAD(&buckets[i].written); +				INIT_LIST_HEAD(&buckets[i].committing); +			} +		} +		spin_unlock(cinfo->lock); +		return 0; +	} +} +  static struct pnfs_layout_segment *  filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,  		      struct nfs4_layoutget_res *lgr, @@ -673,29 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,  		_filelayout_free_lseg(fl);  		return NULL;  	} - -	/* This assumes there is only one IOMODE_RW lseg.  What -	 * we really want to do is have a layout_hdr level -	 * dictionary of <multipath_list4, fh> keys, each -	 * associated with a struct list_head, populated by calls -	 * to filelayout_write_pagelist(). -	 * */ -	if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { -		int i; -		int size = (fl->stripe_type == STRIPE_SPARSE) ? 
-			fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - -		fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); -		if (!fl->commit_buckets) { -			filelayout_free_lseg(&fl->generic_hdr); -			return NULL; -		} -		fl->number_of_buckets = size; -		for (i = 0; i < size; i++) { -			INIT_LIST_HEAD(&fl->commit_buckets[i].written); -			INIT_LIST_HEAD(&fl->commit_buckets[i].committing); -		} -	}  	return &fl->generic_hdr;  } @@ -716,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,  	    !nfs_generic_pg_test(pgio, prev, req))  		return false; -	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; -	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; +	p_stripe = (u64)req_offset(prev); +	r_stripe = (u64)req_offset(req);  	stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;  	do_div(p_stripe, stripe_unit); @@ -732,6 +887,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,  {  	BUG_ON(pgio->pg_lseg != NULL); +	if (req->wb_offset != req->wb_pgbase) { +		/* +		 * Handling unaligned pages is difficult, because have to +		 * somehow split a req in two in certain cases in the +		 * pg.test code.  Avoid this by just not using pnfs +		 * in this case. 
+		 */ +		nfs_pageio_reset_read_mds(pgio); +		return; +	}  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   0, @@ -747,8 +912,13 @@ static void  filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,  			 struct nfs_page *req)  { +	struct nfs_commit_info cinfo; +	int status; +  	BUG_ON(pgio->pg_lseg != NULL); +	if (req->wb_offset != req->wb_pgbase) +		goto out_mds;  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   0, @@ -757,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,  					   GFP_NOFS);  	/* If no lseg, fall back to write through mds */  	if (pgio->pg_lseg == NULL) -		nfs_pageio_reset_write_mds(pgio); +		goto out_mds; +	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); +	status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); +	if (status < 0) { +		put_lseg(pgio->pg_lseg); +		pgio->pg_lseg = NULL; +		goto out_mds; +	} +	return; +out_mds: +	nfs_pageio_reset_write_mds(pgio);  }  static const struct nfs_pageio_ops filelayout_pg_read_ops = { @@ -784,43 +964,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)   * If this will make the bucket empty, it will need to put the lseg reference.   */  static void -filelayout_clear_request_commit(struct nfs_page *req) +filelayout_clear_request_commit(struct nfs_page *req, +				struct nfs_commit_info *cinfo)  {  	struct pnfs_layout_segment *freeme = NULL; -	struct inode *inode = req->wb_context->dentry->d_inode; -	spin_lock(&inode->i_lock); +	spin_lock(cinfo->lock);  	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))  		goto out; +	cinfo->ds->nwritten--;  	if (list_is_singular(&req->wb_list)) { -		struct pnfs_layout_segment *lseg; +		struct pnfs_commit_bucket *bucket; -		/* From here we can find the bucket, but for the moment, -		 * since there is only one relevant lseg... 
-		 */ -		list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { -			if (lseg->pls_range.iomode == IOMODE_RW) { -				freeme = lseg; -				break; -			} -		} +		bucket = list_first_entry(&req->wb_list, +					  struct pnfs_commit_bucket, +					  written); +		freeme = bucket->wlseg; +		bucket->wlseg = NULL;  	}  out: -	nfs_request_remove_commit_list(req); -	spin_unlock(&inode->i_lock); +	nfs_request_remove_commit_list(req, cinfo); +	spin_unlock(cinfo->lock);  	put_lseg(freeme);  }  static struct list_head *  filelayout_choose_commit_list(struct nfs_page *req, -			      struct pnfs_layout_segment *lseg) +			      struct pnfs_layout_segment *lseg, +			      struct nfs_commit_info *cinfo)  {  	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);  	u32 i, j;  	struct list_head *list; +	struct pnfs_commit_bucket *buckets;  	if (fl->commit_through_mds) -		return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; +		return &cinfo->mds->list;  	/* Note that we are calling nfs4_fl_calc_j_index on each page  	 * that ends up being committed to a data server.  An attractive @@ -828,31 +1007,33 @@ filelayout_choose_commit_list(struct nfs_page *req,  	 * to store the value calculated in filelayout_write_pagelist  	 * and just use that here.  	 */ -	j = nfs4_fl_calc_j_index(lseg, -				 (loff_t)req->wb_index << PAGE_CACHE_SHIFT); +	j = nfs4_fl_calc_j_index(lseg, req_offset(req));  	i = select_bucket_index(fl, j); -	list = &fl->commit_buckets[i].written; +	buckets = cinfo->ds->buckets; +	list = &buckets[i].written;  	if (list_empty(list)) {  		/* Non-empty buckets hold a reference on the lseg.  That ref  		 * is normally transferred to the COMMIT call and released  		 * there.  
It could also be released if the last req is pulled  		 * off due to a rewrite, in which case it will be done in -		 * filelayout_remove_commit_req +		 * filelayout_clear_request_commit  		 */ -		get_lseg(lseg); +		buckets[i].wlseg = get_lseg(lseg);  	}  	set_bit(PG_COMMIT_TO_DS, &req->wb_flags); +	cinfo->ds->nwritten++;  	return list;  }  static void  filelayout_mark_request_commit(struct nfs_page *req, -		struct pnfs_layout_segment *lseg) +			       struct pnfs_layout_segment *lseg, +			       struct nfs_commit_info *cinfo)  {  	struct list_head *list; -	list = filelayout_choose_commit_list(req, lseg); -	nfs_request_add_commit_list(req, list); +	list = filelayout_choose_commit_list(req, lseg, cinfo); +	nfs_request_add_commit_list(req, list, cinfo);  }  static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -880,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)  	return flseg->fh_array[i];  } -static int filelayout_initiate_commit(struct nfs_write_data *data, int how) +static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)  {  	struct pnfs_layout_segment *lseg = data->lseg;  	struct nfs4_pnfs_ds *ds; @@ -890,135 +1071,138 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)  	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);  	ds = nfs4_fl_prepare_ds(lseg, idx);  	if (!ds) { -		printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", -			__func__); -		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); -		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);  		prepare_to_resend_writes(data);  		filelayout_commit_release(data);  		return -EAGAIN;  	} -	dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); -	data->write_done_cb = filelayout_commit_done_cb; +	dprintk("%s ino %lu, how %d cl_count %d\n", __func__, +		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); +	data->commit_done_cb = 
filelayout_commit_done_cb; +	atomic_inc(&ds->ds_clp->cl_count);  	data->ds_clp = ds->ds_clp;  	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);  	if (fh)  		data->args.fh = fh; -	return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, -				   &filelayout_commit_call_ops, how); -} - -/* - * This is only useful while we are using whole file layouts. - */ -static struct pnfs_layout_segment * -find_only_write_lseg_locked(struct inode *inode) -{ -	struct pnfs_layout_segment *lseg; - -	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) -		if (lseg->pls_range.iomode == IOMODE_RW) -			return lseg; -	return NULL; -} - -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) -{ -	struct pnfs_layout_segment *rv; - -	spin_lock(&inode->i_lock); -	rv = find_only_write_lseg_locked(inode); -	if (rv) -		get_lseg(rv); -	spin_unlock(&inode->i_lock); -	return rv; +	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, +				   &filelayout_commit_call_ops, how, +				   RPC_TASK_SOFTCONN);  }  static int -filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, -		spinlock_t *lock) +transfer_commit_list(struct list_head *src, struct list_head *dst, +		     struct nfs_commit_info *cinfo, int max)  { -	struct list_head *src = &bucket->written; -	struct list_head *dst = &bucket->committing;  	struct nfs_page *req, *tmp;  	int ret = 0;  	list_for_each_entry_safe(req, tmp, src, wb_list) {  		if (!nfs_lock_request(req))  			continue; -		if (cond_resched_lock(lock)) +		kref_get(&req->wb_kref); +		if (cond_resched_lock(cinfo->lock))  			list_safe_reset_next(req, tmp, wb_list); -		nfs_request_remove_commit_list(req); +		nfs_request_remove_commit_list(req, cinfo);  		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);  		nfs_list_add_request(req, dst);  		ret++; -		if (ret == max) +		if ((ret == max) && !cinfo->dreq)  			break;  	}  	return ret;  } +static int +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, +	
		       struct nfs_commit_info *cinfo, +			       int max) +{ +	struct list_head *src = &bucket->written; +	struct list_head *dst = &bucket->committing; +	int ret; + +	ret = transfer_commit_list(src, dst, cinfo, max); +	if (ret) { +		cinfo->ds->nwritten -= ret; +		cinfo->ds->ncommitting += ret; +		bucket->clseg = bucket->wlseg; +		if (list_empty(src)) +			bucket->wlseg = NULL; +		else +			get_lseg(bucket->clseg); +	} +	return ret; +} +  /* Move reqs from written to committing lists, returning count of number moved. - * Note called with i_lock held. + * Note called with cinfo->lock held.   */ -static int filelayout_scan_commit_lists(struct inode *inode, int max, -		spinlock_t *lock) +static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, +					int max)  { -	struct pnfs_layout_segment *lseg; -	struct nfs4_filelayout_segment *fl;  	int i, rv = 0, cnt; -	lseg = find_only_write_lseg_locked(inode); -	if (!lseg) -		goto out_done; -	fl = FILELAYOUT_LSEG(lseg); -	if (fl->commit_through_mds) -		goto out_done; -	for (i = 0; i < fl->number_of_buckets && max != 0; i++) { -		cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], -				max, lock); +	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { +		cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], +						     cinfo, max);  		max -= cnt;  		rv += cnt;  	} -out_done:  	return rv;  } +/* Pull everything off the committing lists and dump into @dst */ +static void filelayout_recover_commit_reqs(struct list_head *dst, +					   struct nfs_commit_info *cinfo) +{ +	struct pnfs_commit_bucket *b; +	int i; + +	/* NOTE cinfo->lock is NOT held, relying on fact that this is +	 * only called on single thread per dreq. 
+	 * Can't take the lock because need to do put_lseg +	 */ +	for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { +		if (transfer_commit_list(&b->written, dst, cinfo, 0)) { +			BUG_ON(!list_empty(&b->written)); +			put_lseg(b->wlseg); +			b->wlseg = NULL; +		} +	} +	cinfo->ds->nwritten = 0; +} +  static unsigned int -alloc_ds_commits(struct inode *inode, struct list_head *list) +alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)  { -	struct pnfs_layout_segment *lseg; -	struct nfs4_filelayout_segment *fl; -	struct nfs_write_data *data; +	struct pnfs_ds_commit_info *fl_cinfo; +	struct pnfs_commit_bucket *bucket; +	struct nfs_commit_data *data;  	int i, j;  	unsigned int nreq = 0; -	/* Won't need this when non-whole file layout segments are supported -	 * instead we will use a pnfs_layout_hdr structure */ -	lseg = find_only_write_lseg(inode); -	if (!lseg) -		return 0; -	fl = FILELAYOUT_LSEG(lseg); -	for (i = 0; i < fl->number_of_buckets; i++) { -		if (list_empty(&fl->commit_buckets[i].committing)) +	fl_cinfo = cinfo->ds; +	bucket = fl_cinfo->buckets; +	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { +		if (list_empty(&bucket->committing))  			continue;  		data = nfs_commitdata_alloc();  		if (!data)  			break;  		data->ds_commit_index = i; -		data->lseg = lseg; +		data->lseg = bucket->clseg; +		bucket->clseg = NULL;  		list_add(&data->pages, list);  		nreq++;  	}  	/* Clean up on error */ -	for (j = i; j < fl->number_of_buckets; j++) { -		if (list_empty(&fl->commit_buckets[i].committing)) +	for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { +		if (list_empty(&bucket->committing))  			continue; -		nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); -		put_lseg(lseg);  /* associated with emptying bucket */ +		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); +		put_lseg(bucket->clseg); +		bucket->clseg = NULL;  	} -	put_lseg(lseg);  	/* Caller will clean up entries put on list */  	return nreq;  } @@ 
-1026,9 +1210,9 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)  /* This follows nfs_commit_list pretty closely */  static int  filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, -			   int how) +			   int how, struct nfs_commit_info *cinfo)  { -	struct nfs_write_data	*data, *tmp; +	struct nfs_commit_data *data, *tmp;  	LIST_HEAD(list);  	unsigned int nreq = 0; @@ -1039,30 +1223,34 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,  			list_add(&data->pages, &list);  			nreq++;  		} else -			nfs_retry_commit(mds_pages, NULL); +			nfs_retry_commit(mds_pages, NULL, cinfo);  	} -	nreq += alloc_ds_commits(inode, &list); +	nreq += alloc_ds_commits(cinfo, &list);  	if (nreq == 0) { -		nfs_commit_clear_lock(NFS_I(inode)); +		cinfo->completion_ops->error_cleanup(NFS_I(inode));  		goto out;  	} -	atomic_add(nreq, &NFS_I(inode)->commits_outstanding); +	atomic_add(nreq, &cinfo->mds->rpcs_out);  	list_for_each_entry_safe(data, tmp, &list, pages) {  		list_del_init(&data->pages);  		if (!data->lseg) { -			nfs_init_commit(data, mds_pages, NULL); -			nfs_initiate_commit(data, NFS_CLIENT(inode), -					    data->mds_ops, how); +			nfs_init_commit(data, mds_pages, NULL, cinfo); +			nfs_initiate_commit(NFS_CLIENT(inode), data, +					    data->mds_ops, how, 0);  		} else { -			nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); +			struct pnfs_commit_bucket *buckets; + +			buckets = cinfo->ds->buckets; +			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);  			filelayout_initiate_commit(data, how);  		}  	}  out: +	cinfo->ds->ncommitting = 0;  	return PNFS_ATTEMPTED;  } @@ -1072,17 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)  	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));  } +static struct pnfs_layout_hdr * +filelayout_alloc_layout_hdr(struct inode 
*inode, gfp_t gfp_flags) +{ +	struct nfs4_filelayout *flo; + +	flo = kzalloc(sizeof(*flo), gfp_flags); +	return &flo->generic_hdr; +} + +static void +filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ +	kfree(FILELAYOUT_FROM_HDR(lo)); +} + +static struct pnfs_ds_commit_info * +filelayout_get_ds_info(struct inode *inode) +{ +	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; + +	if (layout == NULL) +		return NULL; +	else +		return &FILELAYOUT_FROM_HDR(layout)->commit_info; +} +  static struct pnfs_layoutdriver_type filelayout_type = {  	.id			= LAYOUT_NFSV4_1_FILES,  	.name			= "LAYOUT_NFSV4_1_FILES",  	.owner			= THIS_MODULE, +	.alloc_layout_hdr	= filelayout_alloc_layout_hdr, +	.free_layout_hdr	= filelayout_free_layout_hdr,  	.alloc_lseg		= filelayout_alloc_lseg,  	.free_lseg		= filelayout_free_lseg,  	.pg_read_ops		= &filelayout_pg_read_ops,  	.pg_write_ops		= &filelayout_pg_write_ops, +	.get_ds_info		= &filelayout_get_ds_info,  	.mark_request_commit	= filelayout_mark_request_commit,  	.clear_request_commit	= filelayout_clear_request_commit,  	.scan_commit_lists	= filelayout_scan_commit_lists, +	.recover_commit_reqs	= filelayout_recover_commit_reqs,  	.commit_pagelist	= filelayout_commit_pagelist,  	.read_pagelist		= filelayout_read_pagelist,  	.write_pagelist		= filelayout_write_pagelist, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 21190bb1f5e..43fe802dd67 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -33,6 +33,13 @@  #include "pnfs.h"  /* + * Default data server connection timeout and retrans vaules. + * Set by module paramters dataserver_timeo and dataserver_retrans. + */ +#define NFS4_DEF_DS_TIMEO   60 +#define NFS4_DEF_DS_RETRANS 5 + +/*   * Field testing shows we need to support up to 4096 stripe indices.   * We store each index as a u8 (u32 on the wire) to keep the memory footprint   * reasonable. 
This in turn means we support a maximum of 256 @@ -41,6 +48,9 @@  #define NFS4_PNFS_MAX_STRIPE_CNT 4096  #define NFS4_PNFS_MAX_MULTI_CNT  256 /* 256 fit into a u8 stripe_index */ +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS   12001 +  enum stripetype4 {  	STRIPE_SPARSE = 1,  	STRIPE_DENSE = 2 @@ -62,23 +72,14 @@ struct nfs4_pnfs_ds {  	atomic_t		ds_count;  }; -/* nfs4_file_layout_dsaddr flags */ -#define NFS4_DEVICE_ID_NEG_ENTRY	0x00000001 -  struct nfs4_file_layout_dsaddr {  	struct nfs4_deviceid_node	id_node; -	unsigned long			flags;  	u32				stripe_count;  	u8				*stripe_indices;  	u32				ds_num;  	struct nfs4_pnfs_ds		*ds_list[1];  }; -struct nfs4_fl_commit_bucket { -	struct list_head written; -	struct list_head committing; -}; -  struct nfs4_filelayout_segment {  	struct pnfs_layout_segment generic_hdr;  	u32 stripe_type; @@ -89,10 +90,19 @@ struct nfs4_filelayout_segment {  	struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */  	unsigned int num_fh;  	struct nfs_fh **fh_array; -	struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ -	int number_of_buckets;  }; +struct nfs4_filelayout { +	struct pnfs_layout_hdr generic_hdr; +	struct pnfs_ds_commit_info commit_info; +}; + +static inline struct nfs4_filelayout * +FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) +{ +	return container_of(lo, struct nfs4_filelayout, generic_hdr); +} +  static inline struct nfs4_filelayout_segment *  FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)  { @@ -107,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)  	return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;  } +static inline void +filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) +{ +	u32 *p = (u32 *)&node->deviceid; + +	printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", +		p[0], p[1], p[2], p[3]); + +	set_bit(NFS_DEVICEID_INVALID, &node->flags); +} + +static inline bool +filelayout_test_layout_invalid(struct pnfs_layout_hdr 
*lo) +{ +	return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags); +} + +static inline bool +filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) +{ +	return test_bit(NFS_DEVICEID_INVALID, &node->flags); +} + +static inline bool +filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) +{ +	return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) || +		filelayout_test_layout_invalid(lseg->pls_layout); +} +  extern struct nfs_fh *  nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); @@ -119,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);  extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);  struct nfs4_file_layout_dsaddr *  get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); +void nfs4_ds_disconnect(struct nfs_client *clp);  #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index c9cff9adb2d..a1fab8da7f0 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -30,12 +30,16 @@  #include <linux/nfs_fs.h>  #include <linux/vmalloc.h> +#include <linux/module.h>  #include "internal.h"  #include "nfs4filelayout.h"  #define NFSDBG_FACILITY		NFSDBG_PNFS_LD +static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; +static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; +  /*   * Data server cache   * @@ -145,6 +149,28 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)  }  /* + * Lookup DS by nfs_client pointer. 
Zero data server client pointer + */ +void nfs4_ds_disconnect(struct nfs_client *clp) +{ +	struct nfs4_pnfs_ds *ds; +	struct nfs_client *found = NULL; + +	dprintk("%s clp %p\n", __func__, clp); +	spin_lock(&nfs4_ds_cache_lock); +	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) +		if (ds->ds_clp && ds->ds_clp == clp) { +			found = ds->ds_clp; +			ds->ds_clp = NULL; +		} +	spin_unlock(&nfs4_ds_cache_lock); +	if (found) { +		set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); +		nfs_put_client(clp); +	} +} + +/*   * Create an rpc connection to the nfs4_pnfs_ds data server   * Currently only supports IPv4 and IPv6 addresses   */ @@ -165,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)  			__func__, ds->ds_remotestr, da->da_remotestr);  		clp = nfs4_set_ds_client(mds_srv->nfs_client, -				 (struct sockaddr *)&da->da_addr, -				 da->da_addrlen, IPPROTO_TCP); +					(struct sockaddr *)&da->da_addr, +					da->da_addrlen, IPPROTO_TCP, +					dataserver_timeo, dataserver_retrans);  		if (!IS_ERR(clp))  			break;  	} @@ -176,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)  		goto out;  	} -	if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { -		if (!is_ds_client(clp)) { -			status = -ENODEV; -			goto out_put; -		} -		ds->ds_clp = clp; -		dprintk("%s [existing] server=%s\n", __func__, -			ds->ds_remotestr); -		goto out; -	} - -	/* -	 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to -	 * be equal to the MDS lease. Renewal is scheduled in create_session. 
-	 */ -	spin_lock(&mds_srv->nfs_client->cl_lock); -	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time; -	spin_unlock(&mds_srv->nfs_client->cl_lock); -	clp->cl_last_renewal = jiffies; - -	/* New nfs_client */ -	status = nfs4_init_ds_session(clp); +	status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);  	if (status)  		goto out_put; @@ -602,7 +608,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)  		mp_count = be32_to_cpup(p); /* multipath count */  		for (j = 0; j < mp_count; j++) { -			da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, +			da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,  					    &stream, gfp_flags);  			if (da)  				list_add_tail(&da->da_node, &dsaddrs); @@ -791,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)  	return flseg->fh_array[i];  } -static void -filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, -			       int err, const char *ds_remotestr) -{ -	u32 *p = (u32 *)&dsaddr->id_node.deviceid; - -	printk(KERN_ERR "NFS: data server %s connection error %d." 
-		" Deviceid [%x%x%x%x] marked out of use.\n", -		ds_remotestr, err, p[0], p[1], p[2], p[3]); - -	spin_lock(&nfs4_ds_cache_lock); -	dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; -	spin_unlock(&nfs4_ds_cache_lock); -} -  struct nfs4_pnfs_ds *  nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)  {  	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;  	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; +	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); + +	if (filelayout_test_devid_invalid(devid)) +		return NULL;  	if (ds == NULL) {  		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",  			__func__, ds_idx); -		return NULL; +		goto mark_dev_invalid;  	}  	if (!ds->ds_clp) {  		struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);  		int err; -		if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) { -			/* Already tried to connect, don't try again */ -			dprintk("%s Deviceid marked out of use\n", __func__); -			return NULL; -		}  		err = nfs4_ds_connect(s, ds); -		if (err) { -			filelayout_mark_devid_negative(dsaddr, err, -						       ds->ds_remotestr); -			return NULL; -		} +		if (err) +			goto mark_dev_invalid;  	}  	return ds; + +mark_dev_invalid: +	filelayout_mark_devid_invalid(devid); +	return NULL;  } + +module_param(dataserver_retrans, uint, 0644); +MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client " +			"retries a request before it attempts further " +			" recovery  action."); +module_param(dataserver_timeo, uint, 0644); +MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " +			"NFSv4.1  client  waits for a response from a " +			" data server before it retries an NFS request."); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a7f3dedc4ec..017b4b01a69 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,  	return ret;  } 
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +{ +	struct gss_api_mech *mech; +	struct xdr_netobj oid; +	int i; +	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; + +	for (i = 0; i < flavors->num_flavors; i++) { +		struct nfs4_secinfo_flavor *flavor; +		flavor = &flavors->flavors[i]; + +		if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { +			pseudoflavor = flavor->flavor; +			break; +		} else if (flavor->flavor == RPC_AUTH_GSS) { +			oid.len  = flavor->gss.sec_oid4.len; +			oid.data = flavor->gss.sec_oid4.data; +			mech = gss_mech_get_by_OID(&oid); +			if (!mech) +				continue; +			pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); +			gss_mech_put(mech); +			break; +		} +	} + +	return pseudoflavor; +} +  static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)  {  	struct page *page; @@ -168,7 +197,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino  	rpc_authflavor_t flavor;  	flavor = nfs4_negotiate_security(inode, name); -	if (flavor < 0) +	if ((int)flavor < 0)  		return ERR_PTR(flavor);  	clone = rpc_clone_client(clnt); @@ -300,7 +329,7 @@ out:   * @dentry - dentry of referral   *   */ -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)  {  	struct vfsmount *mnt = ERR_PTR(-ENOMEM);  	struct dentry *parent; @@ -341,3 +370,25 @@ out:  	dprintk("%s: done\n", __func__);  	return mnt;  } + +struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, +			       struct nfs_fh *fh, struct nfs_fattr *fattr) +{ +	struct dentry *parent = dget_parent(dentry); +	struct rpc_clnt *client; +	struct vfsmount *mnt; + +	/* Look it up again to get its attributes and sec flavor */ +	client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr); +	dput(parent); +	if (IS_ERR(client)) +		return 
ERR_CAST(client); + +	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) +		mnt = nfs_do_refmount(client, dentry); +	else +		mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor); + +	rpc_shutdown_client(client); +	return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab985f6f0da..d48dbefa0e7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,6 +64,7 @@  #include "iostat.h"  #include "callback.h"  #include "pnfs.h" +#include "netns.h"  #define NFSDBG_FACILITY		NFSDBG_PROC @@ -80,6 +81,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);  static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);  static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);  static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); +static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);  static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);  static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,  			    struct nfs_fattr *fattr, struct iattr *sattr, @@ -101,6 +103,8 @@ static int nfs4_map_errors(int err)  	case -NFS4ERR_BADOWNER:  	case -NFS4ERR_BADNAME:  		return -EINVAL; +	case -NFS4ERR_SHARE_DENIED: +		return -EACCES;  	default:  		dprintk("%s could not handle NFSv4 error %d\n",  				__func__, -err); @@ -304,7 +308,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc  		case -NFS4ERR_SEQ_MISORDERED:  			dprintk("%s ERROR: %d Reset session\n", __func__,  				errorcode); -			nfs4_schedule_session_recovery(clp->cl_session); +			nfs4_schedule_session_recovery(clp->cl_session, errorcode);  			exception->retry = 1;  			break;  #endif /* defined(CONFIG_NFS_V4_1) */ @@ -772,7 +776,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)  	struct nfs_inode *nfsi = NFS_I(dir);  	
spin_lock(&dir->i_lock); -	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; +	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;  	if (!cinfo->atomic || cinfo->before != dir->i_version)  		nfs_force_lookup_revalidate(dir);  	dir->i_version = cinfo->after; @@ -788,7 +792,6 @@ struct nfs4_opendata {  	struct nfs4_string owner_name;  	struct nfs4_string group_name;  	struct nfs_fattr f_attr; -	struct nfs_fattr dir_attr;  	struct dentry *dir;  	struct dentry *dentry;  	struct nfs4_state_owner *owner; @@ -804,12 +807,10 @@ struct nfs4_opendata {  static void nfs4_init_opendata_res(struct nfs4_opendata *p)  {  	p->o_res.f_attr = &p->f_attr; -	p->o_res.dir_attr = &p->dir_attr;  	p->o_res.seqid = p->o_arg.seqid;  	p->c_res.seqid = p->c_arg.seqid;  	p->o_res.server = p->o_arg.server;  	nfs_fattr_init(&p->f_attr); -	nfs_fattr_init(&p->dir_attr);  	nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);  } @@ -843,7 +844,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,  	p->o_arg.name = &dentry->d_name;  	p->o_arg.server = server;  	p->o_arg.bitmask = server->attr_bitmask; -	p->o_arg.dir_bitmask = server->cache_consistency_bitmask;  	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;  	if (attrs != NULL && attrs->ia_valid != 0) {  		__be32 verf[2]; @@ -1332,7 +1332,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state  			case -NFS4ERR_BAD_HIGH_SLOT:  			case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:  			case -NFS4ERR_DEADSESSION: -				nfs4_schedule_session_recovery(server->nfs_client->cl_session); +				nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);  				goto out;  			case -NFS4ERR_STALE_CLIENTID:  			case -NFS4ERR_STALE_STATEID: @@ -1611,8 +1611,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)  	nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); -	nfs_refresh_inode(dir, o_res->dir_attr); -  	if (o_res->rflags & 
NFS4_OPEN_RESULT_CONFIRM) {  		status = _nfs4_proc_open_confirm(data);  		if (status != 0) @@ -1645,11 +1643,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)  	nfs_fattr_map_and_free_names(server, &data->f_attr); -	if (o_arg->open_flags & O_CREAT) { +	if (o_arg->open_flags & O_CREAT)  		update_changeattr(dir, &o_res->cinfo); -		nfs_post_op_update_inode(dir, o_res->dir_attr); -	} else -		nfs_refresh_inode(dir, o_res->dir_attr);  	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)  		server->caps &= ~NFS_CAP_POSIX_LOCK;  	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -1789,7 +1784,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct  /*   * Returns a referenced nfs4_state   */ -static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, +			struct dentry *dentry, +			fmode_t fmode, +			int flags, +			struct iattr *sattr, +			struct rpc_cred *cred, +			struct nfs4_state **res, +			struct nfs4_threshold **ctx_th)  {  	struct nfs4_state_owner  *sp;  	struct nfs4_state     *state = NULL; @@ -1814,6 +1816,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode  	if (opendata == NULL)  		goto err_put_state_owner; +	if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { +		opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); +		if (!opendata->f_attr.mdsthreshold) +			goto err_opendata_put; +	}  	if (dentry->d_inode != NULL)  		opendata->state = nfs4_get_open_state(dentry->d_inode, sp); @@ -1839,11 +1846,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode  			nfs_setattr_update_inode(state->inode, sattr);  		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);  	} + +	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) +		*ctx_th = 
opendata->f_attr.mdsthreshold; +	else +		kfree(opendata->f_attr.mdsthreshold); +	opendata->f_attr.mdsthreshold = NULL; +  	nfs4_opendata_put(opendata);  	nfs4_put_state_owner(sp);  	*res = state;  	return 0;  err_opendata_put: +	kfree(opendata->f_attr.mdsthreshold);  	nfs4_opendata_put(opendata);  err_put_state_owner:  	nfs4_put_state_owner(sp); @@ -1853,14 +1868,21 @@ out_err:  } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, +					struct dentry *dentry, +					fmode_t fmode, +					int flags, +					struct iattr *sattr, +					struct rpc_cred *cred, +					struct nfs4_threshold **ctx_th)  {  	struct nfs4_exception exception = { };  	struct nfs4_state *res;  	int status;  	do { -		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); +		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, +				       &res, ctx_th);  		if (status == 0)  			break;  		/* NOTE: BAD_SEQID means the server and client disagree about the @@ -2184,7 +2206,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags  	struct nfs4_state *state;  	/* Protect against concurrent sillydeletes */ -	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); +	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, +			     ctx->cred, &ctx->mdsthreshold);  	if (IS_ERR(state))  		return ERR_CAST(state);  	ctx->state = state; @@ -2354,8 +2377,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,  /*   * get the file handle for the "/" directory on the server   */ -static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, -			      struct nfs_fsinfo *info) +int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, +			 struct nfs_fsinfo *info)  {  	int minor_version = 
server->nfs_client->cl_minorversion;  	int status = nfs4_lookup_root(server, fhandle, info); @@ -2372,6 +2395,31 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,  	return nfs4_map_errors(status);  } +static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, +			      struct nfs_fsinfo *info) +{ +	int error; +	struct nfs_fattr *fattr = info->fattr; + +	error = nfs4_server_capabilities(server, mntfh); +	if (error < 0) { +		dprintk("nfs4_get_root: getcaps error = %d\n", -error); +		return error; +	} + +	error = nfs4_proc_getattr(server, mntfh, fattr); +	if (error < 0) { +		dprintk("nfs4_get_root: getattr error = %d\n", -error); +		return error; +	} + +	if (fattr->valid & NFS_ATTR_FATTR_FSID && +	    !nfs_fsid_equal(&server->fsid, &fattr->fsid)) +		memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); + +	return error; +} +  /*   * Get locations and (maybe) other attributes of a referral.   * Note that we'll actually follow the referral later when @@ -2578,7 +2626,7 @@ out:  	return err;  } -static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,  			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)  {  	int status; @@ -2761,7 +2809,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,  		fmode = ctx->mode;  	}  	sattr->ia_mode &= ~current_umask(); -	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); +	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);  	d_drop(dentry);  	if (IS_ERR(state)) {  		status = PTR_ERR(state); @@ -2783,7 +2831,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)  	struct nfs_removeargs args = {  		.fh = NFS_FH(dir),  		.name = *name, -		.bitmask = server->attr_bitmask,  	};  	struct nfs_removeres res = {  		.server = server, @@ -2793,19 +2840,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr 
*name)  		.rpc_argp = &args,  		.rpc_resp = &res,  	}; -	int status = -ENOMEM; - -	res.dir_attr = nfs_alloc_fattr(); -	if (res.dir_attr == NULL) -		goto out; +	int status;  	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); -	if (status == 0) { +	if (status == 0)  		update_changeattr(dir, &res.cinfo); -		nfs_post_op_update_inode(dir, res.dir_attr); -	} -	nfs_free_fattr(res.dir_attr); -out:  	return status;  } @@ -2827,7 +2866,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)  	struct nfs_removeargs *args = msg->rpc_argp;  	struct nfs_removeres *res = msg->rpc_resp; -	args->bitmask = server->cache_consistency_bitmask;  	res->server = server;  	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];  	nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); @@ -2852,7 +2890,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)  	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)  		return 0;  	update_changeattr(dir, &res->cinfo); -	nfs_post_op_update_inode(dir, res->dir_attr);  	return 1;  } @@ -2863,7 +2900,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)  	struct nfs_renameres *res = msg->rpc_resp;  	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; -	arg->bitmask = server->attr_bitmask;  	res->server = server;  	nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);  } @@ -2889,9 +2925,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,  		return 0;  	update_changeattr(old_dir, &res->old_cinfo); -	nfs_post_op_update_inode(old_dir, res->old_fattr);  	update_changeattr(new_dir, &res->new_cinfo); -	nfs_post_op_update_inode(new_dir, res->new_fattr);  	return 1;  } @@ -2904,7 +2938,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,  		.new_dir = NFS_FH(new_dir),  		.old_name = old_name,  		.new_name = new_name, -		.bitmask = server->attr_bitmask,  	};  	struct 
nfs_renameres res = {  		.server = server, @@ -2916,21 +2949,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,  	};  	int status = -ENOMEM; -	res.old_fattr = nfs_alloc_fattr(); -	res.new_fattr = nfs_alloc_fattr(); -	if (res.old_fattr == NULL || res.new_fattr == NULL) -		goto out; -  	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);  	if (!status) {  		update_changeattr(old_dir, &res.old_cinfo); -		nfs_post_op_update_inode(old_dir, res.old_fattr);  		update_changeattr(new_dir, &res.new_cinfo); -		nfs_post_op_update_inode(new_dir, res.new_fattr);  	} -out: -	nfs_free_fattr(res.new_fattr); -	nfs_free_fattr(res.old_fattr);  	return status;  } @@ -2968,18 +2991,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *  	int status = -ENOMEM;  	res.fattr = nfs_alloc_fattr(); -	res.dir_attr = nfs_alloc_fattr(); -	if (res.fattr == NULL || res.dir_attr == NULL) +	if (res.fattr == NULL)  		goto out;  	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);  	if (!status) {  		update_changeattr(dir, &res.cinfo); -		nfs_post_op_update_inode(dir, res.dir_attr);  		nfs_post_op_update_inode(inode, res.fattr);  	}  out: -	nfs_free_fattr(res.dir_attr);  	nfs_free_fattr(res.fattr);  	return status;  } @@ -3002,7 +3022,6 @@ struct nfs4_createdata {  	struct nfs4_create_res res;  	struct nfs_fh fh;  	struct nfs_fattr fattr; -	struct nfs_fattr dir_fattr;  };  static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -3026,9 +3045,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,  		data->res.server = server;  		data->res.fh = &data->fh;  		data->res.fattr = &data->fattr; -		data->res.dir_fattr = &data->dir_fattr;  		nfs_fattr_init(data->res.fattr); -		nfs_fattr_init(data->res.dir_fattr);  	}  	return data;  } @@ -3039,7 +3056,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_  				 
   &data->arg.seq_args, &data->res.seq_res, 1);  	if (status == 0) {  		update_changeattr(dir, &data->res.dir_cinfo); -		nfs_post_op_update_inode(dir, data->res.dir_fattr);  		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);  	}  	return status; @@ -3335,12 +3351,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,  void __nfs4_read_done_cb(struct nfs_read_data *data)  { -	nfs_invalidate_atime(data->inode); +	nfs_invalidate_atime(data->header->inode);  }  static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)  { -	struct nfs_server *server = NFS_SERVER(data->inode); +	struct nfs_server *server = NFS_SERVER(data->header->inode);  	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {  		rpc_restart_call_prepare(task); @@ -3375,7 +3391,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message  static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)  { -	if (nfs4_setup_sequence(NFS_SERVER(data->inode), +	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),  				&data->args.seq_args,  				&data->res.seq_res,  				task)) @@ -3383,25 +3399,9 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da  	rpc_call_start(task);  } -/* Reset the the nfs_read_data to send the read to the MDS. 
*/ -void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) -{ -	dprintk("%s Reset task for i/o through\n", __func__); -	put_lseg(data->lseg); -	data->lseg = NULL; -	/* offsets will differ in the dense stripe case */ -	data->args.offset = data->mds_offset; -	data->ds_clp = NULL; -	data->args.fh     = NFS_FH(data->inode); -	data->read_done_cb = nfs4_read_done_cb; -	task->tk_ops = data->mds_ops; -	rpc_task_reset_client(task, NFS_CLIENT(data->inode)); -} -EXPORT_SYMBOL_GPL(nfs4_reset_read); -  static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)  { -	struct inode *inode = data->inode; +	struct inode *inode = data->header->inode;  	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {  		rpc_restart_call_prepare(task); @@ -3409,7 +3409,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data  	}  	if (task->tk_status >= 0) {  		renew_lease(NFS_SERVER(inode), data->timestamp); -		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); +		nfs_post_op_update_inode_force_wcc(inode, &data->fattr);  	}  	return 0;  } @@ -3422,32 +3422,30 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)  		nfs4_write_done_cb(task, data);  } -/* Reset the the nfs_write_data to send the write to the MDS. 
*/ -void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) +static +bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)  { -	dprintk("%s Reset task for i/o through\n", __func__); -	put_lseg(data->lseg); -	data->lseg          = NULL; -	data->ds_clp        = NULL; -	data->write_done_cb = nfs4_write_done_cb; -	data->args.fh       = NFS_FH(data->inode); -	data->args.bitmask  = data->res.server->cache_consistency_bitmask; -	data->args.offset   = data->mds_offset; -	data->res.fattr     = &data->fattr; -	task->tk_ops        = data->mds_ops; -	rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +	const struct nfs_pgio_header *hdr = data->header; + +	/* Don't request attributes for pNFS or O_DIRECT writes */ +	if (data->ds_clp != NULL || hdr->dreq != NULL) +		return false; +	/* Otherwise, request attributes if and only if we don't hold +	 * a delegation +	 */ +	return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;  } -EXPORT_SYMBOL_GPL(nfs4_reset_write);  static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)  { -	struct nfs_server *server = NFS_SERVER(data->inode); +	struct nfs_server *server = NFS_SERVER(data->header->inode); -	if (data->lseg) { +	if (!nfs4_write_need_cache_consistency_data(data)) {  		data->args.bitmask = NULL;  		data->res.fattr = NULL;  	} else  		data->args.bitmask = server->cache_consistency_bitmask; +  	if (!data->write_done_cb)  		data->write_done_cb = nfs4_write_done_cb;  	data->res.server = server; @@ -3459,6 +3457,16 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag  static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)  { +	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), +				&data->args.seq_args, +				&data->res.seq_res, +				task)) +		return; +	rpc_call_start(task); +} + +static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{  	if 
(nfs4_setup_sequence(NFS_SERVER(data->inode),  				&data->args.seq_args,  				&data->res.seq_res, @@ -3467,7 +3475,7 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_  	rpc_call_start(task);  } -static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)  {  	struct inode *inode = data->inode; @@ -3475,28 +3483,22 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat  		rpc_restart_call_prepare(task);  		return -EAGAIN;  	} -	nfs_refresh_inode(inode, data->res.fattr);  	return 0;  } -static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)  {  	if (!nfs4_sequence_done(task, &data->res.seq_res))  		return -EAGAIN; -	return data->write_done_cb(task, data); +	return data->commit_done_cb(task, data);  } -static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)  {  	struct nfs_server *server = NFS_SERVER(data->inode); -	if (data->lseg) { -		data->args.bitmask = NULL; -		data->res.fattr = NULL; -	} else -		data->args.bitmask = server->cache_consistency_bitmask; -	if (!data->write_done_cb) -		data->write_done_cb = nfs4_commit_done_cb; +	if (data->commit_done_cb == NULL) +		data->commit_done_cb = nfs4_commit_done_cb;  	data->res.server = server;  	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];  	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); @@ -3905,7 +3907,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,  		case -NFS4ERR_SEQ_MISORDERED:  			dprintk("%s ERROR %d, Reset session\n", __func__,  				task->tk_status); -			nfs4_schedule_session_recovery(clp->cl_session); +			nfs4_schedule_session_recovery(clp->cl_session, 
task->tk_status);  			task->tk_status = 0;  			return -EAGAIN;  #endif /* CONFIG_NFS_V4_1 */ @@ -3931,13 +3933,21 @@ wait_on_recovery:  	return -EAGAIN;  } -static void nfs4_construct_boot_verifier(struct nfs_client *clp, -					 nfs4_verifier *bootverf) +static void nfs4_init_boot_verifier(const struct nfs_client *clp, +				    nfs4_verifier *bootverf)  {  	__be32 verf[2]; -	verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); -	verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); +	if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { +		/* An impossible timestamp guarantees this value +		 * will never match a generated boot time. */ +		verf[0] = 0; +		verf[1] = (__be32)(NSEC_PER_SEC + 1); +	} else { +		struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); +		verf[0] = (__be32)nn->boot_time.tv_sec; +		verf[1] = (__be32)nn->boot_time.tv_nsec; +	}  	memcpy(bootverf->data, verf, sizeof(bootverf->data));  } @@ -3960,7 +3970,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,  	int loop = 0;  	int status; -	nfs4_construct_boot_verifier(clp, &sc_verifier); +	nfs4_init_boot_verifier(clp, &sc_verifier);  	for(;;) {  		rcu_read_lock(); @@ -4104,7 +4114,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co  	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);  	data->args.fhandle = &data->fh;  	data->args.stateid = &data->stateid; -	data->args.bitmask = server->attr_bitmask; +	data->args.bitmask = server->cache_consistency_bitmask;  	nfs_copy_fh(&data->fh, NFS_FH(inode));  	nfs4_stateid_copy(&data->stateid, stateid);  	data->res.fattr = &data->fattr; @@ -4125,9 +4135,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co  	if (status != 0)  		goto out;  	status = data->rpc_status; -	if (status != 0) -		goto out; -	nfs_refresh_inode(inode, &data->fattr); +	if (status == 0) +		nfs_post_op_update_inode_force_wcc(inode, &data->fattr); +	else +		nfs_refresh_inode(inode, &data->fattr);  out: 
 	rpc_put_task(task);  	return status; @@ -4837,7 +4848,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)  			case -NFS4ERR_BAD_HIGH_SLOT:  			case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:  			case -NFS4ERR_DEADSESSION: -				nfs4_schedule_session_recovery(server->nfs_client->cl_session); +				nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);  				goto out;  			case -ERESTARTSYS:  				/* @@ -5079,7 +5090,8 @@ out_inval:  }  static bool -nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +nfs41_same_server_scope(struct nfs41_server_scope *a, +			struct nfs41_server_scope *b)  {  	if (a->server_scope_sz == b->server_scope_sz &&  	    memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) @@ -5089,6 +5101,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)  }  /* + * nfs4_proc_bind_conn_to_session() + * + * The 4.1 client currently uses the same TCP connection for the + * fore and backchannel. 
+ */ +int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred) +{ +	int status; +	struct nfs41_bind_conn_to_session_res res; +	struct rpc_message msg = { +		.rpc_proc = +			&nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION], +		.rpc_argp = clp, +		.rpc_resp = &res, +		.rpc_cred = cred, +	}; + +	dprintk("--> %s\n", __func__); +	BUG_ON(clp == NULL); + +	res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); +	if (unlikely(res.session == NULL)) { +		status = -ENOMEM; +		goto out; +	} + +	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); +	if (status == 0) { +		if (memcmp(res.session->sess_id.data, +		    clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { +			dprintk("NFS: %s: Session ID mismatch\n", __func__); +			status = -EIO; +			goto out_session; +		} +		if (res.dir != NFS4_CDFS4_BOTH) { +			dprintk("NFS: %s: Unexpected direction from server\n", +				__func__); +			status = -EIO; +			goto out_session; +		} +		if (res.use_conn_in_rdma_mode) { +			dprintk("NFS: %s: Server returned RDMA mode = true\n", +				__func__); +			status = -EIO; +			goto out_session; +		} +	} +out_session: +	kfree(res.session); +out: +	dprintk("<-- %s status= %d\n", __func__, status); +	return status; +} + +/*   * nfs4_proc_exchange_id()   *   * Since the clientid has expired, all compounds using sessions @@ -5105,7 +5172,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)  		.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,  	};  	struct nfs41_exchange_id_res res = { -		.client = clp, +		0  	};  	int status;  	struct rpc_message msg = { @@ -5118,7 +5185,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)  	dprintk("--> %s\n", __func__);  	BUG_ON(clp == NULL); -	nfs4_construct_boot_verifier(clp, &verifier); +	nfs4_init_boot_verifier(clp, &verifier);  	args.id_len = scnprintf(args.id, sizeof(args.id),  				"%s/%s/%u", @@ -5126,59 +5193,135 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, 
struct rpc_cred *cred)  				clp->cl_rpcclient->cl_nodename,  				clp->cl_rpcclient->cl_auth->au_flavor); -	res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); -	if (unlikely(!res.server_scope)) { +	res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), +					GFP_NOFS); +	if (unlikely(res.server_owner == NULL)) {  		status = -ENOMEM;  		goto out;  	} -	res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); -	if (unlikely(!res.impl_id)) { +	res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), +					GFP_NOFS); +	if (unlikely(res.server_scope == NULL)) { +		status = -ENOMEM; +		goto out_server_owner; +	} + +	res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); +	if (unlikely(res.impl_id == NULL)) {  		status = -ENOMEM;  		goto out_server_scope;  	}  	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); -	if (!status) -		status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); +	if (status == 0) +		status = nfs4_check_cl_exchange_flags(res.flags); + +	if (status == 0) { +		clp->cl_clientid = res.clientid; +		clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); +		if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) +			clp->cl_seqid = res.seqid; + +		kfree(clp->cl_serverowner); +		clp->cl_serverowner = res.server_owner; +		res.server_owner = NULL; -	if (!status) {  		/* use the most recent implementation id */ -		kfree(clp->impl_id); -		clp->impl_id = res.impl_id; -	} else -		kfree(res.impl_id); +		kfree(clp->cl_implid); +		clp->cl_implid = res.impl_id; -	if (!status) { -		if (clp->server_scope && -		    !nfs41_same_server_scope(clp->server_scope, +		if (clp->cl_serverscope != NULL && +		    !nfs41_same_server_scope(clp->cl_serverscope,  					     res.server_scope)) {  			dprintk("%s: server_scope mismatch detected\n",  				__func__);  			set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); -			kfree(clp->server_scope); -			clp->server_scope = NULL; +			kfree(clp->cl_serverscope); +			
clp->cl_serverscope = NULL;  		} -		if (!clp->server_scope) { -			clp->server_scope = res.server_scope; +		if (clp->cl_serverscope == NULL) { +			clp->cl_serverscope = res.server_scope;  			goto out;  		} -	} +	} else +		kfree(res.impl_id); +out_server_owner: +	kfree(res.server_owner);  out_server_scope:  	kfree(res.server_scope);  out: -	if (clp->impl_id) +	if (clp->cl_implid != NULL)  		dprintk("%s: Server Implementation ID: "  			"domain: %s, name: %s, date: %llu,%u\n", -			__func__, clp->impl_id->domain, clp->impl_id->name, -			clp->impl_id->date.seconds, -			clp->impl_id->date.nseconds); +			__func__, clp->cl_implid->domain, clp->cl_implid->name, +			clp->cl_implid->date.seconds, +			clp->cl_implid->date.nseconds);  	dprintk("<-- %s status= %d\n", __func__, status);  	return status;  } +static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, +		struct rpc_cred *cred) +{ +	struct rpc_message msg = { +		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID], +		.rpc_argp = clp, +		.rpc_cred = cred, +	}; +	int status; + +	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); +	if (status) +		pr_warn("NFS: Got error %d from the server %s on " +			"DESTROY_CLIENTID.", status, clp->cl_hostname); +	return status; +} + +static int nfs4_proc_destroy_clientid(struct nfs_client *clp, +		struct rpc_cred *cred) +{ +	unsigned int loop; +	int ret; + +	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { +		ret = _nfs4_proc_destroy_clientid(clp, cred); +		switch (ret) { +		case -NFS4ERR_DELAY: +		case -NFS4ERR_CLIENTID_BUSY: +			ssleep(1); +			break; +		default: +			return ret; +		} +	} +	return 0; +} + +int nfs4_destroy_clientid(struct nfs_client *clp) +{ +	struct rpc_cred *cred; +	int ret = 0; + +	if (clp->cl_mvops->minor_version < 1) +		goto out; +	if (clp->cl_exchange_flags == 0) +		goto out; +	cred = nfs4_get_exchange_id_cred(clp); +	ret = nfs4_proc_destroy_clientid(clp, cred); +	if (cred) +		put_rpccred(cred); +	switch (ret) { +	case 0: +	
case -NFS4ERR_STALE_CLIENTID: +		clp->cl_exchange_flags = 0; +	} +out: +	return ret; +} +  struct nfs4_get_lease_time_data {  	struct nfs4_get_lease_time_args *args;  	struct nfs4_get_lease_time_res *res; @@ -5399,8 +5542,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)  void nfs4_destroy_session(struct nfs4_session *session)  {  	struct rpc_xprt *xprt; +	struct rpc_cred *cred; -	nfs4_proc_destroy_session(session); +	cred = nfs4_get_exchange_id_cred(session->clp); +	nfs4_proc_destroy_session(session, cred); +	if (cred) +		put_rpccred(cred);  	rcu_read_lock();  	xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); @@ -5510,7 +5657,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,  	return nfs4_verify_back_channel_attrs(args, session);  } -static int _nfs4_proc_create_session(struct nfs_client *clp) +static int _nfs4_proc_create_session(struct nfs_client *clp, +		struct rpc_cred *cred)  {  	struct nfs4_session *session = clp->cl_session;  	struct nfs41_create_session_args args = { @@ -5524,6 +5672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)  		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],  		.rpc_argp = &args,  		.rpc_resp = &res, +		.rpc_cred = cred,  	};  	int status; @@ -5548,7 +5697,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)   * It is the responsibility of the caller to verify the session is   * expired before calling this routine.   */ -int nfs4_proc_create_session(struct nfs_client *clp) +int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)  {  	int status;  	unsigned *ptr; @@ -5556,7 +5705,7 @@ int nfs4_proc_create_session(struct nfs_client *clp)  	dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); -	status = _nfs4_proc_create_session(clp); +	status = _nfs4_proc_create_session(clp, cred);  	if (status)  		goto out; @@ -5578,10 +5727,15 @@ out:   * Issue the over-the-wire RPC DESTROY_SESSION.   
* The caller must serialize access to this routine.   */ -int nfs4_proc_destroy_session(struct nfs4_session *session) +int nfs4_proc_destroy_session(struct nfs4_session *session, +		struct rpc_cred *cred)  { +	struct rpc_message msg = { +		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION], +		.rpc_argp = session, +		.rpc_cred = cred, +	};  	int status = 0; -	struct rpc_message msg;  	dprintk("--> nfs4_proc_destroy_session\n"); @@ -5589,10 +5743,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)  	if (session->clp->cl_cons_state != NFS_CS_READY)  		return status; -	msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION]; -	msg.rpc_argp = session; -	msg.rpc_resp = NULL; -	msg.rpc_cred = NULL;  	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);  	if (status) @@ -5604,53 +5754,79 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)  	return status;  } +/* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. 
+ */ +static int nfs41_check_session_ready(struct nfs_client *clp) +{ +	int ret; +	 +	if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { +		ret = nfs4_client_recover_expired_lease(clp); +		if (ret) +			return ret; +	} +	if (clp->cl_cons_state < NFS_CS_READY) +		return -EPROTONOSUPPORT; +	smp_rmb(); +	return 0; +} +  int nfs4_init_session(struct nfs_server *server)  {  	struct nfs_client *clp = server->nfs_client;  	struct nfs4_session *session;  	unsigned int rsize, wsize; -	int ret;  	if (!nfs4_has_session(clp))  		return 0;  	session = clp->cl_session; -	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) -		return 0; +	spin_lock(&clp->cl_lock); +	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { -	rsize = server->rsize; -	if (rsize == 0) -		rsize = NFS_MAX_FILE_IO_SIZE; -	wsize = server->wsize; -	if (wsize == 0) -		wsize = NFS_MAX_FILE_IO_SIZE; +		rsize = server->rsize; +		if (rsize == 0) +			rsize = NFS_MAX_FILE_IO_SIZE; +		wsize = server->wsize; +		if (wsize == 0) +			wsize = NFS_MAX_FILE_IO_SIZE; -	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; -	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; +		session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; +		session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; +	} +	spin_unlock(&clp->cl_lock); -	ret = nfs4_recover_expired_lease(server); -	if (!ret) -		ret = nfs4_check_client_ready(clp); -	return ret; +	return nfs41_check_session_ready(clp);  } -int nfs4_init_ds_session(struct nfs_client *clp) +int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)  {  	struct nfs4_session *session = clp->cl_session;  	int ret; -	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) -		return 0; - -	ret = nfs4_client_recover_expired_lease(clp); -	if (!ret) -		/* Test for the DS role */ -		if (!is_ds_client(clp)) -			ret = -ENODEV; -	if (!ret) -		ret = nfs4_check_client_ready(clp); -	return ret; +	
spin_lock(&clp->cl_lock); +	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { +		/* +		 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the +		 * DS lease to be equal to the MDS lease. +		 */ +		clp->cl_lease_time = lease_time; +		clp->cl_last_renewal = jiffies; +	} +	spin_unlock(&clp->cl_lock); +	ret = nfs41_check_session_ready(clp); +	if (ret) +		return ret; +	/* Test for the DS role */ +	if (!is_ds_client(clp)) +		return -ENODEV; +	return 0;  }  EXPORT_SYMBOL_GPL(nfs4_init_ds_session); @@ -6557,6 +6733,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {  	.file_inode_ops	= &nfs4_file_inode_operations,  	.file_ops	= &nfs4_file_operations,  	.getroot	= nfs4_proc_get_root, +	.submount	= nfs4_submount,  	.getattr	= nfs4_proc_getattr,  	.setattr	= nfs4_proc_setattr,  	.lookup		= nfs4_proc_lookup, @@ -6589,13 +6766,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {  	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,  	.write_done	= nfs4_write_done,  	.commit_setup	= nfs4_proc_commit_setup, +	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,  	.commit_done	= nfs4_commit_done,  	.lock		= nfs4_proc_lock,  	.clear_acl_cache = nfs4_zap_acl_attr,  	.close_context  = nfs4_close_context,  	.open_context	= nfs4_atomic_open,  	.init_client	= nfs4_init_client, -	.secinfo	= nfs4_proc_secinfo,  };  static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index dc484c0eae7..6930bec91bc 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -49,7 +49,7 @@  #include "nfs4_fs.h"  #include "delegation.h" -#define NFSDBG_FACILITY	NFSDBG_PROC +#define NFSDBG_FACILITY		NFSDBG_STATE  void  nfs4_renew_state(struct work_struct *work) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7f0fcfc1fe9..c679b9ecef6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -57,6 +57,8 @@  #include "internal.h"  #include "pnfs.h" +#define NFSDBG_FACILITY		NFSDBG_STATE +  #define 
OPENOWNER_POOL_SIZE	8  const nfs4_stateid zero_stateid; @@ -254,7 +256,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)  		goto out;  	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);  do_confirm: -	status = nfs4_proc_create_session(clp); +	status = nfs4_proc_create_session(clp, cred);  	if (status != 0)  		goto out;  	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); @@ -1106,6 +1108,8 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)  		return;  	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))  		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); +	dprintk("%s: scheduling lease recovery for server %s\n", __func__, +			clp->cl_hostname);  	nfs4_schedule_state_manager(clp);  }  EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); @@ -1122,6 +1126,8 @@ static void nfs40_handle_cb_pathdown(struct nfs_client *clp)  {  	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);  	nfs_expire_all_delegations(clp); +	dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__, +			clp->cl_hostname);  }  void nfs4_schedule_path_down_recovery(struct nfs_client *clp) @@ -1158,6 +1164,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4  	struct nfs_client *clp = server->nfs_client;  	nfs4_state_mark_reclaim_nograce(clp, state); +	dprintk("%s: scheduling stateid recovery for server %s\n", __func__, +			clp->cl_hostname);  	nfs4_schedule_state_manager(clp);  }  EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1491,19 +1499,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)  		case -NFS4ERR_BADSLOT:  		case -NFS4ERR_BAD_HIGH_SLOT:  		case -NFS4ERR_DEADSESSION: -		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:  		case -NFS4ERR_SEQ_FALSE_RETRY:  		case -NFS4ERR_SEQ_MISORDERED:  			set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);  			/* Zero session reset errors */  			break; +		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: +			set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +			
break;  		case -EKEYEXPIRED:  			/* Nothing we can do */  			nfs4_warn_keyexpired(clp->cl_hostname);  			break;  		default: +			dprintk("%s: failed to handle error %d for server %s\n", +					__func__, error, clp->cl_hostname);  			return error;  	} +	dprintk("%s: handled error %d for server %s\n", __func__, error, +			clp->cl_hostname);  	return 0;  } @@ -1572,34 +1586,82 @@ out:  	return nfs4_recovery_handle_error(clp, status);  } +/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors + * on EXCHANGE_ID for v4.1 + */ +static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) +{ +	switch (status) { +	case -NFS4ERR_SEQ_MISORDERED: +		if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) +			return -ESERVERFAULT; +		/* Lease confirmation error: retry after purging the lease */ +		ssleep(1); +	case -NFS4ERR_CLID_INUSE: +	case -NFS4ERR_STALE_CLIENTID: +		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +		break; +	case -EACCES: +		if (clp->cl_machine_cred == NULL) +			return -EACCES; +		/* Handle case where the user hasn't set up machine creds */ +		nfs4_clear_machine_cred(clp); +	case -NFS4ERR_DELAY: +	case -ETIMEDOUT: +	case -EAGAIN: +		ssleep(1); +		break; + +	case -NFS4ERR_MINOR_VERS_MISMATCH: +		if (clp->cl_cons_state == NFS_CS_SESSION_INITING) +			nfs_mark_client_ready(clp, -EPROTONOSUPPORT); +		dprintk("%s: exit with error %d for server %s\n", +				__func__, -EPROTONOSUPPORT, clp->cl_hostname); +		return -EPROTONOSUPPORT; +	case -EKEYEXPIRED: +		nfs4_warn_keyexpired(clp->cl_hostname); +	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery +				 * in nfs4_exchange_id */ +	default: +		dprintk("%s: exit with error %d for server %s\n", __func__, +				status, clp->cl_hostname); +		return status; +	} +	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +	dprintk("%s: handled error %d for server %s\n", __func__, status, +			clp->cl_hostname); +	return 0; +} +  static int nfs4_reclaim_lease(struct nfs_client *clp)  {  	
struct rpc_cred *cred;  	const struct nfs4_state_recovery_ops *ops =  		clp->cl_mvops->reboot_recovery_ops; -	int status = -ENOENT; +	int status;  	cred = ops->get_clid_cred(clp); -	if (cred != NULL) { -		status = ops->establish_clid(clp, cred); -		put_rpccred(cred); -		/* Handle case where the user hasn't set up machine creds */ -		if (status == -EACCES && cred == clp->cl_machine_cred) { -			nfs4_clear_machine_cred(clp); -			status = -EAGAIN; -		} -		if (status == -NFS4ERR_MINOR_VERS_MISMATCH) -			status = -EPROTONOSUPPORT; -	} -	return status; +	if (cred == NULL) +		return -ENOENT; +	status = ops->establish_clid(clp, cred); +	put_rpccred(cred); +	if (status != 0) +		return nfs4_handle_reclaim_lease_error(clp, status); +	return 0;  }  #ifdef CONFIG_NFS_V4_1 -void nfs4_schedule_session_recovery(struct nfs4_session *session) +void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)  {  	struct nfs_client *clp = session->clp; -	set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); +	switch (err) { +	default: +		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); +		break; +	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: +		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +	}  	nfs4_schedule_lease_recovery(clp);  }  EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); @@ -1607,14 +1669,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);  void nfs41_handle_recall_slot(struct nfs_client *clp)  {  	set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); +	dprintk("%s: scheduling slot recall for server %s\n", __func__, +			clp->cl_hostname);  	nfs4_schedule_state_manager(clp);  }  static void nfs4_reset_all_state(struct nfs_client *clp)  {  	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { -		clp->cl_boot_time = CURRENT_TIME; +		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); +		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);  		nfs4_state_start_reclaim_nograce(clp); +		dprintk("%s: scheduling reset of all state for server %s!\n", +				__func__, 
clp->cl_hostname);  		nfs4_schedule_state_manager(clp);  	}  } @@ -1623,33 +1690,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)  {  	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {  		nfs4_state_start_reclaim_reboot(clp); +		dprintk("%s: server %s rebooted!\n", __func__, +				clp->cl_hostname);  		nfs4_schedule_state_manager(clp);  	}  }  static void nfs41_handle_state_revoked(struct nfs_client *clp)  { -	/* Temporary */  	nfs4_reset_all_state(clp); +	dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);  }  static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)  {  	/* This will need to handle layouts too */  	nfs_expire_all_delegations(clp); +	dprintk("%s: Recallable state revoked on server %s!\n", __func__, +			clp->cl_hostname);  } -static void nfs41_handle_cb_path_down(struct nfs_client *clp) +static void nfs41_handle_backchannel_fault(struct nfs_client *clp)  {  	nfs_expire_all_delegations(clp);  	if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)  		nfs4_schedule_state_manager(clp); +	dprintk("%s: server %s declared a backchannel fault\n", __func__, +			clp->cl_hostname); +} + +static void nfs41_handle_cb_path_down(struct nfs_client *clp) +{ +	if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, +		&clp->cl_state) == 0) +		nfs4_schedule_state_manager(clp);  }  void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)  {  	if (!flags)  		return; + +	dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n", +		__func__, clp->cl_hostname, clp->cl_clientid, flags); +  	if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)  		nfs41_handle_server_reboot(clp);  	if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | @@ -1659,18 +1743,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)  		nfs41_handle_state_revoked(clp);  	if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)  		nfs41_handle_recallable_state_revoked(clp); -	if (flags & 
(SEQ4_STATUS_CB_PATH_DOWN | -			    SEQ4_STATUS_BACKCHANNEL_FAULT | -			    SEQ4_STATUS_CB_PATH_DOWN_SESSION)) +	if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) +		nfs41_handle_backchannel_fault(clp); +	else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | +				SEQ4_STATUS_CB_PATH_DOWN_SESSION))  		nfs41_handle_cb_path_down(clp);  }  static int nfs4_reset_session(struct nfs_client *clp)  { +	struct rpc_cred *cred;  	int status;  	nfs4_begin_drain_session(clp); -	status = nfs4_proc_destroy_session(clp->cl_session); +	cred = nfs4_get_exchange_id_cred(clp); +	status = nfs4_proc_destroy_session(clp->cl_session, cred);  	if (status && status != -NFS4ERR_BADSESSION &&  	    status != -NFS4ERR_DEADSESSION) {  		status = nfs4_recovery_handle_error(clp, status); @@ -1678,19 +1765,26 @@ static int nfs4_reset_session(struct nfs_client *clp)  	}  	memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); -	status = nfs4_proc_create_session(clp); +	status = nfs4_proc_create_session(clp, cred);  	if (status) { -		status = nfs4_recovery_handle_error(clp, status); +		dprintk("%s: session reset failed with status %d for server %s!\n", +			__func__, status, clp->cl_hostname); +		status = nfs4_handle_reclaim_lease_error(clp, status);  		goto out;  	}  	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);  	/* create_session negotiated new slot table */  	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); +	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +	dprintk("%s: session reset was successful for server %s!\n", +			__func__, clp->cl_hostname);  	 /* Let the state manager reestablish state */  	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))  		nfs41_setup_state_renewal(clp);  out: +	if (cred) +		put_rpccred(cred);  	return status;  } @@ -1722,37 +1816,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)  	return 0;  } -#else /* CONFIG_NFS_V4_1 */ -static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -static int nfs4_end_drain_session(struct nfs_client 
*clp) { return 0; } -static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } -#endif /* CONFIG_NFS_V4_1 */ - -/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors - * on EXCHANGE_ID for v4.1 - */ -static void nfs4_set_lease_expired(struct nfs_client *clp, int status) +static int nfs4_bind_conn_to_session(struct nfs_client *clp)  { -	switch (status) { -	case -NFS4ERR_CLID_INUSE: -	case -NFS4ERR_STALE_CLIENTID: -		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +	struct rpc_cred *cred; +	int ret; + +	nfs4_begin_drain_session(clp); +	cred = nfs4_get_exchange_id_cred(clp); +	ret = nfs4_proc_bind_conn_to_session(clp, cred); +	if (cred) +		put_rpccred(cred); +	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +	switch (ret) { +	case 0: +		dprintk("%s: bind_conn_to_session was successful for server %s!\n", +			__func__, clp->cl_hostname);  		break;  	case -NFS4ERR_DELAY: -	case -ETIMEDOUT: -	case -EAGAIN:  		ssleep(1); +		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);  		break; - -	case -EKEYEXPIRED: -		nfs4_warn_keyexpired(clp->cl_hostname); -	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery -				 * in nfs4_exchange_id */  	default: -		return; +		return nfs4_recovery_handle_error(clp, ret);  	} -	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +	return 0;  } +#else /* CONFIG_NFS_V4_1 */ +static int nfs4_reset_session(struct nfs_client *clp) { return 0; } +static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } + +static int nfs4_bind_conn_to_session(struct nfs_client *clp) +{ +	return 0; +} +#endif /* CONFIG_NFS_V4_1 */  static void nfs4_state_manager(struct nfs_client *clp)  { @@ -1760,19 +1858,21 @@ static void nfs4_state_manager(struct nfs_client *clp)  	/* Ensure exclusive access to NFSv4 state */  	do { +		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { +			status = nfs4_reclaim_lease(clp); +			if (status < 0) +				goto 
out_error; +			clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); +			set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +		} +  		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {  			/* We're going to have to re-establish a clientid */  			status = nfs4_reclaim_lease(clp); -			if (status) { -				nfs4_set_lease_expired(clp, status); -				if (test_bit(NFS4CLNT_LEASE_EXPIRED, -							&clp->cl_state)) -					continue; -				if (clp->cl_cons_state == -							NFS_CS_SESSION_INITING) -					nfs_mark_client_ready(clp, status); +			if (status < 0)  				goto out_error; -			} +			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) +				continue;  			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);  			if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, @@ -1803,6 +1903,15 @@ static void nfs4_state_manager(struct nfs_client *clp)  				goto out_error;  		} +		/* Send BIND_CONN_TO_SESSION */ +		if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, +				&clp->cl_state) && nfs4_has_session(clp)) { +			status = nfs4_bind_conn_to_session(clp); +			if (status < 0) +				goto out_error; +			continue; +		} +  		/* First recover reboot state... 
*/  		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {  			status = nfs4_do_reclaim(clp, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c54aae364be..ee4a74db95d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -53,9 +53,11 @@  #include <linux/nfs4.h>  #include <linux/nfs_fs.h>  #include <linux/nfs_idmap.h> +  #include "nfs4_fs.h"  #include "internal.h"  #include "pnfs.h" +#include "netns.h"  #define NFSDBG_FACILITY		NFSDBG_XDR @@ -99,9 +101,12 @@ static int nfs4_stat_to_errno(int);  #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))  #define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))  #define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We support only one layout type per file system */ +#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)  /* This is based on getfattr, which uses the most attributes: */  #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ -				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz)) +				3 + 3 + 3 + nfs4_owner_maxsz + \ +				nfs4_group_maxsz + decode_mdsthreshold_maxsz))  #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \  				nfs4_fattr_value_maxsz)  #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -321,8 +326,20 @@ static int nfs4_stat_to_errno(int);  				     1 /* csr_flags */ + \  				     decode_channel_attrs_maxsz + \  				     decode_channel_attrs_maxsz) +#define encode_bind_conn_to_session_maxsz  (op_encode_hdr_maxsz + \ +				     /* bctsa_sessid */ \ +				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ +				     1 /* bctsa_dir */ + \ +				     1 /* bctsa_use_conn_in_rdma_mode */) +#define decode_bind_conn_to_session_maxsz  (op_decode_hdr_maxsz +	\ +				     /* bctsr_sessid */ \ +				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ +				     1 /* bctsr_dir */ + \ +				     1 /* bctsr_use_conn_in_rdma_mode */)  #define encode_destroy_session_maxsz    (op_encode_hdr_maxsz + 4)  #define decode_destroy_session_maxsz    
(op_decode_hdr_maxsz) +#define encode_destroy_clientid_maxsz   (op_encode_hdr_maxsz + 2) +#define decode_destroy_clientid_maxsz   (op_decode_hdr_maxsz)  #define encode_sequence_maxsz	(op_encode_hdr_maxsz + \  				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)  #define decode_sequence_maxsz	(op_decode_hdr_maxsz + \ @@ -421,30 +438,22 @@ static int nfs4_stat_to_errno(int);  #define NFS4_enc_commit_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				encode_putfh_maxsz + \ -				encode_commit_maxsz + \ -				encode_getattr_maxsz) +				encode_commit_maxsz)  #define NFS4_dec_commit_sz	(compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \ -				decode_commit_maxsz + \ -				decode_getattr_maxsz) +				decode_commit_maxsz)  #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				encode_putfh_maxsz + \ -				encode_savefh_maxsz + \  				encode_open_maxsz + \  				encode_getfh_maxsz + \ -				encode_getattr_maxsz + \ -				encode_restorefh_maxsz + \  				encode_getattr_maxsz)  #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \ -				decode_savefh_maxsz + \  				decode_open_maxsz + \  				decode_getfh_maxsz + \ -				decode_getattr_maxsz + \ -				decode_restorefh_maxsz + \  				decode_getattr_maxsz)  #define NFS4_enc_open_confirm_sz \  				(compound_encode_hdr_maxsz + \ @@ -595,47 +604,37 @@ static int nfs4_stat_to_errno(int);  #define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				encode_putfh_maxsz + \ -				encode_remove_maxsz + \ -				encode_getattr_maxsz) +				encode_remove_maxsz)  #define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \ -				decode_remove_maxsz + \ -				decode_getattr_maxsz) +				decode_remove_maxsz)  #define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				
encode_putfh_maxsz + \  				encode_savefh_maxsz + \  				encode_putfh_maxsz + \ -				encode_rename_maxsz + \ -				encode_getattr_maxsz + \ -				encode_restorefh_maxsz + \ -				encode_getattr_maxsz) +				encode_rename_maxsz)  #define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \  				decode_savefh_maxsz + \  				decode_putfh_maxsz + \ -				decode_rename_maxsz + \ -				decode_getattr_maxsz + \ -				decode_restorefh_maxsz + \ -				decode_getattr_maxsz) +				decode_rename_maxsz)  #define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				encode_putfh_maxsz + \  				encode_savefh_maxsz + \  				encode_putfh_maxsz + \  				encode_link_maxsz + \ -				decode_getattr_maxsz + \  				encode_restorefh_maxsz + \ -				decode_getattr_maxsz) +				encode_getattr_maxsz)  #define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \  				decode_savefh_maxsz + \  				decode_putfh_maxsz + \  				decode_link_maxsz + \ -				decode_getattr_maxsz + \  				decode_restorefh_maxsz + \  				decode_getattr_maxsz)  #define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \ @@ -653,20 +652,14 @@ static int nfs4_stat_to_errno(int);  #define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \  				encode_putfh_maxsz + \ -				encode_savefh_maxsz + \  				encode_create_maxsz + \  				encode_getfh_maxsz + \ -				encode_getattr_maxsz + \ -				encode_restorefh_maxsz + \  				encode_getattr_maxsz)  #define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \  				decode_sequence_maxsz + \  				decode_putfh_maxsz + \ -				decode_savefh_maxsz + \  				decode_create_maxsz + \  				decode_getfh_maxsz + \ -				decode_getattr_maxsz + \ -				decode_restorefh_maxsz + \  				decode_getattr_maxsz)  #define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \  				encode_sequence_maxsz + \ @@ -738,6 +731,12 @@ static int 
nfs4_stat_to_errno(int);  				decode_putfh_maxsz + \  				decode_secinfo_maxsz)  #if defined(CONFIG_NFS_V4_1) +#define NFS4_enc_bind_conn_to_session_sz \ +				(compound_encode_hdr_maxsz + \ +				 encode_bind_conn_to_session_maxsz) +#define NFS4_dec_bind_conn_to_session_sz \ +				(compound_decode_hdr_maxsz + \ +				 decode_bind_conn_to_session_maxsz)  #define NFS4_enc_exchange_id_sz \  				(compound_encode_hdr_maxsz + \  				 encode_exchange_id_maxsz) @@ -754,6 +753,10 @@ static int nfs4_stat_to_errno(int);  					 encode_destroy_session_maxsz)  #define NFS4_dec_destroy_session_sz	(compound_decode_hdr_maxsz + \  					 decode_destroy_session_maxsz) +#define NFS4_enc_destroy_clientid_sz	(compound_encode_hdr_maxsz + \ +					 encode_destroy_clientid_maxsz) +#define NFS4_dec_destroy_clientid_sz	(compound_decode_hdr_maxsz + \ +					 decode_destroy_clientid_maxsz)  #define NFS4_enc_sequence_sz \  				(compound_decode_hdr_maxsz + \  				 encode_sequence_maxsz) @@ -1103,7 +1106,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg  	encode_nfs4_stateid(xdr, arg->stateid);  } -static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)  {  	__be32 *p; @@ -1194,6 +1197,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c  			   bitmask[1] & nfs4_fattr_bitmap[1], hdr);  } +static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, +				 struct compound_hdr *hdr) +{ +	encode_getattr_three(xdr, +			     bitmask[0] & nfs4_fattr_bitmap[0], +			     bitmask[1] & nfs4_fattr_bitmap[1], +			     bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, +			     hdr); +} +  static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)  {  	encode_getattr_three(xdr, @@ -1678,6 +1691,20 @@ static void encode_secinfo(struct 
xdr_stream *xdr, const struct qstr *name, stru  #if defined(CONFIG_NFS_V4_1)  /* NFSv4.1 operations */ +static void encode_bind_conn_to_session(struct xdr_stream *xdr, +				   struct nfs4_session *session, +				   struct compound_hdr *hdr) +{ +	__be32 *p; + +	encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION, +		decode_bind_conn_to_session_maxsz, hdr); +	encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); +	p = xdr_reserve_space(xdr, 8); +	*p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH); +	*p = 0;	/* use_conn_in_rdma_mode = False */ +} +  static void encode_exchange_id(struct xdr_stream *xdr,  			       struct nfs41_exchange_id_args *args,  			       struct compound_hdr *hdr) @@ -1726,6 +1753,7 @@ static void encode_create_session(struct xdr_stream *xdr,  	char machine_name[NFS4_MAX_MACHINE_NAME_LEN];  	uint32_t len;  	struct nfs_client *clp = args->client; +	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);  	u32 max_resp_sz_cached;  	/* @@ -1767,7 +1795,7 @@ static void encode_create_session(struct xdr_stream *xdr,  	*p++ = cpu_to_be32(RPC_AUTH_UNIX);			/* auth_sys */  	/* authsys_parms rfc1831 */ -	*p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec);	/* stamp */ +	*p++ = (__be32)nn->boot_time.tv_nsec;		/* stamp */  	p = xdr_encode_opaque(p, machine_name, len);  	*p++ = cpu_to_be32(0);				/* UID */  	*p++ = cpu_to_be32(0);				/* GID */ @@ -1782,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,  	encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);  } +static void encode_destroy_clientid(struct xdr_stream *xdr, +				   uint64_t clientid, +				   struct compound_hdr *hdr) +{ +	encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr); +	encode_uint64(xdr, clientid); +} +  static void encode_reclaim_complete(struct xdr_stream *xdr,  				    struct nfs41_reclaim_complete_args *args,  				    struct compound_hdr *hdr) @@ -2064,7 +2100,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst 
*req, struct xdr_stream *xdr,  	encode_sequence(xdr, &args->seq_args, &hdr);  	encode_putfh(xdr, args->fh, &hdr);  	encode_remove(xdr, &args->name, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr);  	encode_nops(&hdr);  } @@ -2084,9 +2119,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_savefh(xdr, &hdr);  	encode_putfh(xdr, args->new_dir, &hdr);  	encode_rename(xdr, args->old_name, args->new_name, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr); -	encode_restorefh(xdr, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr);  	encode_nops(&hdr);  } @@ -2106,7 +2138,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_savefh(xdr, &hdr);  	encode_putfh(xdr, args->dir_fh, &hdr);  	encode_link(xdr, args->name, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr);  	encode_restorefh(xdr, &hdr);  	encode_getfattr(xdr, args->bitmask, &hdr);  	encode_nops(&hdr); @@ -2125,12 +2156,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_compound_hdr(xdr, req, &hdr);  	encode_sequence(xdr, &args->seq_args, &hdr);  	encode_putfh(xdr, args->dir_fh, &hdr); -	encode_savefh(xdr, &hdr);  	encode_create(xdr, args, &hdr);  	encode_getfh(xdr, &hdr);  	encode_getfattr(xdr, args->bitmask, &hdr); -	encode_restorefh(xdr, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr);  	encode_nops(&hdr);  } @@ -2191,12 +2219,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_compound_hdr(xdr, req, &hdr);  	encode_sequence(xdr, &args->seq_args, &hdr);  	encode_putfh(xdr, args->fh, &hdr); -	encode_savefh(xdr, &hdr);  	encode_open(xdr, args, &hdr);  	encode_getfh(xdr, &hdr); -	encode_getfattr(xdr, args->bitmask, &hdr); -	encode_restorefh(xdr, &hdr); -	encode_getfattr(xdr, args->dir_bitmask, &hdr); +	encode_getfattr_open(xdr, args->bitmask, &hdr);  	encode_nops(&hdr);  } @@ -2448,7 +2473,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct 
xdr_stream *xdr,   *  a COMMIT request   */  static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, -				struct nfs_writeargs *args) +				struct nfs_commitargs *args)  {  	struct compound_hdr hdr = {  		.minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2458,8 +2483,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_sequence(xdr, &args->seq_args, &hdr);  	encode_putfh(xdr, args->fh, &hdr);  	encode_commit(xdr, args, &hdr); -	if (args->bitmask) -		encode_getfattr(xdr, args->bitmask, &hdr);  	encode_nops(&hdr);  } @@ -2602,8 +2625,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,  	encode_compound_hdr(xdr, req, &hdr);  	encode_sequence(xdr, &args->seq_args, &hdr);  	encode_putfh(xdr, args->fhandle, &hdr); -	encode_delegreturn(xdr, args->stateid, &hdr);  	encode_getfattr(xdr, args->bitmask, &hdr); +	encode_delegreturn(xdr, args->stateid, &hdr);  	encode_nops(&hdr);  } @@ -2651,6 +2674,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,  #if defined(CONFIG_NFS_V4_1)  /* + * BIND_CONN_TO_SESSION request + */ +static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req, +				struct xdr_stream *xdr, +				struct nfs_client *clp) +{ +	struct compound_hdr hdr = { +		.minorversion = clp->cl_mvops->minor_version, +	}; + +	encode_compound_hdr(xdr, req, &hdr); +	encode_bind_conn_to_session(xdr, clp->cl_session, &hdr); +	encode_nops(&hdr); +} + +/*   * EXCHANGE_ID request   */  static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, @@ -2699,6 +2738,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,  }  /* + * a DESTROY_CLIENTID request + */ +static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req, +					 struct xdr_stream *xdr, +					 struct nfs_client *clp) +{ +	struct compound_hdr hdr = { +		.minorversion = clp->cl_mvops->minor_version, +	}; + +	encode_compound_hdr(xdr, req, &hdr); +	encode_destroy_clientid(xdr, clp->cl_clientid, &hdr); +	
encode_nops(&hdr); +} + +/*   * a SEQUENCE request   */  static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -4102,7 +4157,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)  	return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);  } -static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)  {  	int status; @@ -4220,6 +4275,110 @@ xdr_error:  	return status;  } +static int decode_threshold_hint(struct xdr_stream *xdr, +				  uint32_t *bitmap, +				  uint64_t *res, +				  uint32_t hint_bit) +{ +	__be32 *p; + +	*res = 0; +	if (likely(bitmap[0] & hint_bit)) { +		p = xdr_inline_decode(xdr, 8); +		if (unlikely(!p)) +			goto out_overflow; +		xdr_decode_hyper(p, res); +	} +	return 0; +out_overflow: +	print_overflow_msg(__func__, xdr); +	return -EIO; +} + +static int decode_first_threshold_item4(struct xdr_stream *xdr, +					struct nfs4_threshold *res) +{ +	__be32 *p, *savep; +	uint32_t bitmap[3] = {0,}, attrlen; +	int status; + +	/* layout type */ +	p = xdr_inline_decode(xdr, 4); +	if (unlikely(!p)) { +		print_overflow_msg(__func__, xdr); +		return -EIO; +	} +	res->l_type = be32_to_cpup(p); + +	/* thi_hintset bitmap */ +	status = decode_attr_bitmap(xdr, bitmap); +	if (status < 0) +		goto xdr_error; + +	/* thi_hintlist length */ +	status = decode_attr_length(xdr, &attrlen, &savep); +	if (status < 0) +		goto xdr_error; +	/* thi_hintlist */ +	status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD); +	if (status < 0) +		goto xdr_error; +	status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR); +	if (status < 0) +		goto xdr_error; +	status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz, +				       THRESHOLD_RD_IO); +	if (status < 0) +		goto xdr_error; +	status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz, +				       THRESHOLD_WR_IO); +	if (status < 0) +		goto xdr_error; + +	status 
= verify_attr_len(xdr, savep, attrlen); +	res->bm = bitmap[0]; + +	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", +		 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz, +		res->wr_io_sz); +xdr_error: +	dprintk("%s ret=%d!\n", __func__, status); +	return status; +} + +/* + * Thresholds on pNFS direct I/O vrs MDS I/O + */ +static int decode_attr_mdsthreshold(struct xdr_stream *xdr, +				    uint32_t *bitmap, +				    struct nfs4_threshold *res) +{ +	__be32 *p; +	int status = 0; +	uint32_t num; + +	if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) +		return -EIO; +	if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { +		p = xdr_inline_decode(xdr, 4); +		if (unlikely(!p)) +			goto out_overflow; +		num = be32_to_cpup(p); +		if (num == 0) +			return 0; +		if (num > 1) +			printk(KERN_INFO "%s: Warning: Multiple pNFS layout " +				"drivers per filesystem not supported\n", +				__func__); + +		status = decode_first_threshold_item4(xdr, res); +	} +	return status; +out_overflow: +	print_overflow_msg(__func__, xdr); +	return -EIO; +} +  static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,  		struct nfs_fattr *fattr, struct nfs_fh *fh,  		struct nfs4_fs_locations *fs_loc, @@ -4326,6 +4485,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,  		goto xdr_error;  	fattr->valid |= status; +	status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); +	if (status < 0) +		goto xdr_error; +  xdr_error:  	dprintk("%s: xdr returned %d\n", __func__, -status);  	return status; @@ -5156,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,  	uint32_t dummy;  	char *dummy_str;  	int status; -	struct nfs_client *clp = res->client;  	uint32_t impl_id_count;  	status = decode_op_hdr(xdr, OP_EXCHANGE_ID); @@ -5166,36 +5328,39 @@ static int decode_exchange_id(struct xdr_stream *xdr,  	p = xdr_inline_decode(xdr, 8);  	if (unlikely(!p))  		goto out_overflow; -	xdr_decode_hyper(p, 
&clp->cl_clientid); +	xdr_decode_hyper(p, &res->clientid);  	p = xdr_inline_decode(xdr, 12);  	if (unlikely(!p))  		goto out_overflow; -	clp->cl_seqid = be32_to_cpup(p++); -	clp->cl_exchange_flags = be32_to_cpup(p++); +	res->seqid = be32_to_cpup(p++); +	res->flags = be32_to_cpup(p++);  	/* We ask for SP4_NONE */  	dummy = be32_to_cpup(p);  	if (dummy != SP4_NONE)  		return -EIO; -	/* Throw away minor_id */ +	/* server_owner4.so_minor_id */  	p = xdr_inline_decode(xdr, 8);  	if (unlikely(!p))  		goto out_overflow; +	p = xdr_decode_hyper(p, &res->server_owner->minor_id); -	/* Throw away Major id */ +	/* server_owner4.so_major_id */  	status = decode_opaque_inline(xdr, &dummy, &dummy_str);  	if (unlikely(status))  		return status; +	if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) +		return -EIO; +	memcpy(res->server_owner->major_id, dummy_str, dummy); +	res->server_owner->major_id_sz = dummy; -	/* Save server_scope */ +	/* server_scope4 */  	status = decode_opaque_inline(xdr, &dummy, &dummy_str);  	if (unlikely(status))  		return status; -  	if (unlikely(dummy > NFS4_OPAQUE_LIMIT))  		return -EIO; -  	memcpy(res->server_scope->server_scope, dummy_str, dummy);  	res->server_scope->server_scope_sz = dummy; @@ -5276,6 +5441,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)  	return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);  } +static int decode_bind_conn_to_session(struct xdr_stream *xdr, +				struct nfs41_bind_conn_to_session_res *res) +{ +	__be32 *p; +	int status; + +	status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION); +	if (!status) +		status = decode_sessionid(xdr, &res->session->sess_id); +	if (unlikely(status)) +		return status; + +	/* dir flags, rdma mode bool */ +	p = xdr_inline_decode(xdr, 8); +	if (unlikely(!p)) +		goto out_overflow; + +	res->dir = be32_to_cpup(p++); +	if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH) +		return -EIO; +	if (be32_to_cpup(p) == 0) +		res->use_conn_in_rdma_mode = false; +	else +		
res->use_conn_in_rdma_mode = true; + +	return 0; +out_overflow: +	print_overflow_msg(__func__, xdr); +	return -EIO; +} +  static int decode_create_session(struct xdr_stream *xdr,  				 struct nfs41_create_session_res *res)  { @@ -5312,6 +5508,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)  	return decode_op_hdr(xdr, OP_DESTROY_SESSION);  } +static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy) +{ +	return decode_op_hdr(xdr, OP_DESTROY_CLIENTID); +} +  static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)  {  	return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); @@ -5800,9 +6001,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	if (status)  		goto out;  	status = decode_remove(xdr, &res->cinfo); -	if (status) -		goto out; -	decode_getfattr(xdr, res->dir_attr, res->server);  out:  	return status;  } @@ -5832,15 +6030,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	if (status)  		goto out;  	status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); -	if (status) -		goto out; -	/* Current FH is target directory */ -	if (decode_getfattr(xdr, res->new_fattr, res->server)) -		goto out; -	status = decode_restorefh(xdr); -	if (status) -		goto out; -	decode_getfattr(xdr, res->old_fattr, res->server);  out:  	return status;  } @@ -5876,8 +6065,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	 * Note order: OP_LINK leaves the directory as the current  	 *             filehandle.  	 
*/ -	if (decode_getfattr(xdr, res->dir_attr, res->server)) -		goto out;  	status = decode_restorefh(xdr);  	if (status)  		goto out; @@ -5904,21 +6091,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	status = decode_putfh(xdr);  	if (status)  		goto out; -	status = decode_savefh(xdr); -	if (status) -		goto out;  	status = decode_create(xdr, &res->dir_cinfo);  	if (status)  		goto out;  	status = decode_getfh(xdr, res->fh);  	if (status)  		goto out; -	if (decode_getfattr(xdr, res->fattr, res->server)) -		goto out; -	status = decode_restorefh(xdr); -	if (status) -		goto out; -	decode_getfattr(xdr, res->dir_fattr, res->server); +	decode_getfattr(xdr, res->fattr, res->server);  out:  	return status;  } @@ -6075,19 +6254,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	status = decode_putfh(xdr);  	if (status)  		goto out; -	status = decode_savefh(xdr); -	if (status) -		goto out;  	status = decode_open(xdr, res);  	if (status)  		goto out;  	if (decode_getfh(xdr, &res->fh) != 0)  		goto out; -	if (decode_getfattr(xdr, res->f_attr, res->server) != 0) -		goto out; -	if (decode_restorefh(xdr) != 0) -		goto out; -	decode_getfattr(xdr, res->dir_attr, res->server); +	decode_getfattr(xdr, res->f_attr, res->server);  out:  	return status;  } @@ -6353,7 +6525,7 @@ out:   * Decode COMMIT response   */  static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, -			       struct nfs_writeres *res) +			       struct nfs_commitres *res)  {  	struct compound_hdr hdr;  	int status; @@ -6368,10 +6540,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,  	if (status)  		goto out;  	status = decode_commit(xdr, res); -	if (status) -		goto out; -	if (res->fattr) -		decode_getfattr(xdr, res->fattr, res->server);  out:  	return status;  } @@ -6527,10 +6695,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,  	status = decode_putfh(xdr);  	if (status != 
0)  		goto out; -	status = decode_delegreturn(xdr); +	status = decode_getfattr(xdr, res->fattr, res->server);  	if (status != 0)  		goto out; -	decode_getfattr(xdr, res->fattr, res->server); +	status = decode_delegreturn(xdr);  out:  	return status;  } @@ -6591,6 +6759,22 @@ out:  #if defined(CONFIG_NFS_V4_1)  /* + * Decode BIND_CONN_TO_SESSION response + */ +static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp, +					struct xdr_stream *xdr, +					void *res) +{ +	struct compound_hdr hdr; +	int status; + +	status = decode_compound_hdr(xdr, &hdr); +	if (!status) +		status = decode_bind_conn_to_session(xdr, res); +	return status; +} + +/*   * Decode EXCHANGE_ID response   */  static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, @@ -6639,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,  }  /* + * Decode DESTROY_CLIENTID response + */ +static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp, +					struct xdr_stream *xdr, +					void *res) +{ +	struct compound_hdr hdr; +	int status; + +	status = decode_compound_hdr(xdr, &hdr); +	if (!status) +		status = decode_destroy_clientid(xdr, res); +	return status; +} + +/*   * Decode SEQUENCE response   */  static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, @@ -7085,6 +7285,9 @@ struct rpc_procinfo	nfs4_procedures[] = {  	PROC(TEST_STATEID,	enc_test_stateid,	dec_test_stateid),  	PROC(FREE_STATEID,	enc_free_stateid,	dec_free_stateid),  	PROC(GETDEVICELIST,	enc_getdevicelist,	dec_getdevicelist), +	PROC(BIND_CONN_TO_SESSION, +			enc_bind_conn_to_session, dec_bind_conn_to_session), +	PROC(DESTROY_CLIENTID,	enc_destroy_clientid,	dec_destroy_clientid),  #endif /* CONFIG_NFS_V4_1 */  }; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4bff4a3dab4..b47277baeba 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,  	memcpy(ocomp->cred, 
src_comp->oc_cap.cred, sizeof(ocomp->cred));  } -int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, +static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,  		       struct objio_segment **pseg)  {  /*	This is the in memory structure of the objio_segment @@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)  int objio_read_pagelist(struct nfs_read_data *rdata)  { +	struct nfs_pgio_header *hdr = rdata->header;  	struct objio_state *objios;  	int ret; -	ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, -			rdata->lseg, rdata->args.pages, rdata->args.pgbase, +	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, +			hdr->lseg, rdata->args.pages, rdata->args.pgbase,  			rdata->args.offset, rdata->args.count, rdata,  			GFP_KERNEL, &objios);  	if (unlikely(ret)) @@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)  {  	struct objio_state *objios = priv;  	struct nfs_write_data *wdata = objios->oir.rpcdata; +	struct address_space *mapping = wdata->header->inode->i_mapping;  	pgoff_t index = offset / PAGE_SIZE; -	struct page *page = find_get_page(wdata->inode->i_mapping, index); +	struct page *page = find_get_page(mapping, index);  	if (!page) { -		page = find_or_create_page(wdata->inode->i_mapping, -						index, GFP_NOFS); +		page = find_or_create_page(mapping, index, GFP_NOFS);  		if (unlikely(!page)) {  			dprintk("%s: grab_cache_page Failed index=0x%lx\n",  				__func__, index); @@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {  int objio_write_pagelist(struct nfs_write_data *wdata, int how)  { +	struct nfs_pgio_header *hdr = wdata->header;  	struct objio_state *objios;  	int ret; -	ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, -			wdata->lseg, wdata->args.pages, wdata->args.pgbase, +	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, +			hdr->lseg, wdata->args.pages, wdata->args.pgbase,  			wdata->args.offset, 
wdata->args.count, wdata, GFP_NOFS,  			&objios);  	if (unlikely(ret)) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 595c5fc21a1..87461354530 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)  	if (status >= 0)  		rdata->res.count = status;  	else -		rdata->pnfs_error = status; +		rdata->header->pnfs_error = status;  	objlayout_iodone(oir);  	/* must not use oir after this point */ @@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)  enum pnfs_try_status  objlayout_read_pagelist(struct nfs_read_data *rdata)  { +	struct nfs_pgio_header *hdr = rdata->header; +	struct inode *inode = hdr->inode;  	loff_t offset = rdata->args.offset;  	size_t count = rdata->args.count;  	int err;  	loff_t eof; -	eof = i_size_read(rdata->inode); +	eof = i_size_read(inode);  	if (unlikely(offset + count > eof)) {  		if (offset >= eof) {  			err = 0; @@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)  	}  	rdata->res.eof = (offset + count) >= eof; -	_fix_verify_io_params(rdata->lseg, &rdata->args.pages, +	_fix_verify_io_params(hdr->lseg, &rdata->args.pages,  			      &rdata->args.pgbase,  			      rdata->args.offset, rdata->args.count);  	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", -		__func__, rdata->inode->i_ino, offset, count, rdata->res.eof); +		__func__, inode->i_ino, offset, count, rdata->res.eof);  	err = objio_read_pagelist(rdata);   out:  	if (unlikely(err)) { -		rdata->pnfs_error = err; +		hdr->pnfs_error = err;  		dprintk("%s: Returned Error %d\n", __func__, err);  		return PNFS_NOT_ATTEMPTED;  	} @@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)  		wdata->res.count = status;  		wdata->verf.committed = oir->committed;  	} else { -		wdata->pnfs_error = status; +		wdata->header->pnfs_error = 
status;  	}  	objlayout_iodone(oir);  	/* must not use oir after this point */ @@ -363,15 +365,16 @@ enum pnfs_try_status  objlayout_write_pagelist(struct nfs_write_data *wdata,  			 int how)  { +	struct nfs_pgio_header *hdr = wdata->header;  	int err; -	_fix_verify_io_params(wdata->lseg, &wdata->args.pages, +	_fix_verify_io_params(hdr->lseg, &wdata->args.pages,  			      &wdata->args.pgbase,  			      wdata->args.offset, wdata->args.count);  	err = objio_write_pagelist(wdata, how);  	if (unlikely(err)) { -		wdata->pnfs_error = err; +		hdr->pnfs_error = err;  		dprintk("%s: Returned Error %d\n", __func__, err);  		return PNFS_NOT_ATTEMPTED;  	} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d21fceaa9f6..aed913c833f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,6 +26,47 @@  static struct kmem_cache *nfs_page_cachep; +bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +{ +	p->npages = pagecount; +	if (pagecount <= ARRAY_SIZE(p->page_array)) +		p->pagevec = p->page_array; +	else { +		p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); +		if (!p->pagevec) +			p->npages = 0; +	} +	return p->pagevec != NULL; +} + +void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, +		       struct nfs_pgio_header *hdr, +		       void (*release)(struct nfs_pgio_header *hdr)) +{ +	hdr->req = nfs_list_entry(desc->pg_list.next); +	hdr->inode = desc->pg_inode; +	hdr->cred = hdr->req->wb_context->cred; +	hdr->io_start = req_offset(hdr->req); +	hdr->good_bytes = desc->pg_count; +	hdr->dreq = desc->pg_dreq; +	hdr->release = release; +	hdr->completion_ops = desc->pg_completion_ops; +	if (hdr->completion_ops->init_hdr) +		hdr->completion_ops->init_hdr(hdr); +} + +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) +{ +	spin_lock(&hdr->lock); +	if (pos < hdr->io_start + hdr->good_bytes) { +		set_bit(NFS_IOHDR_ERROR, &hdr->flags); +		clear_bit(NFS_IOHDR_EOF, &hdr->flags); +		hdr->good_bytes = pos - 
hdr->io_start; +		hdr->error = error; +	} +	spin_unlock(&hdr->lock); +} +  static inline struct nfs_page *  nfs_page_alloc(void)  { @@ -76,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,  	 * long write-back delay. This will be adjusted in  	 * update_nfs_request below if the region is not locked. */  	req->wb_page    = page; -	atomic_set(&req->wb_complete, 0);  	req->wb_index	= page->index;  	page_cache_get(page); -	BUG_ON(PagePrivate(page)); -	BUG_ON(!PageLocked(page)); -	BUG_ON(page->mapping->host != inode);  	req->wb_offset  = offset;  	req->wb_pgbase	= offset;  	req->wb_bytes   = count; @@ -104,6 +141,15 @@ void nfs_unlock_request(struct nfs_page *req)  	clear_bit(PG_BUSY, &req->wb_flags);  	smp_mb__after_clear_bit();  	wake_up_bit(&req->wb_flags, PG_BUSY); +} + +/** + * nfs_unlock_and_release_request - Unlock request and release the nfs_page + * @req: + */ +void nfs_unlock_and_release_request(struct nfs_page *req) +{ +	nfs_unlock_request(req);  	nfs_release_request(req);  } @@ -203,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);  void nfs_pageio_init(struct nfs_pageio_descriptor *desc,  		     struct inode *inode,  		     const struct nfs_pageio_ops *pg_ops, +		     const struct nfs_pgio_completion_ops *compl_ops,  		     size_t bsize,  		     int io_flags)  { @@ -215,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,  	desc->pg_recoalesce = 0;  	desc->pg_inode = inode;  	desc->pg_ops = pg_ops; +	desc->pg_completion_ops = compl_ops;  	desc->pg_ioflags = io_flags;  	desc->pg_error = 0;  	desc->pg_lseg = NULL; +	desc->pg_dreq = NULL;  }  /** @@ -241,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,  		return false;  	if (req->wb_context->state != prev->wb_context->state)  		return false; -	if (req->wb_index != (prev->wb_index + 1)) -		return false;  	if (req->wb_pgbase != 0)  		return false;  	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)  		return false; +	if 
(req_offset(req) != req_offset(prev) + prev->wb_bytes) +		return false;  	return pgio->pg_ops->pg_test(pgio, prev, req);  } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 38512bcd2e9..b8323aa7b54 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,  	dprintk("%s:Begin lo %p\n", __func__, lo);  	if (list_empty(&lo->plh_segs)) { +		/* Reset MDS Threshold I/O counters */ +		NFS_I(lo->plh_inode)->write_io = 0; +		NFS_I(lo->plh_inode)->read_io = 0;  		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))  			put_layout_hdr_locked(lo);  		return 0; @@ -455,6 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)  	spin_unlock(&nfsi->vfs_inode.i_lock);  	pnfs_free_lseg_list(&tmp_list);  } +EXPORT_SYMBOL_GPL(pnfs_destroy_layout);  /*   * Called by the state manger to remove all layouts established under an @@ -692,6 +696,7 @@ out:  	dprintk("<-- %s status: %d\n", __func__, status);  	return status;  } +EXPORT_SYMBOL_GPL(_pnfs_return_layout);  bool pnfs_roc(struct inode *ino)  { @@ -931,6 +936,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,  }  /* + * Use mdsthreshold hints set at each OPEN to determine if I/O should go + * to the MDS or over pNFS + * + * The nfs_inode read_io and write_io fields are cumulative counters reset + * when there are no layout segments. Note that in pnfs_update_layout iomode + * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a + * WRITE request. + * + * A return of true means use MDS I/O. + * + * From rfc 5661: + * If a file's size is smaller than the file size threshold, data accesses + * SHOULD be sent to the metadata server.  If an I/O request has a length that + * is below the I/O size threshold, the I/O SHOULD be sent to the metadata + * server.  If both file size and I/O size are provided, the client SHOULD + * reach or exceed  both thresholds before sending its read or write + * requests to the data server. 
+ */ +static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, +				     struct inode *ino, int iomode) +{ +	struct nfs4_threshold *t = ctx->mdsthreshold; +	struct nfs_inode *nfsi = NFS_I(ino); +	loff_t fsize = i_size_read(ino); +	bool size = false, size_set = false, io = false, io_set = false, ret = false; + +	if (t == NULL) +		return ret; + +	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", +		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); + +	switch (iomode) { +	case IOMODE_READ: +		if (t->bm & THRESHOLD_RD) { +			dprintk("%s fsize %llu\n", __func__, fsize); +			size_set = true; +			if (fsize < t->rd_sz) +				size = true; +		} +		if (t->bm & THRESHOLD_RD_IO) { +			dprintk("%s nfsi->read_io %llu\n", __func__, +				nfsi->read_io); +			io_set = true; +			if (nfsi->read_io < t->rd_io_sz) +				io = true; +		} +		break; +	case IOMODE_RW: +		if (t->bm & THRESHOLD_WR) { +			dprintk("%s fsize %llu\n", __func__, fsize); +			size_set = true; +			if (fsize < t->wr_sz) +				size = true; +		} +		if (t->bm & THRESHOLD_WR_IO) { +			dprintk("%s nfsi->write_io %llu\n", __func__, +				nfsi->write_io); +			io_set = true; +			if (nfsi->write_io < t->wr_io_sz) +				io = true; +		} +		break; +	} +	if (size_set && io_set) { +		if (size && io) +			ret = true; +	} else if (size || io) +		ret = true; + +	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); +	return ret; +} + +/*   * Layout segment is retreived from the server if not cached.   * The appropriate layout segment is referenced and returned to the caller.   
*/ @@ -957,6 +1037,10 @@ pnfs_update_layout(struct inode *ino,  	if (!pnfs_enabled_sb(NFS_SERVER(ino)))  		return NULL; + +	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) +		return NULL; +  	spin_lock(&ino->i_lock);  	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);  	if (lo == NULL) { @@ -1082,6 +1166,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r  {  	BUG_ON(pgio->pg_lseg != NULL); +	if (req->wb_offset != req->wb_pgbase) { +		nfs_pageio_reset_read_mds(pgio); +		return; +	}  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   req_offset(req), @@ -1100,6 +1188,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *  {  	BUG_ON(pgio->pg_lseg != NULL); +	if (req->wb_offset != req->wb_pgbase) { +		nfs_pageio_reset_write_mds(pgio); +		return; +	}  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   req_offset(req), @@ -1113,26 +1205,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *  EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);  bool -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, +		      const struct nfs_pgio_completion_ops *compl_ops)  {  	struct nfs_server *server = NFS_SERVER(inode);  	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;  	if (ld == NULL)  		return false; -	nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); +	nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, +			server->rsize, 0);  	return true;  }  bool -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, +		       int ioflags, +		       const struct nfs_pgio_completion_ops *compl_ops)  {  	struct nfs_server *server = NFS_SERVER(inode);  	struct 
pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;  	if (ld == NULL)  		return false; -	nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); +	nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, +			server->wsize, ioflags);  	return true;  } @@ -1162,13 +1259,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,  }  EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) +int pnfs_write_done_resend_to_mds(struct inode *inode, +				struct list_head *head, +				const struct nfs_pgio_completion_ops *compl_ops)  {  	struct nfs_pageio_descriptor pgio;  	LIST_HEAD(failed);  	/* Resend all requests through the MDS */ -	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); +	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);  	while (!list_empty(head)) {  		struct nfs_page *req = nfs_list_entry(head->next); @@ -1188,30 +1287,37 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *  	}  	return 0;  } +EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); + +static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +{ +	struct nfs_pgio_header *hdr = data->header; + +	dprintk("pnfs write error = %d\n", hdr->pnfs_error); +	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & +	    PNFS_LAYOUTRET_ON_ERROR) { +		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); +		pnfs_return_layout(hdr->inode); +	} +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) +		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, +							&hdr->pages, +							hdr->completion_ops); +}  /*   * Called by non rpc-based layout drivers   */  void pnfs_ld_write_done(struct nfs_write_data *data)  { -	if (likely(!data->pnfs_error)) { +	struct nfs_pgio_header *hdr = data->header; + +	if (!hdr->pnfs_error) {  		pnfs_set_layoutcommit(data); -		data->mds_ops->rpc_call_done(&data->task, data); -	} else { -		
dprintk("pnfs write error = %d\n", data->pnfs_error); -		if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & -						PNFS_LAYOUTRET_ON_ERROR) { -			/* Don't lo_commit on error, Server will needs to -			 * preform a file recovery. -			 */ -			clear_bit(NFS_INO_LAYOUTCOMMIT, -				  &NFS_I(data->inode)->flags); -			pnfs_return_layout(data->inode); -		} -		data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); -	} -	put_lseg(data->lseg); -	data->mds_ops->rpc_release(data); +		hdr->mds_ops->rpc_call_done(&data->task, data); +	} else +		pnfs_ld_handle_write_error(data); +	hdr->mds_ops->rpc_release(data);  }  EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1219,12 +1325,13 @@ static void  pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,  		struct nfs_write_data *data)  { -	list_splice_tail_init(&data->pages, &desc->pg_list); -	if (data->req && list_empty(&data->req->wb_list)) -		nfs_list_add_request(data->req, &desc->pg_list); -	nfs_pageio_reset_write_mds(desc); -	desc->pg_recoalesce = 1; -	put_lseg(data->lseg); +	struct nfs_pgio_header *hdr = data->header; + +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { +		list_splice_tail_init(&hdr->pages, &desc->pg_list); +		nfs_pageio_reset_write_mds(desc); +		desc->pg_recoalesce = 1; +	}  	nfs_writedata_release(data);  } @@ -1234,23 +1341,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,  			struct pnfs_layout_segment *lseg,  			int how)  { -	struct inode *inode = wdata->inode; +	struct nfs_pgio_header *hdr = wdata->header; +	struct inode *inode = hdr->inode;  	enum pnfs_try_status trypnfs;  	struct nfs_server *nfss = NFS_SERVER(inode); -	wdata->mds_ops = call_ops; -	wdata->lseg = get_lseg(lseg); +	hdr->mds_ops = call_ops;  	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,  		inode->i_ino, wdata->args.count, wdata->args.offset, how); -  	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); -	if (trypnfs == PNFS_NOT_ATTEMPTED) { -		put_lseg(wdata->lseg); -		wdata->lseg = 
NULL; -	} else +	if (trypnfs != PNFS_NOT_ATTEMPTED)  		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); -  	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);  	return trypnfs;  } @@ -1266,7 +1368,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he  	while (!list_empty(head)) {  		enum pnfs_try_status trypnfs; -		data = list_entry(head->next, struct nfs_write_data, list); +		data = list_first_entry(head, struct nfs_write_data, list);  		list_del_init(&data->list);  		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1276,43 +1378,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he  	put_lseg(lseg);  } +static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) +{ +	put_lseg(hdr->lseg); +	nfs_writehdr_free(hdr); +} +  int  pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)  { -	LIST_HEAD(head); +	struct nfs_write_header *whdr; +	struct nfs_pgio_header *hdr;  	int ret; -	ret = nfs_generic_flush(desc, &head); -	if (ret != 0) { +	whdr = nfs_writehdr_alloc(); +	if (!whdr) { +		desc->pg_completion_ops->error_cleanup(&desc->pg_list);  		put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL; -		return ret; +		return -ENOMEM;  	} -	pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); -	return 0; +	hdr = &whdr->header; +	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); +	hdr->lseg = get_lseg(desc->pg_lseg); +	atomic_inc(&hdr->refcnt); +	ret = nfs_generic_flush(desc, hdr); +	if (ret != 0) { +		put_lseg(desc->pg_lseg); +		desc->pg_lseg = NULL; +	} else +		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr); +	return ret;  }  EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +int pnfs_read_done_resend_to_mds(struct inode *inode, +				struct list_head *head, +				const struct nfs_pgio_completion_ops *compl_ops)  {  	struct 
nfs_pageio_descriptor pgio; +	LIST_HEAD(failed); -	put_lseg(data->lseg); -	data->lseg = NULL; -	dprintk("pnfs write error = %d\n", data->pnfs_error); -	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & -						PNFS_LAYOUTRET_ON_ERROR) -		pnfs_return_layout(data->inode); - -	nfs_pageio_init_read_mds(&pgio, data->inode); - -	while (!list_empty(&data->pages)) { -		struct nfs_page *req = nfs_list_entry(data->pages.next); +	/* Resend all requests through the MDS */ +	nfs_pageio_init_read_mds(&pgio, inode, compl_ops); +	while (!list_empty(head)) { +		struct nfs_page *req = nfs_list_entry(head->next);  		nfs_list_remove_request(req); -		nfs_pageio_add_request(&pgio, req); +		if (!nfs_pageio_add_request(&pgio, req)) +			nfs_list_add_request(req, &failed);  	}  	nfs_pageio_complete(&pgio); + +	if (!list_empty(&failed)) { +		list_move(&failed, head); +		return -EIO; +	} +	return 0; +} +EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); + +static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +{ +	struct nfs_pgio_header *hdr = data->header; + +	dprintk("pnfs read error = %d\n", hdr->pnfs_error); +	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & +	    PNFS_LAYOUTRET_ON_ERROR) { +		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); +		pnfs_return_layout(hdr->inode); +	} +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) +		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, +							&hdr->pages, +							hdr->completion_ops);  }  /* @@ -1320,13 +1461,14 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)   */  void pnfs_ld_read_done(struct nfs_read_data *data)  { -	if (likely(!data->pnfs_error)) { +	struct nfs_pgio_header *hdr = data->header; + +	if (likely(!hdr->pnfs_error)) {  		__nfs4_read_done_cb(data); -		data->mds_ops->rpc_call_done(&data->task, data); +		hdr->mds_ops->rpc_call_done(&data->task, data);  	} else  		pnfs_ld_handle_read_error(data); -	put_lseg(data->lseg); -	data->mds_ops->rpc_release(data); +	
hdr->mds_ops->rpc_release(data);  }  EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1334,11 +1476,13 @@ static void  pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,  		struct nfs_read_data *data)  { -	list_splice_tail_init(&data->pages, &desc->pg_list); -	if (data->req && list_empty(&data->req->wb_list)) -		nfs_list_add_request(data->req, &desc->pg_list); -	nfs_pageio_reset_read_mds(desc); -	desc->pg_recoalesce = 1; +	struct nfs_pgio_header *hdr = data->header; + +	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { +		list_splice_tail_init(&hdr->pages, &desc->pg_list); +		nfs_pageio_reset_read_mds(desc); +		desc->pg_recoalesce = 1; +	}  	nfs_readdata_release(data);  } @@ -1350,23 +1494,19 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,  		       const struct rpc_call_ops *call_ops,  		       struct pnfs_layout_segment *lseg)  { -	struct inode *inode = rdata->inode; +	struct nfs_pgio_header *hdr = rdata->header; +	struct inode *inode = hdr->inode;  	struct nfs_server *nfss = NFS_SERVER(inode);  	enum pnfs_try_status trypnfs; -	rdata->mds_ops = call_ops; -	rdata->lseg = get_lseg(lseg); +	hdr->mds_ops = call_ops;  	dprintk("%s: Reading ino:%lu %u@%llu\n",  		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);  	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); -	if (trypnfs == PNFS_NOT_ATTEMPTED) { -		put_lseg(rdata->lseg); -		rdata->lseg = NULL; -	} else { +	if (trypnfs != PNFS_NOT_ATTEMPTED)  		nfs_inc_stats(inode, NFSIOS_PNFS_READ); -	}  	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);  	return trypnfs;  } @@ -1382,7 +1522,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea  	while (!list_empty(head)) {  		enum pnfs_try_status trypnfs; -		data = list_entry(head->next, struct nfs_read_data, list); +		data = list_first_entry(head, struct nfs_read_data, list);  		list_del_init(&data->list);  		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1392,20 +1532,40 @@ pnfs_do_multiple_reads(struct 
nfs_pageio_descriptor *desc, struct list_head *hea  	put_lseg(lseg);  } +static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) +{ +	put_lseg(hdr->lseg); +	nfs_readhdr_free(hdr); +} +  int  pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)  { -	LIST_HEAD(head); +	struct nfs_read_header *rhdr; +	struct nfs_pgio_header *hdr;  	int ret; -	ret = nfs_generic_pagein(desc, &head); -	if (ret != 0) { +	rhdr = nfs_readhdr_alloc(); +	if (!rhdr) { +		desc->pg_completion_ops->error_cleanup(&desc->pg_list); +		ret = -ENOMEM;  		put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL;  		return ret;  	} -	pnfs_do_multiple_reads(desc, &head); -	return 0; +	hdr = &rhdr->header; +	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); +	hdr->lseg = get_lseg(desc->pg_lseg); +	atomic_inc(&hdr->refcnt); +	ret = nfs_generic_pagein(desc, hdr); +	if (ret != 0) { +		put_lseg(desc->pg_lseg); +		desc->pg_lseg = NULL; +	} else +		pnfs_do_multiple_reads(desc, &hdr->rpc_list); +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr); +	return ret;  }  EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); @@ -1438,30 +1598,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);  void  pnfs_set_layoutcommit(struct nfs_write_data *wdata)  { -	struct nfs_inode *nfsi = NFS_I(wdata->inode); +	struct nfs_pgio_header *hdr = wdata->header; +	struct inode *inode = hdr->inode; +	struct nfs_inode *nfsi = NFS_I(inode);  	loff_t end_pos = wdata->mds_offset + wdata->res.count;  	bool mark_as_dirty = false; -	spin_lock(&nfsi->vfs_inode.i_lock); +	spin_lock(&inode->i_lock);  	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {  		mark_as_dirty = true;  		dprintk("%s: Set layoutcommit for inode %lu ", -			__func__, wdata->inode->i_ino); +			__func__, inode->i_ino);  	} -	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { +	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {  		/* references matched in nfs4_layoutcommit_release */ -		get_lseg(wdata->lseg); +	
	get_lseg(hdr->lseg);  	}  	if (end_pos > nfsi->layout->plh_lwb)  		nfsi->layout->plh_lwb = end_pos; -	spin_unlock(&nfsi->vfs_inode.i_lock); +	spin_unlock(&inode->i_lock);  	dprintk("%s: lseg %p end_pos %llu\n", -		__func__, wdata->lseg, nfsi->layout->plh_lwb); +		__func__, hdr->lseg, nfsi->layout->plh_lwb);  	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one  	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */  	if (mark_as_dirty) -		mark_inode_dirty_sync(wdata->inode); +		mark_inode_dirty_sync(inode);  }  EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); @@ -1550,3 +1712,15 @@ out_free:  	kfree(data);  	goto out;  } + +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ +	struct nfs4_threshold *thp; + +	thp = kzalloc(sizeof(*thp), GFP_NOFS); +	if (!thp) { +		dprintk("%s mdsthreshold allocation failed\n", __func__); +		return NULL; +	} +	return thp; +} diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 442ebf68eee..29fd23c0efd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -63,6 +63,7 @@ enum {  	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */  	NFS_LAYOUT_ROC,			/* some lseg had roc bit set */  	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */ +	NFS_LAYOUT_INVALID,		/* layout is being destroyed */  };  enum layoutdriver_policy_flags { @@ -94,11 +95,20 @@ struct pnfs_layoutdriver_type {  	const struct nfs_pageio_ops *pg_read_ops;  	const struct nfs_pageio_ops *pg_write_ops; +	struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);  	void (*mark_request_commit) (struct nfs_page *req, -					struct pnfs_layout_segment *lseg); -	void (*clear_request_commit) (struct nfs_page *req); -	int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); -	int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); +				     struct pnfs_layout_segment *lseg, +				     struct nfs_commit_info *cinfo); +	void (*clear_request_commit) (struct nfs_page *req, +				      struct 
nfs_commit_info *cinfo); +	int (*scan_commit_lists) (struct nfs_commit_info *cinfo, +				  int max); +	void (*recover_commit_reqs) (struct list_head *list, +				     struct nfs_commit_info *cinfo); +	int (*commit_pagelist)(struct inode *inode, +			       struct list_head *mds_pages, +			       int how, +			       struct nfs_commit_info *cinfo);  	/*  	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted @@ -168,8 +178,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);  void get_layout_hdr(struct pnfs_layout_hdr *lo);  void put_lseg(struct pnfs_layout_segment *lseg); -bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, +			   const struct nfs_pgio_completion_ops *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, +			    int, const struct nfs_pgio_completion_ops *);  void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);  void unset_pnfs_layoutdriver(struct nfs_server *); @@ -211,6 +223,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,  					       gfp_t gfp_flags);  void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); +int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, +			const struct nfs_pgio_completion_ops *compl_ops); +int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, +			const struct nfs_pgio_completion_ops *compl_ops); +struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);  /* nfs4_deviceid_flags */  enum { @@ -261,49 +278,66 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)  }  static inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, +		 struct nfs_commit_info *cinfo)  { -	if 
(!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) +	if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)  		return PNFS_NOT_ATTEMPTED; -	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); +	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo); +} + +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ +	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + +	if (ld == NULL || ld->get_ds_info == NULL) +		return NULL; +	return ld->get_ds_info(inode);  }  static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, +			 struct nfs_commit_info *cinfo)  {  	struct inode *inode = req->wb_context->dentry->d_inode;  	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;  	if (lseg == NULL || ld->mark_request_commit == NULL)  		return false; -	ld->mark_request_commit(req, lseg); +	ld->mark_request_commit(req, lseg, cinfo);  	return true;  }  static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)  {  	struct inode *inode = req->wb_context->dentry->d_inode;  	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;  	if (ld == NULL || ld->clear_request_commit == NULL)  		return false; -	ld->clear_request_commit(req); +	ld->clear_request_commit(req, cinfo);  	return true;  }  static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, +		       int max)  { -	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; -	int ret; - -	if (ld == NULL || ld->scan_commit_lists == NULL) +	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)  		return 0; -	ret = ld->scan_commit_lists(inode, max, lock); -	if (ret 
!= 0) -		set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); -	return ret; +	else +		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); +} + +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, +			 struct nfs_commit_info *cinfo) +{ +	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) +		return; +	NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);  }  /* Should the pNFS client commit and return the layout upon a setattr */ @@ -327,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)  	return 0;  } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, +		   struct nfs_server *nfss) +{ +	return (dst && src && src->bm != 0 && +					nfss->pnfs_curr_ld->id == src->l_type); +} +  #ifdef NFS_DEBUG  void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);  #else @@ -396,45 +438,74 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)  {  } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, +					 const struct nfs_pgio_completion_ops *compl_ops)  {  	return false;  } -static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, +					  const struct nfs_pgio_completion_ops *compl_ops)  {  	return false;  }  static inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, +		 struct nfs_commit_info *cinfo)  {  	return PNFS_NOT_ATTEMPTED;  } +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ +	return NULL; +} +  static inline bool -pnfs_mark_request_commit(struct nfs_page *req, 
struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, +			 struct nfs_commit_info *cinfo)  {  	return false;  }  static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)  {  	return false;  }  static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, +		       int max)  {  	return 0;  } +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, +			 struct nfs_commit_info *cinfo) +{ +} +  static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)  {  	return 0;  } +static inline bool +pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, +		   struct nfs_server *nfss) +{ +	return false; +} + +static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) +{ +	return NULL; +} +  #endif /* CONFIG_NFS_V4_1 */  #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d6408b6437d..a706b6bcc28 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  }  static int -nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct qstr *name,  		struct nfs_fh *fhandle, struct nfs_fattr *fattr)  {  	struct nfs_diropargs	arg = { @@ -640,12 +640,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,  static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)  { +	struct inode *inode = data->header->inode; +  	if (nfs_async_handle_expired_key(task))  		return -EAGAIN; -	nfs_invalidate_atime(data->inode); +	nfs_invalidate_atime(inode);  	if (task->tk_status >= 0) { -		nfs_refresh_inode(data->inode, data->res.fattr); +		nfs_refresh_inode(inode, data->res.fattr);  		/* 
Emulate the eof flag, which isn't normally needed in NFSv2  		 * as it is guaranteed to always return the file attributes  		 */ @@ -667,11 +669,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat  static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)  { +	struct inode *inode = data->header->inode; +  	if (nfs_async_handle_expired_key(task))  		return -EAGAIN;  	if (task->tk_status >= 0) -		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); +		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);  	return 0;  } @@ -687,8 +691,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d  	rpc_call_start(task);  } +static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ +	BUG(); +} +  static void -nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)  {  	BUG();  } @@ -732,6 +741,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {  	.file_inode_ops	= &nfs_file_inode_operations,  	.file_ops	= &nfs_file_operations,  	.getroot	= nfs_proc_get_root, +	.submount	= nfs_submount,  	.getattr	= nfs_proc_getattr,  	.setattr	= nfs_proc_setattr,  	.lookup		= nfs_proc_lookup, @@ -763,6 +773,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {  	.write_rpc_prepare = nfs_proc_write_rpc_prepare,  	.write_done	= nfs_write_done,  	.commit_setup	= nfs_proc_commit_setup, +	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,  	.lock		= nfs_proc_lock,  	.lock_check_bounds = nfs_lock_check_bounds,  	.close_context	= nfs_close_context, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a4be28c2ea..86ced783621 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,43 +30,73 @@  #define NFSDBG_FACILITY		NFSDBG_PAGECACHE  static const struct nfs_pageio_ops nfs_pageio_read_ops; -static const struct rpc_call_ops nfs_read_partial_ops; -static const struct 
rpc_call_ops nfs_read_full_ops; +static const struct rpc_call_ops nfs_read_common_ops; +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;  static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +struct nfs_read_header *nfs_readhdr_alloc(void)  { -	struct nfs_read_data *p; +	struct nfs_read_header *rhdr; -	p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); -	if (p) { -		INIT_LIST_HEAD(&p->pages); -		p->npages = pagecount; -		if (pagecount <= ARRAY_SIZE(p->page_array)) -			p->pagevec = p->page_array; -		else { -			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); -			if (!p->pagevec) { -				kmem_cache_free(nfs_rdata_cachep, p); -				p = NULL; -			} -		} +	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); +	if (rhdr) { +		struct nfs_pgio_header *hdr = &rhdr->header; + +		INIT_LIST_HEAD(&hdr->pages); +		INIT_LIST_HEAD(&hdr->rpc_list); +		spin_lock_init(&hdr->lock); +		atomic_set(&hdr->refcnt, 0); +	} +	return rhdr; +} + +static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, +						unsigned int pagecount) +{ +	struct nfs_read_data *data, *prealloc; + +	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; +	if (prealloc->header == NULL) +		data = prealloc; +	else +		data = kzalloc(sizeof(*data), GFP_KERNEL); +	if (!data) +		goto out; + +	if (nfs_pgarray_set(&data->pages, pagecount)) { +		data->header = hdr; +		atomic_inc(&hdr->refcnt); +	} else { +		if (data != prealloc) +			kfree(data); +		data = NULL;  	} -	return p; +out: +	return data;  } -void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readhdr_free(struct nfs_pgio_header *hdr)  { -	if (p && (p->pagevec != &p->page_array[0])) -		kfree(p->pagevec); -	kmem_cache_free(nfs_rdata_cachep, p); +	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); + +	kmem_cache_free(nfs_rdata_cachep, rhdr);  }  void nfs_readdata_release(struct 
nfs_read_data *rdata)  { +	struct nfs_pgio_header *hdr = rdata->header; +	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); +  	put_nfs_open_context(rdata->args.context); -	nfs_readdata_free(rdata); +	if (rdata->pages.pagevec != rdata->pages.page_array) +		kfree(rdata->pages.pagevec); +	if (rdata != &read_header->rpc_data) +		kfree(rdata); +	else +		rdata->header = NULL; +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr);  }  static @@ -78,39 +108,11 @@ int nfs_return_empty_page(struct page *page)  	return 0;  } -static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) -{ -	unsigned int remainder = data->args.count - data->res.count; -	unsigned int base = data->args.pgbase + data->res.count; -	unsigned int pglen; -	struct page **pages; - -	if (data->res.eof == 0 || remainder == 0) -		return; -	/* -	 * Note: "remainder" can never be negative, since we check for -	 * 	this in the XDR code. -	 */ -	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; -	base &= ~PAGE_CACHE_MASK; -	pglen = PAGE_CACHE_SIZE - base; -	for (;;) { -		if (remainder <= pglen) { -			zero_user(*pages, base, remainder); -			break; -		} -		zero_user(*pages, base, pglen); -		pages++; -		remainder -= pglen; -		pglen = PAGE_CACHE_SIZE; -		base = 0; -	} -} -  void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, -		struct inode *inode) +			      struct inode *inode, +			      const struct nfs_pgio_completion_ops *compl_ops)  { -	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, +	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,  			NFS_SERVER(inode)->rsize, 0);  } @@ -121,11 +123,12 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)  }  EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, -		struct inode *inode) +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, +			  struct inode *inode, +			
  const struct nfs_pgio_completion_ops *compl_ops)  { -	if (!pnfs_pageio_init_read(pgio, inode)) -		nfs_pageio_init_read_mds(pgio, inode); +	if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) +		nfs_pageio_init_read_mds(pgio, inode, compl_ops);  }  int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, @@ -146,9 +149,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,  	if (len < PAGE_CACHE_SIZE)  		zero_user_segment(page, len, PAGE_CACHE_SIZE); -	nfs_pageio_init_read(&pgio, inode); +	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);  	nfs_pageio_add_request(&pgio, new);  	nfs_pageio_complete(&pgio); +	NFS_I(inode)->read_io += pgio.pg_bytes_written;  	return 0;  } @@ -169,16 +173,49 @@ static void nfs_readpage_release(struct nfs_page *req)  	nfs_release_request(req);  } -int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, -		      const struct rpc_call_ops *call_ops) +/* Note io was page aligned */ +static void nfs_read_completion(struct nfs_pgio_header *hdr) +{ +	unsigned long bytes = 0; + +	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) +		goto out; +	while (!list_empty(&hdr->pages)) { +		struct nfs_page *req = nfs_list_entry(hdr->pages.next); +		struct page *page = req->wb_page; + +		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { +			if (bytes > hdr->good_bytes) +				zero_user(page, 0, PAGE_SIZE); +			else if (hdr->good_bytes - bytes < PAGE_SIZE) +				zero_user_segment(page, +					hdr->good_bytes & ~PAGE_MASK, +					PAGE_SIZE); +		} +		bytes += req->wb_bytes; +		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { +			if (bytes <= hdr->good_bytes) +				SetPageUptodate(page); +		} else +			SetPageUptodate(page); +		nfs_list_remove_request(req); +		nfs_readpage_release(req); +	} +out: +	hdr->release(hdr); +} + +int nfs_initiate_read(struct rpc_clnt *clnt, +		      struct nfs_read_data *data, +		      const struct rpc_call_ops *call_ops, int flags)  { -	struct inode *inode = data->inode; +	
struct inode *inode = data->header->inode;  	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;  	struct rpc_task *task;  	struct rpc_message msg = {  		.rpc_argp = &data->args,  		.rpc_resp = &data->res, -		.rpc_cred = data->cred, +		.rpc_cred = data->header->cred,  	};  	struct rpc_task_setup task_setup_data = {  		.task = &data->task, @@ -187,7 +224,7 @@ int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,  		.callback_ops = call_ops,  		.callback_data = data,  		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC | swap_flags, +		.flags = RPC_TASK_ASYNC | swap_flags | flags,  	};  	/* Set up the initial task struct. */ @@ -212,19 +249,15 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);  /*   * Set up the NFS read request struct   */ -static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, +static void nfs_read_rpcsetup(struct nfs_read_data *data,  		unsigned int count, unsigned int offset)  { -	struct inode *inode = req->wb_context->dentry->d_inode; - -	data->req	  = req; -	data->inode	  = inode; -	data->cred	  = req->wb_context->cred; +	struct nfs_page *req = data->header->req; -	data->args.fh     = NFS_FH(inode); +	data->args.fh     = NFS_FH(data->header->inode);  	data->args.offset = req_offset(req) + offset;  	data->args.pgbase = req->wb_pgbase + offset; -	data->args.pages  = data->pagevec; +	data->args.pages  = data->pages.pagevec;  	data->args.count  = count;  	data->args.context = get_nfs_open_context(req->wb_context);  	data->args.lock_context = req->wb_lock_context; @@ -238,9 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,  static int nfs_do_read(struct nfs_read_data *data,  		const struct rpc_call_ops *call_ops)  { -	struct inode *inode = data->args.context->dentry->d_inode; +	struct inode *inode = data->header->inode; -	return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); +	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);  }  static 
int @@ -253,7 +286,7 @@ nfs_do_multiple_reads(struct list_head *head,  	while (!list_empty(head)) {  		int ret2; -		data = list_entry(head->next, struct nfs_read_data, list); +		data = list_first_entry(head, struct nfs_read_data, list);  		list_del_init(&data->list);  		ret2 = nfs_do_read(data, call_ops); @@ -275,6 +308,24 @@ nfs_async_read_error(struct list_head *head)  	}  } +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { +	.error_cleanup = nfs_async_read_error, +	.completion = nfs_read_completion, +}; + +static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, +		struct nfs_pgio_header *hdr) +{ +	set_bit(NFS_IOHDR_REDO, &hdr->flags); +	while (!list_empty(&hdr->rpc_list)) { +		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, +				struct nfs_read_data, list); +		list_del(&data->list); +		nfs_readdata_release(data); +	} +	desc->pg_completion_ops->error_cleanup(&desc->pg_list); +} +  /*   * Generate multiple requests to fill a single page.   * @@ -288,93 +339,95 @@ nfs_async_read_error(struct list_head *head)   * won't see the new data until our attribute cache is updated.  This is more   * or less conventional NFS client behavior.   
*/ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, +			    struct nfs_pgio_header *hdr)  { -	struct nfs_page *req = nfs_list_entry(desc->pg_list.next); +	struct nfs_page *req = hdr->req;  	struct page *page = req->wb_page;  	struct nfs_read_data *data;  	size_t rsize = desc->pg_bsize, nbytes;  	unsigned int offset; -	int requests = 0; -	int ret = 0; - -	nfs_list_remove_request(req);  	offset = 0;  	nbytes = desc->pg_count;  	do {  		size_t len = min(nbytes,rsize); -		data = nfs_readdata_alloc(1); -		if (!data) -			goto out_bad; -		data->pagevec[0] = page; -		nfs_read_rpcsetup(req, data, len, offset); -		list_add(&data->list, res); -		requests++; +		data = nfs_readdata_alloc(hdr, 1); +		if (!data) { +			nfs_pagein_error(desc, hdr); +			return -ENOMEM; +		} +		data->pages.pagevec[0] = page; +		nfs_read_rpcsetup(data, len, offset); +		list_add(&data->list, &hdr->rpc_list);  		nbytes -= len;  		offset += len; -	} while(nbytes != 0); -	atomic_set(&req->wb_complete, requests); -	desc->pg_rpc_callops = &nfs_read_partial_ops; -	return ret; -out_bad: -	while (!list_empty(res)) { -		data = list_entry(res->next, struct nfs_read_data, list); -		list_del(&data->list); -		nfs_readdata_release(data); -	} -	nfs_readpage_release(req); -	return -ENOMEM; +	} while (nbytes != 0); + +	nfs_list_remove_request(req); +	nfs_list_add_request(req, &hdr->pages); +	desc->pg_rpc_callops = &nfs_read_common_ops; +	return 0;  } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, +			  struct nfs_pgio_header *hdr)  {  	struct nfs_page		*req;  	struct page		**pages; -	struct nfs_read_data	*data; +	struct nfs_read_data    *data;  	struct list_head *head = &desc->pg_list; -	int ret = 0; -	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, -						     desc->pg_count)); +	data = 
nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, +							  desc->pg_count));  	if (!data) { -		nfs_async_read_error(head); -		ret = -ENOMEM; -		goto out; +		nfs_pagein_error(desc, hdr); +		return -ENOMEM;  	} -	pages = data->pagevec; +	pages = data->pages.pagevec;  	while (!list_empty(head)) {  		req = nfs_list_entry(head->next);  		nfs_list_remove_request(req); -		nfs_list_add_request(req, &data->pages); +		nfs_list_add_request(req, &hdr->pages);  		*pages++ = req->wb_page;  	} -	req = nfs_list_entry(data->pages.next); -	nfs_read_rpcsetup(req, data, desc->pg_count, 0); -	list_add(&data->list, res); -	desc->pg_rpc_callops = &nfs_read_full_ops; -out: -	return ret; +	nfs_read_rpcsetup(data, desc->pg_count, 0); +	list_add(&data->list, &hdr->rpc_list); +	desc->pg_rpc_callops = &nfs_read_common_ops; +	return 0;  } -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, +		       struct nfs_pgio_header *hdr)  {  	if (desc->pg_bsize < PAGE_CACHE_SIZE) -		return nfs_pagein_multi(desc, head); -	return nfs_pagein_one(desc, head); +		return nfs_pagein_multi(desc, hdr); +	return nfs_pagein_one(desc, hdr);  }  static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)  { -	LIST_HEAD(head); +	struct nfs_read_header *rhdr; +	struct nfs_pgio_header *hdr;  	int ret; -	ret = nfs_generic_pagein(desc, &head); +	rhdr = nfs_readhdr_alloc(); +	if (!rhdr) { +		desc->pg_completion_ops->error_cleanup(&desc->pg_list); +		return -ENOMEM; +	} +	hdr = &rhdr->header; +	nfs_pgheader_init(desc, hdr, nfs_readhdr_free); +	atomic_inc(&hdr->refcnt); +	ret = nfs_generic_pagein(desc, hdr);  	if (ret == 0) -		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); +		ret = nfs_do_multiple_reads(&hdr->rpc_list, +					    desc->pg_rpc_callops); +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr);  	return ret;  } @@ -389,20 +442,21 @@ static const struct 
nfs_pageio_ops nfs_pageio_read_ops = {   */  int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)  { +	struct inode *inode = data->header->inode;  	int status;  	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,  			task->tk_status); -	status = NFS_PROTO(data->inode)->read_done(task, data); +	status = NFS_PROTO(inode)->read_done(task, data);  	if (status != 0)  		return status; -	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); +	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);  	if (task->tk_status == -ESTALE) { -		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); -		nfs_mark_for_revalidate(data->inode); +		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); +		nfs_mark_for_revalidate(inode);  	}  	return 0;  } @@ -412,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data  	struct nfs_readargs *argp = &data->args;  	struct nfs_readres *resp = &data->res; -	if (resp->eof || resp->count == argp->count) -		return; -  	/* This is a short read! */ -	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); +	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);  	/* Has the server at least made some progress? */ -	if (resp->count == 0) +	if (resp->count == 0) { +		nfs_set_pgio_error(data->header, -EIO, argp->offset);  		return; - +	}  	/* Yes, so retry the read at the end of the data */  	data->mds_offset += resp->count;  	argp->offset += resp->count; @@ -429,114 +481,46 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data  	rpc_restart_call_prepare(task);  } -/* - * Handle a read reply that fills part of a page. 
- */ -static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) +static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)  {  	struct nfs_read_data *data = calldata; -  +	struct nfs_pgio_header *hdr = data->header; + +	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */  	if (nfs_readpage_result(task, data) != 0)  		return;  	if (task->tk_status < 0) -		return; +		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); +	else if (data->res.eof) { +		loff_t bound; -	nfs_readpage_truncate_uninitialised_page(data); -	nfs_readpage_retry(task, data); +		bound = data->args.offset + data->res.count; +		spin_lock(&hdr->lock); +		if (bound < hdr->io_start + hdr->good_bytes) { +			set_bit(NFS_IOHDR_EOF, &hdr->flags); +			clear_bit(NFS_IOHDR_ERROR, &hdr->flags); +			hdr->good_bytes = bound - hdr->io_start; +		} +		spin_unlock(&hdr->lock); +	} else if (data->res.count != data->args.count) +		nfs_readpage_retry(task, data);  } -static void nfs_readpage_release_partial(void *calldata) +static void nfs_readpage_release_common(void *calldata)  { -	struct nfs_read_data *data = calldata; -	struct nfs_page *req = data->req; -	struct page *page = req->wb_page; -	int status = data->task.tk_status; - -	if (status < 0) -		set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); - -	if (atomic_dec_and_test(&req->wb_complete)) { -		if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) -			SetPageUptodate(page); -		nfs_readpage_release(req); -	}  	nfs_readdata_release(calldata);  }  void nfs_read_prepare(struct rpc_task *task, void *calldata)  {  	struct nfs_read_data *data = calldata; -	NFS_PROTO(data->inode)->read_rpc_prepare(task, data); -} - -static const struct rpc_call_ops nfs_read_partial_ops = { -	.rpc_call_prepare = nfs_read_prepare, -	.rpc_call_done = nfs_readpage_result_partial, -	.rpc_release = nfs_readpage_release_partial, -}; - -static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) -{ -	unsigned int 
count = data->res.count; -	unsigned int base = data->args.pgbase; -	struct page **pages; - -	if (data->res.eof) -		count = data->args.count; -	if (unlikely(count == 0)) -		return; -	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; -	base &= ~PAGE_CACHE_MASK; -	count += base; -	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) -		SetPageUptodate(*pages); -	if (count == 0) -		return; -	/* Was this a short read? */ -	if (data->res.eof || data->res.count == data->args.count) -		SetPageUptodate(*pages); -} - -/* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) -{ -	struct nfs_read_data *data = calldata; - -	if (nfs_readpage_result(task, data) != 0) -		return; -	if (task->tk_status < 0) -		return; -	/* -	 * Note: nfs_readpage_retry may change the values of -	 * data->args. In the multi-page case, we therefore need -	 * to ensure that we call nfs_readpage_set_pages_uptodate() -	 * first. 
-	 */ -	nfs_readpage_truncate_uninitialised_page(data); -	nfs_readpage_set_pages_uptodate(data); -	nfs_readpage_retry(task, data); -} - -static void nfs_readpage_release_full(void *calldata) -{ -	struct nfs_read_data *data = calldata; - -	while (!list_empty(&data->pages)) { -		struct nfs_page *req = nfs_list_entry(data->pages.next); - -		nfs_list_remove_request(req); -		nfs_readpage_release(req); -	} -	nfs_readdata_release(calldata); +	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);  } -static const struct rpc_call_ops nfs_read_full_ops = { +static const struct rpc_call_ops nfs_read_common_ops = {  	.rpc_call_prepare = nfs_read_prepare, -	.rpc_call_done = nfs_readpage_result_full, -	.rpc_release = nfs_readpage_release_full, +	.rpc_call_done = nfs_readpage_result_common, +	.rpc_release = nfs_readpage_release_common,  };  /* @@ -668,11 +652,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,  	if (ret == 0)  		goto read_complete; /* all pages were read */ -	nfs_pageio_init_read(&pgio, inode); +	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);  	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);  	nfs_pageio_complete(&pgio); +	NFS_I(inode)->read_io += pgio.pg_bytes_written;  	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);  read_complete: @@ -684,7 +669,7 @@ out:  int __init nfs_init_readpagecache(void)  {  	nfs_rdata_cachep = kmem_cache_create("nfs_read_data", -					     sizeof(struct nfs_read_data), +					     sizeof(struct nfs_read_header),  					     0, SLAB_HWCACHE_ALIGN,  					     NULL);  	if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4ac7fca7e4b..ff656c02268 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -66,6 +66,7 @@  #include "pnfs.h"  #define NFSDBG_FACILITY		NFSDBG_VFS +#define NFS_TEXT_DATA		1  #ifdef CONFIG_NFS_V3  #define NFS_DEFAULT_VERSION 3 @@ -277,12 +278,22 
@@ static match_table_t nfs_vers_tokens = {  	{ Opt_vers_err, NULL }  }; +struct nfs_mount_info { +	void (*fill_super)(struct super_block *, struct nfs_mount_info *); +	int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); +	struct nfs_parsed_mount_data *parsed; +	struct nfs_clone_mount *cloned; +	struct nfs_fh *mntfh; +}; +  static void nfs_umount_begin(struct super_block *);  static int  nfs_statfs(struct dentry *, struct kstatfs *);  static int  nfs_show_options(struct seq_file *, struct dentry *);  static int  nfs_show_devname(struct seq_file *, struct dentry *);  static int  nfs_show_path(struct seq_file *, struct dentry *);  static int  nfs_show_stats(struct seq_file *, struct dentry *); +static struct dentry *nfs_fs_mount_common(struct file_system_type *, +		struct nfs_server *, int, const char *, struct nfs_mount_info *);  static struct dentry *nfs_fs_mount(struct file_system_type *,  		int, const char *, void *);  static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, @@ -323,12 +334,11 @@ static const struct super_operations nfs_sops = {  };  #ifdef CONFIG_NFS_V4 -static int nfs4_validate_text_mount_data(void *options, +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); +static int nfs4_validate_mount_data(void *options,  	struct nfs_parsed_mount_data *args, const char *dev_name);  static struct dentry *nfs4_try_mount(int flags, const char *dev_name, -	struct nfs_parsed_mount_data *data); -static struct dentry *nfs4_mount(struct file_system_type *fs_type, -	int flags, const char *dev_name, void *raw_data); +	struct nfs_mount_info *mount_info);  static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,  	int flags, const char *dev_name, void *raw_data);  static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, @@ -342,7 +352,7 @@ static void nfs4_kill_super(struct super_block *sb);  static struct file_system_type nfs4_fs_type = {  	.owner		= THIS_MODULE,  	
.name		= "nfs4", -	.mount		= nfs4_mount, +	.mount		= nfs_fs_mount,  	.kill_sb	= nfs4_kill_super,  	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,  }; @@ -786,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)  static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)  { -	if (nfss->nfs_client && nfss->nfs_client->impl_id) { -		struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; +	if (nfss->nfs_client && nfss->nfs_client->cl_implid) { +		struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;  		seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"  			   "date='%llu,%u'",  			   impl_id->name, impl_id->domain, @@ -938,7 +948,7 @@ static void nfs_umount_begin(struct super_block *sb)  		rpc_killall_tasks(rpc);  } -static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) +static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)  {  	struct nfs_parsed_mount_data *data; @@ -953,8 +963,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve  		data->nfs_server.protocol = XPRT_TRANSPORT_TCP;  		data->auth_flavors[0]	= RPC_AUTH_UNIX;  		data->auth_flavor_len	= 1; -		data->version		= version;  		data->minorversion	= 0; +		data->need_mount	= true;  		data->net		= current->nsproxy->net_ns;  		security_init_mnt_opts(&data->lsm_opts);  	} @@ -1674,8 +1684,8 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,   * Use the remote server's MOUNT service to request the NFS file handle   * corresponding to the provided path.   
*/ -static int nfs_try_mount(struct nfs_parsed_mount_data *args, -			 struct nfs_fh *root_fh) +static int nfs_request_mount(struct nfs_parsed_mount_data *args, +			     struct nfs_fh *root_fh)  {  	rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];  	unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); @@ -1738,6 +1748,26 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,  	return nfs_walk_authlist(args, &request);  } +static struct dentry *nfs_try_mount(int flags, const char *dev_name, +				    struct nfs_mount_info *mount_info) +{ +	int status; +	struct nfs_server *server; + +	if (mount_info->parsed->need_mount) { +		status = nfs_request_mount(mount_info->parsed, mount_info->mntfh); +		if (status) +			return ERR_PTR(status); +	} + +	/* Get a volume representation */ +	server = nfs_create_server(mount_info->parsed, mount_info->mntfh); +	if (IS_ERR(server)) +		return ERR_CAST(server); + +	return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); +} +  /*   * Split "dev_name" into "hostname:export_path".   
* @@ -1826,10 +1856,10 @@ out_path:   * + breaking back: trying proto=udp after proto=tcp, v2 after v3,   *   mountproto=tcp after mountproto=udp, and so on   */ -static int nfs_validate_mount_data(void *options, -				   struct nfs_parsed_mount_data *args, -				   struct nfs_fh *mntfh, -				   const char *dev_name) +static int nfs23_validate_mount_data(void *options, +				     struct nfs_parsed_mount_data *args, +				     struct nfs_fh *mntfh, +				     const char *dev_name)  {  	struct nfs_mount_data *data = (struct nfs_mount_data *)options;  	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; @@ -1883,6 +1913,7 @@ static int nfs_validate_mount_data(void *options,  		args->acregmax		= data->acregmax;  		args->acdirmin		= data->acdirmin;  		args->acdirmax		= data->acdirmax; +		args->need_mount	= false;  		memcpy(sap, &data->addr, sizeof(data->addr));  		args->nfs_server.addrlen = sizeof(data->addr); @@ -1934,43 +1965,8 @@ static int nfs_validate_mount_data(void *options,  		}  		break; -	default: { -		int status; - -		if (nfs_parse_mount_options((char *)options, args) == 0) -			return -EINVAL; - -		if (!nfs_verify_server_address(sap)) -			goto out_no_address; - -		if (args->version == 4) -#ifdef CONFIG_NFS_V4 -			return nfs4_validate_text_mount_data(options, -							     args, dev_name); -#else -			goto out_v4_not_compiled; -#endif - -		nfs_set_port(sap, &args->nfs_server.port, 0); - -		nfs_set_mount_transport_protocol(args); - -		status = nfs_parse_devname(dev_name, -					   &args->nfs_server.hostname, -					   PAGE_SIZE, -					   &args->nfs_server.export_path, -					   NFS_MAXPATHLEN); -		if (!status) -			status = nfs_try_mount(args, mntfh); - -		kfree(args->nfs_server.export_path); -		args->nfs_server.export_path = NULL; - -		if (status) -			return status; - -		break; -		} +	default: +		return NFS_TEXT_DATA;  	}  #ifndef CONFIG_NFS_V3 @@ -1999,12 +1995,6 @@ out_v3_not_compiled:  	return -EPROTONOSUPPORT;  #endif /* !CONFIG_NFS_V3 */ -#ifndef 
CONFIG_NFS_V4 -out_v4_not_compiled: -	dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); -	return -EPROTONOSUPPORT; -#endif /* !CONFIG_NFS_V4 */ -  out_nomem:  	dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");  	return -ENOMEM; @@ -2018,6 +2008,82 @@ out_invalid_fh:  	return -EINVAL;  } +#ifdef CONFIG_NFS_V4 +static int nfs_validate_mount_data(struct file_system_type *fs_type, +				   void *options, +				   struct nfs_parsed_mount_data *args, +				   struct nfs_fh *mntfh, +				   const char *dev_name) +{ +	if (fs_type == &nfs_fs_type) +		return nfs23_validate_mount_data(options, args, mntfh, dev_name); +	return nfs4_validate_mount_data(options, args, dev_name); +} +#else +static int nfs_validate_mount_data(struct file_system_type *fs_type, +				   void *options, +				   struct nfs_parsed_mount_data *args, +				   struct nfs_fh *mntfh, +				   const char *dev_name) +{ +	return nfs23_validate_mount_data(options, args, mntfh, dev_name); +} +#endif + +static int nfs_validate_text_mount_data(void *options, +					struct nfs_parsed_mount_data *args, +					const char *dev_name) +{ +	int port = 0; +	int max_namelen = PAGE_SIZE; +	int max_pathlen = NFS_MAXPATHLEN; +	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + +	if (nfs_parse_mount_options((char *)options, args) == 0) +		return -EINVAL; + +	if (!nfs_verify_server_address(sap)) +		goto out_no_address; + +	if (args->version == 4) { +#ifdef CONFIG_NFS_V4 +		port = NFS_PORT; +		max_namelen = NFS4_MAXNAMLEN; +		max_pathlen = NFS4_MAXPATHLEN; +		nfs_validate_transport_protocol(args); +		nfs4_validate_mount_flags(args); +#else +		goto out_v4_not_compiled; +#endif /* CONFIG_NFS_V4 */ +	} else +		nfs_set_mount_transport_protocol(args); + +	nfs_set_port(sap, &args->nfs_server.port, port); + +	if (args->auth_flavor_len > 1) +		goto out_bad_auth; + +	return nfs_parse_devname(dev_name, +				   &args->nfs_server.hostname, +				   max_namelen, +				   &args->nfs_server.export_path, +		
		   max_pathlen); + +#ifndef CONFIG_NFS_V4 +out_v4_not_compiled: +	dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); +	return -EPROTONOSUPPORT; +#endif /* !CONFIG_NFS_V4 */ + +out_no_address: +	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); +	return -EINVAL; + +out_bad_auth: +	dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n"); +	return -EINVAL; +} +  static int  nfs_compare_remount_data(struct nfs_server *nfss,  			 struct nfs_parsed_mount_data *data) @@ -2129,8 +2195,9 @@ static inline void nfs_initialise_sb(struct super_block *sb)   * Finish setting up an NFS2/3 superblock   */  static void nfs_fill_super(struct super_block *sb, -			   struct nfs_parsed_mount_data *data) +			   struct nfs_mount_info *mount_info)  { +	struct nfs_parsed_mount_data *data = mount_info->parsed;  	struct nfs_server *server = NFS_SB(sb);  	sb->s_blocksize_bits = 0; @@ -2154,8 +2221,9 @@ static void nfs_fill_super(struct super_block *sb,   * Finish setting up a cloned NFS2/3 superblock   */  static void nfs_clone_super(struct super_block *sb, -			    const struct super_block *old_sb) +			    struct nfs_mount_info *mount_info)  { +	const struct super_block *old_sb = mount_info->cloned->sb;  	struct nfs_server *server = NFS_SB(sb);  	sb->s_blocksize_bits = old_sb->s_blocksize_bits; @@ -2278,52 +2346,70 @@ static int nfs_compare_super(struct super_block *sb, void *data)  	return nfs_compare_mount_options(sb, server, mntflags);  } +#ifdef CONFIG_NFS_FSCACHE +static void nfs_get_cache_cookie(struct super_block *sb, +				 struct nfs_parsed_mount_data *parsed, +				 struct nfs_clone_mount *cloned) +{ +	char *uniq = NULL; +	int ulen = 0; + +	if (parsed && parsed->fscache_uniq) { +		uniq = parsed->fscache_uniq; +		ulen = strlen(parsed->fscache_uniq); +	} else if (cloned) { +		struct nfs_server *mnt_s = NFS_SB(cloned->sb); +		if (mnt_s->fscache_key) { +			uniq = mnt_s->fscache_key->key.uniquifier; +			ulen = mnt_s->fscache_key->key.uniq_len; +		}; +	} 
+ +	nfs_fscache_get_super_cookie(sb, uniq, ulen); +} +#else +static void nfs_get_cache_cookie(struct super_block *sb, +				 struct nfs_parsed_mount_data *parsed, +				 struct nfs_clone_mount *cloned) +{ +} +#endif +  static int nfs_bdi_register(struct nfs_server *server)  {  	return bdi_register_dev(&server->backing_dev_info, server->s_dev);  } -static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, -	int flags, const char *dev_name, void *raw_data) +static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, +			       struct nfs_mount_info *mount_info) +{ +	return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); +} + +static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, +				 struct nfs_mount_info *mount_info) +{ +	/* clone any lsm security options from the parent to the new sb */ +	security_sb_clone_mnt_opts(mount_info->cloned->sb, s); +	if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) +		return -ESTALE; +	return 0; +} + +static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, +					  struct nfs_server *server, +					  int flags, const char *dev_name, +					  struct nfs_mount_info *mount_info)  { -	struct nfs_server *server = NULL;  	struct super_block *s; -	struct nfs_parsed_mount_data *data; -	struct nfs_fh *mntfh;  	struct dentry *mntroot = ERR_PTR(-ENOMEM);  	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;  	struct nfs_sb_mountdata sb_mntdata = {  		.mntflags = flags, +		.server = server,  	};  	int error; -	data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); -	mntfh = nfs_alloc_fhandle(); -	if (data == NULL || mntfh == NULL) -		goto out; - -	/* Validate the mount data */ -	error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); -	if (error < 0) { -		mntroot = ERR_PTR(error); -		goto out; -	} - -#ifdef CONFIG_NFS_V4 -	if (data->version == 4) { -		mntroot = nfs4_try_mount(flags, dev_name, data); -		goto 
out; -	} -#endif	/* CONFIG_NFS_V4 */ - -	/* Get a volume representation */ -	server = nfs_create_server(data, mntfh); -	if (IS_ERR(server)) { -		mntroot = ERR_CAST(server); -		goto out; -	} -	sb_mntdata.server = server; -  	if (server->flags & NFS_MOUNT_UNSHARED)  		compare_super = NULL; @@ -2351,23 +2437,21 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,  	if (!s->s_root) {  		/* initial superblock/root creation */ -		nfs_fill_super(s, data); -		nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); +		mount_info->fill_super(s, mount_info); +		nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);  	} -	mntroot = nfs_get_root(s, mntfh, dev_name); +	mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);  	if (IS_ERR(mntroot))  		goto error_splat_super; -	error = security_sb_set_mnt_opts(s, &data->lsm_opts); +	error = mount_info->set_security(s, mntroot, mount_info);  	if (error)  		goto error_splat_root;  	s->s_flags |= MS_ACTIVE;  out: -	nfs_free_parsed_mount_data(data); -	nfs_free_fhandle(mntfh);  	return mntroot;  out_err_nosb: @@ -2385,6 +2469,43 @@ error_splat_bdi:  	goto out;  } +static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, +	int flags, const char *dev_name, void *raw_data) +{ +	struct nfs_mount_info mount_info = { +		.fill_super = nfs_fill_super, +		.set_security = nfs_set_sb_security, +	}; +	struct dentry *mntroot = ERR_PTR(-ENOMEM); +	int error; + +	mount_info.parsed = nfs_alloc_parsed_mount_data(); +	mount_info.mntfh = nfs_alloc_fhandle(); +	if (mount_info.parsed == NULL || mount_info.mntfh == NULL) +		goto out; + +	/* Validate the mount data */ +	error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); +	if (error == NFS_TEXT_DATA) +		error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); +	if (error < 0) { +		mntroot = ERR_PTR(error); +		goto out; +	} + +#ifdef CONFIG_NFS_V4 +	if (mount_info.parsed->version == 4) +		mntroot = 
nfs4_try_mount(flags, dev_name, &mount_info); +	else +#endif	/* CONFIG_NFS_V4 */ +		mntroot = nfs_try_mount(flags, dev_name, &mount_info); + +out: +	nfs_free_parsed_mount_data(mount_info.parsed); +	nfs_free_fhandle(mount_info.mntfh); +	return mntroot; +} +  /*   * Ensure that we unregister the bdi before kill_anon_super   * releases the device name @@ -2409,93 +2530,51 @@ static void nfs_kill_super(struct super_block *s)  }  /* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) + * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)   */  static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, -		const char *dev_name, void *raw_data) +nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, +		const char *dev_name, struct nfs_mount_info *mount_info)  { -	struct nfs_clone_mount *data = raw_data; -	struct super_block *s; +	struct nfs_clone_mount *data = mount_info->cloned;  	struct nfs_server *server; -	struct dentry *mntroot; -	int (*compare_super)(struct super_block *, void *) = nfs_compare_super; -	struct nfs_sb_mountdata sb_mntdata = { -		.mntflags = flags, -	}; +	struct dentry *mntroot = ERR_PTR(-ENOMEM);  	int error; -	dprintk("--> nfs_xdev_mount()\n"); +	dprintk("--> nfs_xdev_mount_common()\n"); + +	mount_info->mntfh = data->fh;  	/* create a new volume representation */  	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);  	if (IS_ERR(server)) {  		error = PTR_ERR(server); -		goto out_err_noserver; -	} -	sb_mntdata.server = server; - -	if (server->flags & NFS_MOUNT_UNSHARED) -		compare_super = NULL; - -	/* -o noac implies -o sync */ -	if (server->flags & NFS_MOUNT_NOAC) -		sb_mntdata.mntflags |= MS_SYNCHRONOUS; - -	/* Get a superblock - note that we may end up sharing one that already exists */ -	s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); -	if (IS_ERR(s)) { -		error = PTR_ERR(s); -		goto out_err_nosb; -	} - -	if (s->s_fs_info != server) 
{ -		nfs_free_server(server); -		server = NULL; -	} else { -		error = nfs_bdi_register(server); -		if (error) -			goto error_splat_bdi; -	} - -	if (!s->s_root) { -		/* initial superblock/root creation */ -		nfs_clone_super(s, data->sb); -		nfs_fscache_get_super_cookie(s, NULL, data); -	} - -	mntroot = nfs_get_root(s, data->fh, dev_name); -	if (IS_ERR(mntroot)) { -		error = PTR_ERR(mntroot); -		goto error_splat_super; -	} -	if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { -		dput(mntroot); -		error = -ESTALE; -		goto error_splat_super; +		goto out_err;  	} -	s->s_flags |= MS_ACTIVE; - -	/* clone any lsm security options from the parent to the new sb */ -	security_sb_clone_mnt_opts(data->sb, s); - -	dprintk("<-- nfs_xdev_mount() = 0\n"); +	mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); +	dprintk("<-- nfs_xdev_mount_common() = 0\n"); +out:  	return mntroot; -out_err_nosb: -	nfs_free_server(server); -out_err_noserver: -	dprintk("<-- nfs_xdev_mount() = %d [error]\n", error); -	return ERR_PTR(error); +out_err: +	dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); +	goto out; +} -error_splat_super: -	if (server && !s->s_root) -		bdi_unregister(&server->backing_dev_info); -error_splat_bdi: -	deactivate_locked_super(s); -	dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error); -	return ERR_PTR(error); +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static struct dentry * +nfs_xdev_mount(struct file_system_type *fs_type, int flags, +		const char *dev_name, void *raw_data) +{ +	struct nfs_mount_info mount_info = { +		.fill_super = nfs_clone_super, +		.set_security = nfs_clone_sb_security, +		.cloned   = raw_data, +	}; +	return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);  }  #ifdef CONFIG_NFS_V4 @@ -2504,8 +2583,9 @@ error_splat_bdi:   * Finish setting up a cloned NFS4 superblock   */  static void nfs4_clone_super(struct super_block *sb, -			    const struct 
super_block *old_sb) +			     struct nfs_mount_info *mount_info)  { +	const struct super_block *old_sb = mount_info->cloned->sb;  	sb->s_blocksize_bits = old_sb->s_blocksize_bits;  	sb->s_blocksize = old_sb->s_blocksize;  	sb->s_maxbytes = old_sb->s_maxbytes; @@ -2523,7 +2603,8 @@ static void nfs4_clone_super(struct super_block *sb,  /*   * Set up an NFS4 superblock   */ -static void nfs4_fill_super(struct super_block *sb) +static void nfs4_fill_super(struct super_block *sb, +			    struct nfs_mount_info *mount_info)  {  	sb->s_time_gran = 1;  	sb->s_op = &nfs4_sops; @@ -2542,37 +2623,6 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)  			 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);  } -static int nfs4_validate_text_mount_data(void *options, -					 struct nfs_parsed_mount_data *args, -					 const char *dev_name) -{ -	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - -	nfs_set_port(sap, &args->nfs_server.port, NFS_PORT); - -	nfs_validate_transport_protocol(args); - -	nfs4_validate_mount_flags(args); - -	if (args->version != 4) { -		dfprintk(MOUNT, -			 "NFS4: Illegal mount version\n"); -		return -EINVAL; -	} - -	if (args->auth_flavor_len > 1) { -		dfprintk(MOUNT, -			 "NFS4: Too many RPC auth flavours specified\n"); -		return -EINVAL; -	} - -	return nfs_parse_devname(dev_name, -				   &args->nfs_server.hostname, -				   NFS4_MAXNAMLEN, -				   &args->nfs_server.export_path, -				   NFS4_MAXPATHLEN); -} -  /*   * Validate NFSv4 mount options   */ @@ -2643,13 +2693,7 @@ static int nfs4_validate_mount_data(void *options,  		break;  	default: -		if (nfs_parse_mount_options((char *)options, args) == 0) -			return -EINVAL; - -		if (!nfs_verify_server_address(sap)) -			return -EINVAL; - -		return nfs4_validate_text_mount_data(options, args, dev_name); +		return NFS_TEXT_DATA;  	}  	return 0; @@ -2673,91 +2717,26 @@ out_no_address:   */  static struct dentry *  nfs4_remote_mount(struct file_system_type *fs_type, int flags, 
-		  const char *dev_name, void *raw_data) +		  const char *dev_name, void *info)  { -	struct nfs_parsed_mount_data *data = raw_data; -	struct super_block *s; +	struct nfs_mount_info *mount_info = info;  	struct nfs_server *server; -	struct nfs_fh *mntfh; -	struct dentry *mntroot; -	int (*compare_super)(struct super_block *, void *) = nfs_compare_super; -	struct nfs_sb_mountdata sb_mntdata = { -		.mntflags = flags, -	}; -	int error = -ENOMEM; +	struct dentry *mntroot = ERR_PTR(-ENOMEM); -	mntfh = nfs_alloc_fhandle(); -	if (data == NULL || mntfh == NULL) -		goto out; +	mount_info->fill_super = nfs4_fill_super; +	mount_info->set_security = nfs_set_sb_security;  	/* Get a volume representation */ -	server = nfs4_create_server(data, mntfh); +	server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);  	if (IS_ERR(server)) { -		error = PTR_ERR(server); +		mntroot = ERR_CAST(server);  		goto out;  	} -	sb_mntdata.server = server; -	if (server->flags & NFS4_MOUNT_UNSHARED) -		compare_super = NULL; - -	/* -o noac implies -o sync */ -	if (server->flags & NFS_MOUNT_NOAC) -		sb_mntdata.mntflags |= MS_SYNCHRONOUS; - -	/* Get a superblock - note that we may end up sharing one that already exists */ -	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); -	if (IS_ERR(s)) { -		error = PTR_ERR(s); -		goto out_free; -	} - -	if (s->s_fs_info != server) { -		nfs_free_server(server); -		server = NULL; -	} else { -		error = nfs_bdi_register(server); -		if (error) -			goto error_splat_bdi; -	} - -	if (!s->s_root) { -		/* initial superblock/root creation */ -		nfs4_fill_super(s); -		nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); -	} - -	mntroot = nfs4_get_root(s, mntfh, dev_name); -	if (IS_ERR(mntroot)) { -		error = PTR_ERR(mntroot); -		goto error_splat_super; -	} - -	error = security_sb_set_mnt_opts(s, &data->lsm_opts); -	if (error) -		goto error_splat_root; - -	s->s_flags |= MS_ACTIVE; - -	nfs_free_fhandle(mntfh); -	return mntroot; +	mntroot = 
nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);  out: -	nfs_free_fhandle(mntfh); -	return ERR_PTR(error); - -out_free: -	nfs_free_server(server); -	goto out; - -error_splat_root: -	dput(mntroot); -error_splat_super: -	if (server && !s->s_root) -		bdi_unregister(&server->backing_dev_info); -error_splat_bdi: -	deactivate_locked_super(s); -	goto out; +	return mntroot;  }  static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, @@ -2869,17 +2848,18 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,  }  static struct dentry *nfs4_try_mount(int flags, const char *dev_name, -			 struct nfs_parsed_mount_data *data) +			 struct nfs_mount_info *mount_info)  {  	char *export_path;  	struct vfsmount *root_mnt;  	struct dentry *res; +	struct nfs_parsed_mount_data *data = mount_info->parsed;  	dfprintk(MOUNT, "--> nfs4_try_mount()\n");  	export_path = data->nfs_server.export_path;  	data->nfs_server.export_path = "/"; -	root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, +	root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,  			data->nfs_server.hostname);  	data->nfs_server.export_path = export_path; @@ -2891,38 +2871,6 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,  	return res;  } -/* - * Get the superblock for an NFS4 mountpoint - */ -static struct dentry *nfs4_mount(struct file_system_type *fs_type, -	int flags, const char *dev_name, void *raw_data) -{ -	struct nfs_parsed_mount_data *data; -	int error = -ENOMEM; -	struct dentry *res = ERR_PTR(-ENOMEM); - -	data = nfs_alloc_parsed_mount_data(4); -	if (data == NULL) -		goto out; - -	/* Validate the mount data */ -	error = nfs4_validate_mount_data(raw_data, data, dev_name); -	if (error < 0) { -		res = ERR_PTR(error); -		goto out; -	} - -	res = nfs4_try_mount(flags, dev_name, data); -	if (IS_ERR(res)) -		error = PTR_ERR(res); - -out: -	nfs_free_parsed_mount_data(data); -	dprintk("<-- nfs4_mount() = %d%s\n", 
error, -			error != 0 ? " [error]" : ""); -	return res; -} -  static void nfs4_kill_super(struct super_block *sb)  {  	struct nfs_server *server = NFS_SB(sb); @@ -2942,181 +2890,43 @@ static struct dentry *  nfs4_xdev_mount(struct file_system_type *fs_type, int flags,  		 const char *dev_name, void *raw_data)  { -	struct nfs_clone_mount *data = raw_data; -	struct super_block *s; -	struct nfs_server *server; -	struct dentry *mntroot; -	int (*compare_super)(struct super_block *, void *) = nfs_compare_super; -	struct nfs_sb_mountdata sb_mntdata = { -		.mntflags = flags, +	struct nfs_mount_info mount_info = { +		.fill_super = nfs4_clone_super, +		.set_security = nfs_clone_sb_security, +		.cloned = raw_data,  	}; -	int error; - -	dprintk("--> nfs4_xdev_mount()\n"); - -	/* create a new volume representation */ -	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); -	if (IS_ERR(server)) { -		error = PTR_ERR(server); -		goto out_err_noserver; -	} -	sb_mntdata.server = server; - -	if (server->flags & NFS4_MOUNT_UNSHARED) -		compare_super = NULL; - -	/* -o noac implies -o sync */ -	if (server->flags & NFS_MOUNT_NOAC) -		sb_mntdata.mntflags |= MS_SYNCHRONOUS; - -	/* Get a superblock - note that we may end up sharing one that already exists */ -	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); -	if (IS_ERR(s)) { -		error = PTR_ERR(s); -		goto out_err_nosb; -	} - -	if (s->s_fs_info != server) { -		nfs_free_server(server); -		server = NULL; -	} else { -		error = nfs_bdi_register(server); -		if (error) -			goto error_splat_bdi; -	} - -	if (!s->s_root) { -		/* initial superblock/root creation */ -		nfs4_clone_super(s, data->sb); -		nfs_fscache_get_super_cookie(s, NULL, data); -	} - -	mntroot = nfs4_get_root(s, data->fh, dev_name); -	if (IS_ERR(mntroot)) { -		error = PTR_ERR(mntroot); -		goto error_splat_super; -	} -	if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { -		dput(mntroot); -		error = 
-ESTALE; -		goto error_splat_super; -	} - -	s->s_flags |= MS_ACTIVE; - -	security_sb_clone_mnt_opts(data->sb, s); - -	dprintk("<-- nfs4_xdev_mount() = 0\n"); -	return mntroot; - -out_err_nosb: -	nfs_free_server(server); -out_err_noserver: -	dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error); -	return ERR_PTR(error); - -error_splat_super: -	if (server && !s->s_root) -		bdi_unregister(&server->backing_dev_info); -error_splat_bdi: -	deactivate_locked_super(s); -	dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error); -	return ERR_PTR(error); +	return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);  }  static struct dentry *  nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,  			   const char *dev_name, void *raw_data)  { -	struct nfs_clone_mount *data = raw_data; -	struct super_block *s; -	struct nfs_server *server; -	struct dentry *mntroot; -	struct nfs_fh *mntfh; -	int (*compare_super)(struct super_block *, void *) = nfs_compare_super; -	struct nfs_sb_mountdata sb_mntdata = { -		.mntflags = flags, +	struct nfs_mount_info mount_info = { +		.fill_super = nfs4_fill_super, +		.set_security = nfs_clone_sb_security, +		.cloned = raw_data,  	}; -	int error = -ENOMEM; +	struct nfs_server *server; +	struct dentry *mntroot = ERR_PTR(-ENOMEM);  	dprintk("--> nfs4_referral_get_sb()\n"); -	mntfh = nfs_alloc_fhandle(); -	if (mntfh == NULL) -		goto out_err_nofh; +	mount_info.mntfh = nfs_alloc_fhandle(); +	if (mount_info.cloned == NULL || mount_info.mntfh == NULL) +		goto out;  	/* create a new volume representation */ -	server = nfs4_create_referral_server(data, mntfh); +	server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);  	if (IS_ERR(server)) { -		error = PTR_ERR(server); -		goto out_err_noserver; -	} -	sb_mntdata.server = server; - -	if (server->flags & NFS4_MOUNT_UNSHARED) -		compare_super = NULL; - -	/* -o noac implies -o sync */ -	if (server->flags & NFS_MOUNT_NOAC) -		sb_mntdata.mntflags |= MS_SYNCHRONOUS; 
- -	/* Get a superblock - note that we may end up sharing one that already exists */ -	s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); -	if (IS_ERR(s)) { -		error = PTR_ERR(s); -		goto out_err_nosb; -	} - -	if (s->s_fs_info != server) { -		nfs_free_server(server); -		server = NULL; -	} else { -		error = nfs_bdi_register(server); -		if (error) -			goto error_splat_bdi; -	} - -	if (!s->s_root) { -		/* initial superblock/root creation */ -		nfs4_fill_super(s); -		nfs_fscache_get_super_cookie(s, NULL, data); -	} - -	mntroot = nfs4_get_root(s, mntfh, dev_name); -	if (IS_ERR(mntroot)) { -		error = PTR_ERR(mntroot); -		goto error_splat_super; -	} -	if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) { -		dput(mntroot); -		error = -ESTALE; -		goto error_splat_super; +		mntroot = ERR_CAST(server); +		goto out;  	} -	s->s_flags |= MS_ACTIVE; - -	security_sb_clone_mnt_opts(data->sb, s); - -	nfs_free_fhandle(mntfh); -	dprintk("<-- nfs4_referral_get_sb() = 0\n"); +	mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); +out: +	nfs_free_fhandle(mount_info.mntfh);  	return mntroot; - -out_err_nosb: -	nfs_free_server(server); -out_err_noserver: -	nfs_free_fhandle(mntfh); -out_err_nofh: -	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); -	return ERR_PTR(error); - -error_splat_super: -	if (server && !s->s_root) -		bdi_unregister(&server->backing_dev_info); -error_splat_bdi: -	deactivate_locked_super(s); -	nfs_free_fhandle(mntfh); -	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); -	return ERR_PTR(error);  }  /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c07462320f6..e6fe3d69d14 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -39,20 +39,20 @@  /*   * Local function declarations   */ -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, -				  struct inode *inode, int ioflags);  static void nfs_redirty_request(struct nfs_page *req); -static const struct 
rpc_call_ops nfs_write_partial_ops; -static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_write_common_ops;  static const struct rpc_call_ops nfs_commit_ops; +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops;  static struct kmem_cache *nfs_wdata_cachep;  static mempool_t *nfs_wdata_mempool; +static struct kmem_cache *nfs_cdata_cachep;  static mempool_t *nfs_commit_mempool; -struct nfs_write_data *nfs_commitdata_alloc(void) +struct nfs_commit_data *nfs_commitdata_alloc(void)  { -	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); +	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);  	if (p) {  		memset(p, 0, sizeof(*p)); @@ -62,46 +62,73 @@ struct nfs_write_data *nfs_commitdata_alloc(void)  }  EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); -void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_commit_data *p)  { -	if (p && (p->pagevec != &p->page_array[0])) -		kfree(p->pagevec);  	mempool_free(p, nfs_commit_mempool);  }  EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +struct nfs_write_header *nfs_writehdr_alloc(void)  { -	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); +	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);  	if (p) { +		struct nfs_pgio_header *hdr = &p->header; +  		memset(p, 0, sizeof(*p)); -		INIT_LIST_HEAD(&p->pages); -		p->npages = pagecount; -		if (pagecount <= ARRAY_SIZE(p->page_array)) -			p->pagevec = p->page_array; -		else { -			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); -			if (!p->pagevec) { -				mempool_free(p, nfs_wdata_mempool); -				p = NULL; -			} -		} +		INIT_LIST_HEAD(&hdr->pages); +		INIT_LIST_HEAD(&hdr->rpc_list); +		spin_lock_init(&hdr->lock); +		atomic_set(&hdr->refcnt, 0);  	}  	return p;  } -void 
nfs_writedata_free(struct nfs_write_data *p) +static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, +						  unsigned int pagecount) +{ +	struct nfs_write_data *data, *prealloc; + +	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; +	if (prealloc->header == NULL) +		data = prealloc; +	else +		data = kzalloc(sizeof(*data), GFP_KERNEL); +	if (!data) +		goto out; + +	if (nfs_pgarray_set(&data->pages, pagecount)) { +		data->header = hdr; +		atomic_inc(&hdr->refcnt); +	} else { +		if (data != prealloc) +			kfree(data); +		data = NULL; +	} +out: +	return data; +} + +void nfs_writehdr_free(struct nfs_pgio_header *hdr)  { -	if (p && (p->pagevec != &p->page_array[0])) -		kfree(p->pagevec); -	mempool_free(p, nfs_wdata_mempool); +	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); +	mempool_free(whdr, nfs_wdata_mempool);  }  void nfs_writedata_release(struct nfs_write_data *wdata)  { +	struct nfs_pgio_header *hdr = wdata->header; +	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); +  	put_nfs_open_context(wdata->args.context); -	nfs_writedata_free(wdata); +	if (wdata->pages.pagevec != wdata->pages.page_array) +		kfree(wdata->pages.pagevec); +	if (wdata != &write_header->rpc_data) +		kfree(wdata); +	else +		wdata->header = NULL; +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr);  }  static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -203,7 +230,6 @@ static int nfs_set_page_writeback(struct page *page)  		struct inode *inode = page->mapping->host;  		struct nfs_server *nfss = NFS_SERVER(inode); -		page_cache_get(page);  		if (atomic_long_inc_return(&nfss->writeback) >  				NFS_CONGESTION_ON_THRESH) {  			set_bdi_congested(&nfss->backing_dev_info, @@ -219,7 +245,6 @@ static void nfs_end_page_writeback(struct page *page)  	struct nfs_server *nfss = NFS_SERVER(inode);  	end_page_writeback(page); 
-	page_cache_release(page);  	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)  		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);  } @@ -235,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo  		req = nfs_page_find_request_locked(page);  		if (req == NULL)  			break; -		if (nfs_lock_request_dontget(req)) +		if (nfs_lock_request(req))  			break;  		/* Note: If we hold the page lock, as is the case in nfs_writepage, -		 *	 then the call to nfs_lock_request_dontget() will always +		 *	 then the call to nfs_lock_request() will always  		 *	 succeed provided that someone hasn't already marked the  		 *	 request as dirty (in which case we don't care).  		 */ @@ -310,7 +335,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc  	struct nfs_pageio_descriptor pgio;  	int err; -	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); +	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), +			      &nfs_async_write_completion_ops);  	err = nfs_do_writepage(page, wbc, &pgio);  	nfs_pageio_complete(&pgio);  	if (err < 0) @@ -353,7 +379,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)  	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); -	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); +	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), +			      &nfs_async_write_completion_ops);  	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);  	nfs_pageio_complete(&pgio); @@ -379,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)  	struct nfs_inode *nfsi = NFS_I(inode);  	/* Lock the request! 
*/ -	nfs_lock_request_dontget(req); +	nfs_lock_request(req);  	spin_lock(&inode->i_lock);  	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) @@ -421,65 +448,88 @@ nfs_mark_request_dirty(struct nfs_page *req)  /**   * nfs_request_add_commit_list - add request to a commit list   * @req: pointer to a struct nfs_page - * @head: commit list head + * @dst: commit list head + * @cinfo: holds list lock and accounting info   * - * This sets the PG_CLEAN bit, updates the inode global count of + * This sets the PG_CLEAN bit, updates the cinfo count of   * number of outstanding requests requiring a commit as well as   * the MM page stats.   * - * The caller must _not_ hold the inode->i_lock, but must be + * The caller must _not_ hold the cinfo->lock, but must be   * holding the nfs_page lock.   */  void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, +			    struct nfs_commit_info *cinfo)  { -	struct inode *inode = req->wb_context->dentry->d_inode; -  	set_bit(PG_CLEAN, &(req)->wb_flags); -	spin_lock(&inode->i_lock); -	nfs_list_add_request(req, head); -	NFS_I(inode)->ncommit++; -	spin_unlock(&inode->i_lock); -	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); -	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); -	__mark_inode_dirty(inode, I_DIRTY_DATASYNC); +	spin_lock(cinfo->lock); +	nfs_list_add_request(req, dst); +	cinfo->mds->ncommit++; +	spin_unlock(cinfo->lock); +	if (!cinfo->dreq) { +		inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); +		inc_bdi_stat(req->wb_page->mapping->backing_dev_info, +			     BDI_RECLAIMABLE); +		__mark_inode_dirty(req->wb_context->dentry->d_inode, +				   I_DIRTY_DATASYNC); +	}  }  EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);  /**   * nfs_request_remove_commit_list - Remove request from a commit list   * @req: pointer to a nfs_page + * @cinfo: holds list lock and accounting info   * - * This clears the 
PG_CLEAN bit, and updates the inode global count of + * This clears the PG_CLEAN bit, and updates the cinfo's count of   * number of outstanding requests requiring a commit   * It does not update the MM page stats.   * - * The caller _must_ hold the inode->i_lock and the nfs_page lock. + * The caller _must_ hold the cinfo->lock and the nfs_page lock.   */  void -nfs_request_remove_commit_list(struct nfs_page *req) +nfs_request_remove_commit_list(struct nfs_page *req, +			       struct nfs_commit_info *cinfo)  { -	struct inode *inode = req->wb_context->dentry->d_inode; -  	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))  		return;  	nfs_list_remove_request(req); -	NFS_I(inode)->ncommit--; +	cinfo->mds->ncommit--;  }  EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, +				      struct inode *inode) +{ +	cinfo->lock = &inode->i_lock; +	cinfo->mds = &NFS_I(inode)->commit_info; +	cinfo->ds = pnfs_get_ds_info(inode); +	cinfo->dreq = NULL; +	cinfo->completion_ops = &nfs_commit_completion_ops; +} + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, +		    struct inode *inode, +		    struct nfs_direct_req *dreq) +{ +	if (dreq) +		nfs_init_cinfo_from_dreq(cinfo, dreq); +	else +		nfs_init_cinfo_from_inode(cinfo, inode); +} +EXPORT_SYMBOL_GPL(nfs_init_cinfo);  /*   * Add a request to the inode's commit list.   
*/ -static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +void +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, +			struct nfs_commit_info *cinfo)  { -	struct inode *inode = req->wb_context->dentry->d_inode; - -	if (pnfs_mark_request_commit(req, lseg)) +	if (pnfs_mark_request_commit(req, lseg, cinfo))  		return; -	nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); +	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);  }  static void @@ -494,11 +544,13 @@ nfs_clear_request_commit(struct nfs_page *req)  {  	if (test_bit(PG_CLEAN, &req->wb_flags)) {  		struct inode *inode = req->wb_context->dentry->d_inode; +		struct nfs_commit_info cinfo; -		if (!pnfs_clear_request_commit(req)) { -			spin_lock(&inode->i_lock); -			nfs_request_remove_commit_list(req); -			spin_unlock(&inode->i_lock); +		nfs_init_cinfo_from_inode(&cinfo, inode); +		if (!pnfs_clear_request_commit(req, &cinfo)) { +			spin_lock(cinfo.lock); +			nfs_request_remove_commit_list(req, &cinfo); +			spin_unlock(cinfo.lock);  		}  		nfs_clear_page_commit(req->wb_page);  	} @@ -508,28 +560,25 @@ static inline  int nfs_write_need_commit(struct nfs_write_data *data)  {  	if (data->verf.committed == NFS_DATA_SYNC) -		return data->lseg == NULL; -	else -		return data->verf.committed != NFS_FILE_SYNC; +		return data->header->lseg == NULL; +	return data->verf.committed != NFS_FILE_SYNC;  } -static inline -int nfs_reschedule_unstable_write(struct nfs_page *req, -				  struct nfs_write_data *data) +#else +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, +				      struct inode *inode)  { -	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { -		nfs_mark_request_commit(req, data->lseg); -		return 1; -	} -	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { -		nfs_mark_request_dirty(req); -		return 1; -	} -	return 0;  } -#else -static void -nfs_mark_request_commit(struct nfs_page *req, struct 
pnfs_layout_segment *lseg) + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, +		    struct inode *inode, +		    struct nfs_direct_req *dreq) +{ +} + +void +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, +			struct nfs_commit_info *cinfo)  {  } @@ -544,25 +593,57 @@ int nfs_write_need_commit(struct nfs_write_data *data)  	return 0;  } -static inline -int nfs_reschedule_unstable_write(struct nfs_page *req, -				  struct nfs_write_data *data) +#endif + +static void nfs_write_completion(struct nfs_pgio_header *hdr)  { -	return 0; +	struct nfs_commit_info cinfo; +	unsigned long bytes = 0; + +	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) +		goto out; +	nfs_init_cinfo_from_inode(&cinfo, hdr->inode); +	while (!list_empty(&hdr->pages)) { +		struct nfs_page *req = nfs_list_entry(hdr->pages.next); + +		bytes += req->wb_bytes; +		nfs_list_remove_request(req); +		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && +		    (hdr->good_bytes < bytes)) { +			nfs_set_pageerror(req->wb_page); +			nfs_context_set_write_error(req->wb_context, hdr->error); +			goto remove_req; +		} +		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { +			nfs_mark_request_dirty(req); +			goto next; +		} +		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { +			nfs_mark_request_commit(req, hdr->lseg, &cinfo); +			goto next; +		} +remove_req: +		nfs_inode_remove_request(req); +next: +		nfs_unlock_request(req); +		nfs_end_page_writeback(req->wb_page); +		nfs_release_request(req); +	} +out: +	hdr->release(hdr);  } -#endif  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -static int -nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long +nfs_reqs_to_commit(struct nfs_commit_info *cinfo)  { -	return nfsi->ncommit > 0; +	return cinfo->mds->ncommit;  } -/* i_lock held by caller */ -static int -nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, -		spinlock_t *lock) +/* cinfo->lock held by caller */ +int +nfs_scan_commit_list(struct list_head 
*src, struct list_head *dst, +		     struct nfs_commit_info *cinfo, int max)  {  	struct nfs_page *req, *tmp;  	int ret = 0; @@ -570,12 +651,13 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,  	list_for_each_entry_safe(req, tmp, src, wb_list) {  		if (!nfs_lock_request(req))  			continue; -		if (cond_resched_lock(lock)) +		kref_get(&req->wb_kref); +		if (cond_resched_lock(cinfo->lock))  			list_safe_reset_next(req, tmp, wb_list); -		nfs_request_remove_commit_list(req); +		nfs_request_remove_commit_list(req, cinfo);  		nfs_list_add_request(req, dst);  		ret++; -		if (ret == max) +		if ((ret == max) && !cinfo->dreq)  			break;  	}  	return ret; @@ -584,37 +666,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,  /*   * nfs_scan_commit - Scan an inode for commit requests   * @inode: NFS inode to scan - * @dst: destination list + * @dst: mds destination list + * @cinfo: mds and ds lists of reqs ready to commit   *   * Moves requests from the inode's 'commit' request list.   * The requests are *not* checked to ensure that they form a contiguous set.   
*/ -static int -nfs_scan_commit(struct inode *inode, struct list_head *dst) +int +nfs_scan_commit(struct inode *inode, struct list_head *dst, +		struct nfs_commit_info *cinfo)  { -	struct nfs_inode *nfsi = NFS_I(inode);  	int ret = 0; -	spin_lock(&inode->i_lock); -	if (nfsi->ncommit > 0) { +	spin_lock(cinfo->lock); +	if (cinfo->mds->ncommit > 0) {  		const int max = INT_MAX; -		ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, -				&inode->i_lock); -		ret += pnfs_scan_commit_lists(inode, max - ret, -				&inode->i_lock); +		ret = nfs_scan_commit_list(&cinfo->mds->list, dst, +					   cinfo, max); +		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);  	} -	spin_unlock(&inode->i_lock); +	spin_unlock(cinfo->lock);  	return ret;  }  #else -static inline int nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)  {  	return 0;  } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) +int nfs_scan_commit(struct inode *inode, struct list_head *dst, +		    struct nfs_commit_info *cinfo)  {  	return 0;  } @@ -659,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,  		    || end < req->wb_offset)  			goto out_flushme; -		if (nfs_lock_request_dontget(req)) +		if (nfs_lock_request(req))  			break;  		/* The request is locked, so wait and then retry */ @@ -729,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,  	nfs_grow_file(page, offset, count);  	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);  	nfs_mark_request_dirty(req); -	nfs_unlock_request(req); +	nfs_unlock_and_release_request(req);  	return 0;  } @@ -766,10 +849,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)   * the PageUptodate() flag. In this case, we will need to turn off   * write optimisations that depend on the page contents being correct.   
*/ -static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)  { -	return PageUptodate(page) && -		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); +	if (nfs_have_delegated_attributes(inode)) +		goto out; +	if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) +		return false; +out: +	return PageUptodate(page) != 0;  }  /* @@ -815,17 +902,6 @@ int nfs_updatepage(struct file *file, struct page *page,  	return status;  } -static void nfs_writepage_release(struct nfs_page *req, -				  struct nfs_write_data *data) -{ -	struct page *page = req->wb_page; - -	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) -		nfs_inode_remove_request(req); -	nfs_unlock_request(req); -	nfs_end_page_writeback(page); -} -  static int flush_task_priority(int how)  {  	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { @@ -837,18 +913,18 @@ static int flush_task_priority(int how)  	return RPC_PRIORITY_NORMAL;  } -int nfs_initiate_write(struct nfs_write_data *data, -		       struct rpc_clnt *clnt, +int nfs_initiate_write(struct rpc_clnt *clnt, +		       struct nfs_write_data *data,  		       const struct rpc_call_ops *call_ops, -		       int how) +		       int how, int flags)  { -	struct inode *inode = data->inode; +	struct inode *inode = data->header->inode;  	int priority = flush_task_priority(how);  	struct rpc_task *task;  	struct rpc_message msg = {  		.rpc_argp = &data->args,  		.rpc_resp = &data->res, -		.rpc_cred = data->cred, +		.rpc_cred = data->header->cred,  	};  	struct rpc_task_setup task_setup_data = {  		.rpc_client = clnt, @@ -857,7 +933,7 @@ int nfs_initiate_write(struct nfs_write_data *data,  		.callback_ops = call_ops,  		.callback_data = data,  		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, +		.flags = RPC_TASK_ASYNC | flags,  		.priority = priority,  	};  	int ret = 0; @@ -892,26 +968,21 @@ 
EXPORT_SYMBOL_GPL(nfs_initiate_write);  /*   * Set up the argument/result storage required for the RPC call.   */ -static void nfs_write_rpcsetup(struct nfs_page *req, -		struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_write_data *data,  		unsigned int count, unsigned int offset, -		int how) +		int how, struct nfs_commit_info *cinfo)  { -	struct inode *inode = req->wb_context->dentry->d_inode; +	struct nfs_page *req = data->header->req;  	/* Set up the RPC argument and reply structs  	 * NB: take care not to mess about with data->commit et al. */ -	data->req = req; -	data->inode = inode = req->wb_context->dentry->d_inode; -	data->cred = req->wb_context->cred; - -	data->args.fh     = NFS_FH(inode); +	data->args.fh     = NFS_FH(data->header->inode);  	data->args.offset = req_offset(req) + offset;  	/* pnfs_set_layoutcommit needs this */  	data->mds_offset = data->args.offset;  	data->args.pgbase = req->wb_pgbase + offset; -	data->args.pages  = data->pagevec; +	data->args.pages  = data->pages.pagevec;  	data->args.count  = count;  	data->args.context = get_nfs_open_context(req->wb_context);  	data->args.lock_context = req->wb_lock_context; @@ -920,7 +991,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,  	case 0:  		break;  	case FLUSH_COND_STABLE: -		if (nfs_need_commit(NFS_I(inode))) +		if (nfs_reqs_to_commit(cinfo))  			break;  	default:  		data->args.stable = NFS_FILE_SYNC; @@ -936,9 +1007,9 @@ static int nfs_do_write(struct nfs_write_data *data,  		const struct rpc_call_ops *call_ops,  		int how)  { -	struct inode *inode = data->args.context->dentry->d_inode; +	struct inode *inode = data->header->inode; -	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); +	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);  }  static int nfs_do_multiple_writes(struct list_head *head, @@ -951,7 +1022,7 @@ static int nfs_do_multiple_writes(struct list_head *head,  	while (!list_empty(head)) {  		int ret2; -		data = 
list_entry(head->next, struct nfs_write_data, list); +		data = list_first_entry(head, struct nfs_write_data, list);  		list_del_init(&data->list);  		ret2 = nfs_do_write(data, call_ops, how); @@ -967,31 +1038,60 @@ static int nfs_do_multiple_writes(struct list_head *head,   */  static void nfs_redirty_request(struct nfs_page *req)  { -	struct page *page = req->wb_page; -  	nfs_mark_request_dirty(req);  	nfs_unlock_request(req); -	nfs_end_page_writeback(page); +	nfs_end_page_writeback(req->wb_page); +	nfs_release_request(req); +} + +static void nfs_async_write_error(struct list_head *head) +{ +	struct nfs_page	*req; + +	while (!list_empty(head)) { +		req = nfs_list_entry(head->next); +		nfs_list_remove_request(req); +		nfs_redirty_request(req); +	} +} + +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { +	.error_cleanup = nfs_async_write_error, +	.completion = nfs_write_completion, +}; + +static void nfs_flush_error(struct nfs_pageio_descriptor *desc, +		struct nfs_pgio_header *hdr) +{ +	set_bit(NFS_IOHDR_REDO, &hdr->flags); +	while (!list_empty(&hdr->rpc_list)) { +		struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, +				struct nfs_write_data, list); +		list_del(&data->list); +		nfs_writedata_release(data); +	} +	desc->pg_completion_ops->error_cleanup(&desc->pg_list);  }  /*   * Generate multiple small requests to write out a single   * contiguous dirty area on one page.   
*/ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, +			   struct nfs_pgio_header *hdr)  { -	struct nfs_page *req = nfs_list_entry(desc->pg_list.next); +	struct nfs_page *req = hdr->req;  	struct page *page = req->wb_page;  	struct nfs_write_data *data;  	size_t wsize = desc->pg_bsize, nbytes;  	unsigned int offset;  	int requests = 0; -	int ret = 0; +	struct nfs_commit_info cinfo; -	nfs_list_remove_request(req); +	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);  	if ((desc->pg_ioflags & FLUSH_COND_STABLE) && -	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || +	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||  	     desc->pg_count > wsize))  		desc->pg_ioflags &= ~FLUSH_COND_STABLE; @@ -1001,28 +1101,22 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head  	do {  		size_t len = min(nbytes, wsize); -		data = nfs_writedata_alloc(1); -		if (!data) -			goto out_bad; -		data->pagevec[0] = page; -		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); -		list_add(&data->list, res); +		data = nfs_writedata_alloc(hdr, 1); +		if (!data) { +			nfs_flush_error(desc, hdr); +			return -ENOMEM; +		} +		data->pages.pagevec[0] = page; +		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); +		list_add(&data->list, &hdr->rpc_list);  		requests++;  		nbytes -= len;  		offset += len;  	} while (nbytes != 0); -	atomic_set(&req->wb_complete, requests); -	desc->pg_rpc_callops = &nfs_write_partial_ops; -	return ret; - -out_bad: -	while (!list_empty(res)) { -		data = list_entry(res->next, struct nfs_write_data, list); -		list_del(&data->list); -		nfs_writedata_release(data); -	} -	nfs_redirty_request(req); -	return -ENOMEM; +	nfs_list_remove_request(req); +	nfs_list_add_request(req, &hdr->pages); +	desc->pg_rpc_callops = &nfs_write_common_ops; +	return 0;  }  /* @@ -1033,62 +1127,71 @@ out_bad:   * This is the case if 
nfs_updatepage detects a conflicting request   * that has been written but not committed.   */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, +			 struct nfs_pgio_header *hdr)  {  	struct nfs_page		*req;  	struct page		**pages;  	struct nfs_write_data	*data;  	struct list_head *head = &desc->pg_list; -	int ret = 0; +	struct nfs_commit_info cinfo; -	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, -						      desc->pg_count)); +	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, +							   desc->pg_count));  	if (!data) { -		while (!list_empty(head)) { -			req = nfs_list_entry(head->next); -			nfs_list_remove_request(req); -			nfs_redirty_request(req); -		} -		ret = -ENOMEM; -		goto out; +		nfs_flush_error(desc, hdr); +		return -ENOMEM;  	} -	pages = data->pagevec; + +	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); +	pages = data->pages.pagevec;  	while (!list_empty(head)) {  		req = nfs_list_entry(head->next);  		nfs_list_remove_request(req); -		nfs_list_add_request(req, &data->pages); +		nfs_list_add_request(req, &hdr->pages);  		*pages++ = req->wb_page;  	} -	req = nfs_list_entry(data->pages.next);  	if ((desc->pg_ioflags & FLUSH_COND_STABLE) && -	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) +	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))  		desc->pg_ioflags &= ~FLUSH_COND_STABLE;  	/* Set up the argument struct */ -	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); -	list_add(&data->list, res); -	desc->pg_rpc_callops = &nfs_write_full_ops; -out: -	return ret; +	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); +	list_add(&data->list, &hdr->rpc_list); +	desc->pg_rpc_callops = &nfs_write_common_ops; +	return 0;  } -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, +		      struct 
nfs_pgio_header *hdr)  {  	if (desc->pg_bsize < PAGE_CACHE_SIZE) -		return nfs_flush_multi(desc, head); -	return nfs_flush_one(desc, head); +		return nfs_flush_multi(desc, hdr); +	return nfs_flush_one(desc, hdr);  }  static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)  { -	LIST_HEAD(head); +	struct nfs_write_header *whdr; +	struct nfs_pgio_header *hdr;  	int ret; -	ret = nfs_generic_flush(desc, &head); +	whdr = nfs_writehdr_alloc(); +	if (!whdr) { +		desc->pg_completion_ops->error_cleanup(&desc->pg_list); +		return -ENOMEM; +	} +	hdr = &whdr->header; +	nfs_pgheader_init(desc, hdr, nfs_writehdr_free); +	atomic_inc(&hdr->refcnt); +	ret = nfs_generic_flush(desc, hdr);  	if (ret == 0) -		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, -				desc->pg_ioflags); +		ret = nfs_do_multiple_writes(&hdr->rpc_list, +					     desc->pg_rpc_callops, +					     desc->pg_ioflags); +	if (atomic_dec_and_test(&hdr->refcnt)) +		hdr->completion_ops->completion(hdr);  	return ret;  } @@ -1098,9 +1201,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {  };  void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, -				  struct inode *inode, int ioflags) +			       struct inode *inode, int ioflags, +			       const struct nfs_pgio_completion_ops *compl_ops)  { -	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, +	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,  				NFS_SERVER(inode)->wsize, ioflags);  } @@ -1111,80 +1215,27 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)  }  EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, -				  struct inode *inode, int ioflags) +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +			   struct inode *inode, int ioflags, +			   const struct nfs_pgio_completion_ops *compl_ops)  { -	if (!pnfs_pageio_init_write(pgio, inode, ioflags)) -		nfs_pageio_init_write_mds(pgio, inode, ioflags); +	
if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) +		nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);  } -/* - * Handle a write reply that flushed part of a page. - */ -static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) +void nfs_write_prepare(struct rpc_task *task, void *calldata)  { -	struct nfs_write_data	*data = calldata; - -	dprintk("NFS: %5u write(%s/%lld %d@%lld)", -		task->tk_pid, -		data->req->wb_context->dentry->d_inode->i_sb->s_id, -		(long long) -		  NFS_FILEID(data->req->wb_context->dentry->d_inode), -		data->req->wb_bytes, (long long)req_offset(data->req)); - -	nfs_writeback_done(task, data); +	struct nfs_write_data *data = calldata; +	NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);  } -static void nfs_writeback_release_partial(void *calldata) +void nfs_commit_prepare(struct rpc_task *task, void *calldata)  { -	struct nfs_write_data	*data = calldata; -	struct nfs_page		*req = data->req; -	struct page		*page = req->wb_page; -	int status = data->task.tk_status; +	struct nfs_commit_data *data = calldata; -	if (status < 0) { -		nfs_set_pageerror(page); -		nfs_context_set_write_error(req->wb_context, status); -		dprintk(", error = %d\n", status); -		goto out; -	} - -	if (nfs_write_need_commit(data)) { -		struct inode *inode = page->mapping->host; - -		spin_lock(&inode->i_lock); -		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { -			/* Do nothing we need to resend the writes */ -		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { -			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); -			dprintk(" defer commit\n"); -		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { -			set_bit(PG_NEED_RESCHED, &req->wb_flags); -			clear_bit(PG_NEED_COMMIT, &req->wb_flags); -			dprintk(" server reboot detected\n"); -		} -		spin_unlock(&inode->i_lock); -	} else -		dprintk(" OK\n"); - -out: -	if (atomic_dec_and_test(&req->wb_complete)) -		nfs_writepage_release(req, 
data); -	nfs_writedata_release(calldata); +	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);  } -void nfs_write_prepare(struct rpc_task *task, void *calldata) -{ -	struct nfs_write_data *data = calldata; -	NFS_PROTO(data->inode)->write_rpc_prepare(task, data); -} - -static const struct rpc_call_ops nfs_write_partial_ops = { -	.rpc_call_prepare = nfs_write_prepare, -	.rpc_call_done = nfs_writeback_done_partial, -	.rpc_release = nfs_writeback_release_partial, -}; -  /*   * Handle a write reply that flushes a whole page.   * @@ -1192,59 +1243,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {   *	  writebacks since the page->count is kept > 1 for as long   *	  as the page has a write request pending.   */ -static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) +static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)  {  	struct nfs_write_data	*data = calldata;  	nfs_writeback_done(task, data);  } -static void nfs_writeback_release_full(void *calldata) +static void nfs_writeback_release_common(void *calldata)  {  	struct nfs_write_data	*data = calldata; +	struct nfs_pgio_header *hdr = data->header;  	int status = data->task.tk_status; +	struct nfs_page *req = hdr->req; -	/* Update attributes as result of writeback. 
*/ -	while (!list_empty(&data->pages)) { -		struct nfs_page *req = nfs_list_entry(data->pages.next); -		struct page *page = req->wb_page; - -		nfs_list_remove_request(req); - -		dprintk("NFS: %5u write (%s/%lld %d@%lld)", -			data->task.tk_pid, -			req->wb_context->dentry->d_inode->i_sb->s_id, -			(long long)NFS_FILEID(req->wb_context->dentry->d_inode), -			req->wb_bytes, -			(long long)req_offset(req)); - -		if (status < 0) { -			nfs_set_pageerror(page); -			nfs_context_set_write_error(req->wb_context, status); -			dprintk(", error = %d\n", status); -			goto remove_request; -		} - -		if (nfs_write_need_commit(data)) { +	if ((status >= 0) && nfs_write_need_commit(data)) { +		spin_lock(&hdr->lock); +		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) +			; /* Do nothing */ +		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))  			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); -			nfs_mark_request_commit(req, data->lseg); -			dprintk(" marked for commit\n"); -			goto next; -		} -		dprintk(" OK\n"); -remove_request: -		nfs_inode_remove_request(req); -	next: -		nfs_unlock_request(req); -		nfs_end_page_writeback(page); +		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) +			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); +		spin_unlock(&hdr->lock);  	} -	nfs_writedata_release(calldata); +	nfs_writedata_release(data);  } -static const struct rpc_call_ops nfs_write_full_ops = { +static const struct rpc_call_ops nfs_write_common_ops = {  	.rpc_call_prepare = nfs_write_prepare, -	.rpc_call_done = nfs_writeback_done_full, -	.rpc_release = nfs_writeback_release_full, +	.rpc_call_done = nfs_writeback_done_common, +	.rpc_release = nfs_writeback_release_common,  }; @@ -1255,6 +1284,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)  {  	struct nfs_writeargs	*argp = &data->args;  	struct nfs_writeres	*resp = &data->res; +	struct inode		*inode = data->header->inode;  	int status;  	dprintk("NFS: %5u 
nfs_writeback_done (status %d)\n", @@ -1267,10 +1297,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)  	 * another writer had changed the file, but some applications  	 * depend on tighter cache coherency when writing.  	 */ -	status = NFS_PROTO(data->inode)->write_done(task, data); +	status = NFS_PROTO(inode)->write_done(task, data);  	if (status != 0)  		return; -	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); +	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)  	if (resp->verf->committed < argp->stable && task->tk_status >= 0) { @@ -1288,46 +1318,47 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)  		if (time_before(complain, jiffies)) {  			dprintk("NFS:       faulty NFS server %s:"  				" (committed = %d) != (stable = %d)\n", -				NFS_SERVER(data->inode)->nfs_client->cl_hostname, +				NFS_SERVER(inode)->nfs_client->cl_hostname,  				resp->verf->committed, argp->stable);  			complain = jiffies + 300 * HZ;  		}  	}  #endif -	/* Is this a short write? */ -	if (task->tk_status >= 0 && resp->count < argp->count) { +	if (task->tk_status < 0) +		nfs_set_pgio_error(data->header, task->tk_status, argp->offset); +	else if (resp->count < argp->count) {  		static unsigned long    complain; -		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); +		/* This a short write! */ +		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);  		/* Has the server at least made some progress? */ -		if (resp->count != 0) { -			/* Was this an NFSv2 write or an NFSv3 stable write? */ -			if (resp->verf->committed != NFS_UNSTABLE) { -				/* Resend from where the server left off */ -				data->mds_offset += resp->count; -				argp->offset += resp->count; -				argp->pgbase += resp->count; -				argp->count -= resp->count; -			} else { -				/* Resend as a stable write in order to avoid -				 * headaches in the case of a server crash. 
-				 */ -				argp->stable = NFS_FILE_SYNC; +		if (resp->count == 0) { +			if (time_before(complain, jiffies)) { +				printk(KERN_WARNING +				       "NFS: Server wrote zero bytes, expected %u.\n", +				       argp->count); +				complain = jiffies + 300 * HZ;  			} -			rpc_restart_call_prepare(task); +			nfs_set_pgio_error(data->header, -EIO, argp->offset); +			task->tk_status = -EIO;  			return;  		} -		if (time_before(complain, jiffies)) { -			printk(KERN_WARNING -			       "NFS: Server wrote zero bytes, expected %u.\n", -					argp->count); -			complain = jiffies + 300 * HZ; +		/* Was this an NFSv2 write or an NFSv3 stable write? */ +		if (resp->verf->committed != NFS_UNSTABLE) { +			/* Resend from where the server left off */ +			data->mds_offset += resp->count; +			argp->offset += resp->count; +			argp->pgbase += resp->count; +			argp->count -= resp->count; +		} else { +			/* Resend as a stable write in order to avoid +			 * headaches in the case of a server crash. +			 */ +			argp->stable = NFS_FILE_SYNC;  		} -		/* Can't do anything about it except throw an error. */ -		task->tk_status = -EIO; +		rpc_restart_call_prepare(task);  	} -	return;  } @@ -1347,26 +1378,23 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)  	return (ret < 0) ? 
ret : 1;  } -void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_clear_lock(struct nfs_inode *nfsi)  {  	clear_bit(NFS_INO_COMMIT, &nfsi->flags);  	smp_mb__after_clear_bit();  	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);  } -EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); -void nfs_commitdata_release(void *data) +void nfs_commitdata_release(struct nfs_commit_data *data)  { -	struct nfs_write_data *wdata = data; - -	put_nfs_open_context(wdata->args.context); -	nfs_commit_free(wdata); +	put_nfs_open_context(data->context); +	nfs_commit_free(data);  }  EXPORT_SYMBOL_GPL(nfs_commitdata_release); -int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, +int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,  			const struct rpc_call_ops *call_ops, -			int how) +			int how, int flags)  {  	struct rpc_task *task;  	int priority = flush_task_priority(how); @@ -1382,7 +1410,7 @@ int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,  		.callback_ops = call_ops,  		.callback_data = data,  		.workqueue = nfsiod_workqueue, -		.flags = RPC_TASK_ASYNC, +		.flags = RPC_TASK_ASYNC | flags,  		.priority = priority,  	};  	/* Set up the initial task struct.  */ @@ -1403,9 +1431,10 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);  /*   * Set up the argument/result storage required for the RPC call.   
*/ -void nfs_init_commit(struct nfs_write_data *data, -			    struct list_head *head, -			    struct pnfs_layout_segment *lseg) +void nfs_init_commit(struct nfs_commit_data *data, +		     struct list_head *head, +		     struct pnfs_layout_segment *lseg, +		     struct nfs_commit_info *cinfo)  {  	struct nfs_page *first = nfs_list_entry(head->next);  	struct inode *inode = first->wb_context->dentry->d_inode; @@ -1419,13 +1448,14 @@ void nfs_init_commit(struct nfs_write_data *data,  	data->cred	  = first->wb_context->cred;  	data->lseg	  = lseg; /* reference transferred */  	data->mds_ops     = &nfs_commit_ops; +	data->completion_ops = cinfo->completion_ops; +	data->dreq	  = cinfo->dreq;  	data->args.fh     = NFS_FH(data->inode);  	/* Note: we always request a commit of the entire inode */  	data->args.offset = 0;  	data->args.count  = 0; -	data->args.context = get_nfs_open_context(first->wb_context); -	data->res.count   = 0; +	data->context     = get_nfs_open_context(first->wb_context);  	data->res.fattr   = &data->fattr;  	data->res.verf    = &data->verf;  	nfs_fattr_init(&data->fattr); @@ -1433,18 +1463,21 @@ void nfs_init_commit(struct nfs_write_data *data,  EXPORT_SYMBOL_GPL(nfs_init_commit);  void nfs_retry_commit(struct list_head *page_list, -		      struct pnfs_layout_segment *lseg) +		      struct pnfs_layout_segment *lseg, +		      struct nfs_commit_info *cinfo)  {  	struct nfs_page *req;  	while (!list_empty(page_list)) {  		req = nfs_list_entry(page_list->next);  		nfs_list_remove_request(req); -		nfs_mark_request_commit(req, lseg); -		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); -		dec_bdi_stat(req->wb_page->mapping->backing_dev_info, -			     BDI_RECLAIMABLE); -		nfs_unlock_request(req); +		nfs_mark_request_commit(req, lseg, cinfo); +		if (!cinfo->dreq) { +			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); +			dec_bdi_stat(req->wb_page->mapping->backing_dev_info, +				     BDI_RECLAIMABLE); +		} +		nfs_unlock_and_release_request(req);  	}  }  
EXPORT_SYMBOL_GPL(nfs_retry_commit); @@ -1453,9 +1486,10 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);   * Commit dirty pages   */  static int -nfs_commit_list(struct inode *inode, struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how, +		struct nfs_commit_info *cinfo)  { -	struct nfs_write_data	*data; +	struct nfs_commit_data	*data;  	data = nfs_commitdata_alloc(); @@ -1463,11 +1497,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)  		goto out_bad;  	/* Set up the argument struct */ -	nfs_init_commit(data, head, NULL); -	return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); +	nfs_init_commit(data, head, NULL, cinfo); +	atomic_inc(&cinfo->mds->rpcs_out); +	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, +				   how, 0);   out_bad: -	nfs_retry_commit(head, NULL); -	nfs_commit_clear_lock(NFS_I(inode)); +	nfs_retry_commit(head, NULL, cinfo); +	cinfo->completion_ops->error_cleanup(NFS_I(inode));  	return -ENOMEM;  } @@ -1476,7 +1512,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)   */  static void nfs_commit_done(struct rpc_task *task, void *calldata)  { -	struct nfs_write_data	*data = calldata; +	struct nfs_commit_data	*data = calldata;          dprintk("NFS: %5u nfs_commit_done (status %d)\n",                                  task->tk_pid, task->tk_status); @@ -1485,10 +1521,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)  	NFS_PROTO(data->inode)->commit_done(task, data);  } -void nfs_commit_release_pages(struct nfs_write_data *data) +static void nfs_commit_release_pages(struct nfs_commit_data *data)  {  	struct nfs_page	*req;  	int status = data->task.tk_status; +	struct nfs_commit_info cinfo;  	while (!list_empty(&data->pages)) {  		req = nfs_list_entry(data->pages.next); @@ -1519,42 +1556,59 @@ void nfs_commit_release_pages(struct nfs_write_data *data)  		dprintk(" mismatch\n");  		
nfs_mark_request_dirty(req);  	next: -		nfs_unlock_request(req); +		nfs_unlock_and_release_request(req);  	} +	nfs_init_cinfo(&cinfo, data->inode, data->dreq); +	if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) +		nfs_commit_clear_lock(NFS_I(data->inode));  } -EXPORT_SYMBOL_GPL(nfs_commit_release_pages);  static void nfs_commit_release(void *calldata)  { -	struct nfs_write_data *data = calldata; +	struct nfs_commit_data *data = calldata; -	nfs_commit_release_pages(data); -	nfs_commit_clear_lock(NFS_I(data->inode)); +	data->completion_ops->completion(data);  	nfs_commitdata_release(calldata);  }  static const struct rpc_call_ops nfs_commit_ops = { -	.rpc_call_prepare = nfs_write_prepare, +	.rpc_call_prepare = nfs_commit_prepare,  	.rpc_call_done = nfs_commit_done,  	.rpc_release = nfs_commit_release,  }; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { +	.completion = nfs_commit_release_pages, +	.error_cleanup = nfs_commit_clear_lock, +}; + +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, +			    int how, struct nfs_commit_info *cinfo) +{ +	int status; + +	status = pnfs_commit_list(inode, head, how, cinfo); +	if (status == PNFS_NOT_ATTEMPTED) +		status = nfs_commit_list(inode, head, how, cinfo); +	return status; +} +  int nfs_commit_inode(struct inode *inode, int how)  {  	LIST_HEAD(head); +	struct nfs_commit_info cinfo;  	int may_wait = how & FLUSH_SYNC;  	int res;  	res = nfs_commit_set_lock(NFS_I(inode), may_wait);  	if (res <= 0)  		goto out_mark_dirty; -	res = nfs_scan_commit(inode, &head); +	nfs_init_cinfo_from_inode(&cinfo, inode); +	res = nfs_scan_commit(inode, &head, &cinfo);  	if (res) {  		int error; -		error = pnfs_commit_list(inode, &head, how); -		if (error == PNFS_NOT_ATTEMPTED) -			error = nfs_commit_list(inode, &head, how); +		error = nfs_generic_commit_list(inode, &head, how, &cinfo);  		if (error < 0)  			return error;  		if (!may_wait) @@ -1585,14 +1639,14 @@ static int 
nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr  	int ret = 0;  	/* no commits means nothing needs to be done */ -	if (!nfsi->ncommit) +	if (!nfsi->commit_info.ncommit)  		return ret;  	if (wbc->sync_mode == WB_SYNC_NONE) {  		/* Don't commit yet if this is a non-blocking flush and there  		 * are a lot of outstanding writes for this mapping.  		 */ -		if (nfsi->ncommit <= (nfsi->npages >> 1)) +		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))  			goto out_mark_dirty;  		/* don't wait for the COMMIT response */ @@ -1665,7 +1719,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)  		req = nfs_page_find_request(page);  		if (req == NULL)  			break; -		if (nfs_lock_request_dontget(req)) { +		if (nfs_lock_request(req)) {  			nfs_clear_request_commit(req);  			nfs_inode_remove_request(req);  			/* @@ -1673,7 +1727,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)  			 * page as being dirty  			 */  			cancel_dirty_page(page, PAGE_CACHE_SIZE); -			nfs_unlock_request(req); +			nfs_unlock_and_release_request(req);  			break;  		}  		ret = nfs_wait_on_request(req); @@ -1742,7 +1796,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,  int __init nfs_init_writepagecache(void)  {  	nfs_wdata_cachep = kmem_cache_create("nfs_write_data", -					     sizeof(struct nfs_write_data), +					     sizeof(struct nfs_write_header),  					     0, SLAB_HWCACHE_ALIGN,  					     NULL);  	if (nfs_wdata_cachep == NULL) @@ -1753,6 +1807,13 @@ int __init nfs_init_writepagecache(void)  	if (nfs_wdata_mempool == NULL)  		return -ENOMEM; +	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", +					     sizeof(struct nfs_commit_data), +					     0, SLAB_HWCACHE_ALIGN, +					     NULL); +	if (nfs_cdata_cachep == NULL) +		return -ENOMEM; +  	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,  						      nfs_wdata_cachep);  	if (nfs_commit_mempool == NULL)  |