diff options
Diffstat (limited to 'fs/nfs/pnfs.c')
| -rw-r--r-- | fs/nfs/pnfs.c | 432 | 
1 files changed, 245 insertions, 187 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00feacd4b..e7165d91536 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@  #include "iostat.h"  #define NFSDBG_FACILITY		NFSDBG_PNFS +#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)  /* Locking:   * @@ -190,7 +191,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);  /* Need to hold i_lock if caller does not already hold reference */  void -get_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)  {  	atomic_inc(&lo->plh_refcount);  } @@ -199,43 +200,107 @@ static struct pnfs_layout_hdr *  pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)  {  	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; -	return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : -		kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); +	return ld->alloc_layout_hdr(ino, gfp_flags);  }  static void  pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)  { -	struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; +	struct nfs_server *server = NFS_SERVER(lo->plh_inode); +	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + +	if (!list_empty(&lo->plh_layouts)) { +		struct nfs_client *clp = server->nfs_client; + +		spin_lock(&clp->cl_lock); +		list_del_init(&lo->plh_layouts); +		spin_unlock(&clp->cl_lock); +	}  	put_rpccred(lo->plh_lc_cred); -	return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); +	return ld->free_layout_hdr(lo);  }  static void -destroy_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)  { +	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);  	dprintk("%s: freeing layout cache %p\n", __func__, lo); -	BUG_ON(!list_empty(&lo->plh_layouts)); -	NFS_I(lo->plh_inode)->layout = NULL; -	pnfs_free_layout_hdr(lo); +	nfsi->layout = NULL; +	/* Reset MDS Threshold I/O counters */ +	nfsi->write_io = 0; +	nfsi->read_io = 0; +} + +void +pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) +{ +	struct inode *inode = lo->plh_inode; + +	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { +		pnfs_detach_layout_hdr(lo); +		spin_unlock(&inode->i_lock); +		pnfs_free_layout_hdr(lo); +	} +} + +static int +pnfs_iomode_to_fail_bit(u32 iomode) +{ +	return iomode == IOMODE_RW ? +		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;  }  static void -put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)  { -	if (atomic_dec_and_test(&lo->plh_refcount)) -		destroy_layout_hdr(lo); +	lo->plh_retry_timestamp = jiffies; +	if (test_and_set_bit(fail_bit, &lo->plh_flags)) +		atomic_inc(&lo->plh_refcount);  } -void -put_layout_hdr(struct pnfs_layout_hdr *lo) +static void +pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) +{ +	if (test_and_clear_bit(fail_bit, &lo->plh_flags)) +		atomic_dec(&lo->plh_refcount); +} + +static void +pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)  {  	struct inode *inode = lo->plh_inode; +	struct pnfs_layout_range range = { +		.iomode = iomode, +		.offset = 0, +		.length = NFS4_MAX_UINT64, +	}; +	LIST_HEAD(head); -	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { -		destroy_layout_hdr(lo); -		spin_unlock(&inode->i_lock); +	spin_lock(&inode->i_lock); +	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); +	pnfs_mark_matching_lsegs_invalid(lo, &head, &range); +	spin_unlock(&inode->i_lock); +	pnfs_free_lseg_list(&head); +	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, +			iomode == IOMODE_RW ?  "RW" : "READ"); +} + +static bool +pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ +	unsigned long start, end; +	int fail_bit = pnfs_iomode_to_fail_bit(iomode); + +	if (test_bit(fail_bit, &lo->plh_flags) == 0) +		return false; +	end = jiffies; +	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; +	if (!time_in_range(lo->plh_retry_timestamp, start, end)) { +		/* It is time to retry the failed layoutgets */ +		pnfs_layout_clear_fail_bit(lo, fail_bit); +		return false;  	} +	return true;  }  static void @@ -249,33 +314,32 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)  	lseg->pls_layout = lo;  } -static void free_lseg(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)  {  	struct inode *ino = lseg->pls_layout->plh_inode;  	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); -	/* Matched by get_layout_hdr in pnfs_insert_layout */ -	put_layout_hdr(NFS_I(ino)->layout);  }  static void -put_lseg_common(struct pnfs_layout_segment *lseg) +pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, +		struct pnfs_layout_segment *lseg)  { -	struct inode *inode = lseg->pls_layout->plh_inode; +	struct inode *inode = lo->plh_inode;  	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));  	list_del_init(&lseg->pls_list); -	if (list_empty(&lseg->pls_layout->plh_segs)) { -		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); -		/* Matched by initial refcount set in alloc_init_layout_hdr */ -		put_layout_hdr_locked(lseg->pls_layout); -	} +	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ +	atomic_dec(&lo->plh_refcount); +	if (list_empty(&lo->plh_segs)) +		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);  	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);  }  void -put_lseg(struct pnfs_layout_segment *lseg) +pnfs_put_lseg(struct pnfs_layout_segment *lseg)  { +	struct pnfs_layout_hdr *lo;  	struct inode *inode;  	if (!lseg) @@ -284,17 +348,17 @@ put_lseg(struct pnfs_layout_segment *lseg)  	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,  		atomic_read(&lseg->pls_refcount),  		test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); -	inode = lseg->pls_layout->plh_inode; +	lo = lseg->pls_layout; +	inode = lo->plh_inode;  	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { -		LIST_HEAD(free_me); - -		put_lseg_common(lseg); -		list_add(&lseg->pls_list, &free_me); +		pnfs_get_layout_hdr(lo); +		pnfs_layout_remove_lseg(lo, lseg);  		spin_unlock(&inode->i_lock); -		pnfs_free_lseg_list(&free_me); +		pnfs_free_lseg(lseg); +		pnfs_put_layout_hdr(lo);  	}  } -EXPORT_SYMBOL_GPL(put_lseg); +EXPORT_SYMBOL_GPL(pnfs_put_lseg);  static inline u64  end_offset(u64 start, u64 len) @@ -305,17 +369,6 @@ end_offset(u64 start, u64 len)  	return end >= start ? end : NFS4_MAX_UINT64;  } -/* last octet in a range */ -static inline u64 -last_byte_offset(u64 start, u64 len) -{ -	u64 end; - -	BUG_ON(!len); -	end = start + len; -	return end > start ? end - 1 : NFS4_MAX_UINT64; -} -  /*   * is l2 fully contained in l1?   *   start1                             end1 @@ -378,7 +431,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,  		dprintk("%s: lseg %p ref %d\n", __func__, lseg,  			atomic_read(&lseg->pls_refcount));  		if (atomic_dec_and_test(&lseg->pls_refcount)) { -			put_lseg_common(lseg); +			pnfs_layout_remove_lseg(lseg->pls_layout, lseg);  			list_add(&lseg->pls_list, tmp_list);  			rv = 1;  		} @@ -390,7 +443,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,   * after call.   */  int -mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, +pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,  			    struct list_head *tmp_list,  			    struct pnfs_layout_range *recall_range)  { @@ -399,14 +452,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,  	dprintk("%s:Begin lo %p\n", __func__, lo); -	if (list_empty(&lo->plh_segs)) { -		/* Reset MDS Threshold I/O counters */ -		NFS_I(lo->plh_inode)->write_io = 0; -		NFS_I(lo->plh_inode)->read_io = 0; -		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) -			put_layout_hdr_locked(lo); +	if (list_empty(&lo->plh_segs))  		return 0; -	}  	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)  		if (!recall_range ||  		    should_free_lseg(&lseg->pls_range, recall_range)) { @@ -426,25 +473,13 @@ void  pnfs_free_lseg_list(struct list_head *free_me)  {  	struct pnfs_layout_segment *lseg, *tmp; -	struct pnfs_layout_hdr *lo;  	if (list_empty(free_me))  		return; -	lo = list_first_entry(free_me, struct pnfs_layout_segment, -			      pls_list)->pls_layout; - -	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { -		struct nfs_client *clp; - -		clp = NFS_SERVER(lo->plh_inode)->nfs_client; -		spin_lock(&clp->cl_lock); -		list_del_init(&lo->plh_layouts); -		spin_unlock(&clp->cl_lock); -	}  	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {  		list_del(&lseg->pls_list); -		free_lseg(lseg); +		pnfs_free_lseg(lseg);  	}  } @@ -458,10 +493,15 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)  	lo = nfsi->layout;  	if (lo) {  		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ -		mark_matching_lsegs_invalid(lo, &tmp_list, NULL); -	} -	spin_unlock(&nfsi->vfs_inode.i_lock); -	pnfs_free_lseg_list(&tmp_list); +		pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); +		pnfs_get_layout_hdr(lo); +		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); +		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); +		spin_unlock(&nfsi->vfs_inode.i_lock); +		pnfs_free_lseg_list(&tmp_list); +		pnfs_put_layout_hdr(lo); +	} else +		spin_unlock(&nfsi->vfs_inode.i_lock);  }  EXPORT_SYMBOL_GPL(pnfs_destroy_layout); @@ -498,46 +538,54 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)  	}  } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ +	return (s32)s1 - (s32)s2 > 0; +} +  /* update lo->plh_stateid with new if is more recent */  void  pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,  			bool update_barrier)  { -	u32 oldseq, newseq; +	u32 oldseq, newseq, new_barrier; +	int empty = list_empty(&lo->plh_segs);  	oldseq = be32_to_cpu(lo->plh_stateid.seqid);  	newseq = be32_to_cpu(new->seqid); -	if ((int)(newseq - oldseq) > 0) { +	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {  		nfs4_stateid_copy(&lo->plh_stateid, new);  		if (update_barrier) { -			u32 new_barrier = be32_to_cpu(new->seqid); - -			if ((int)(new_barrier - lo->plh_barrier)) -				lo->plh_barrier = new_barrier; +			new_barrier = be32_to_cpu(new->seqid);  		} else {  			/* Because of wraparound, we want to keep the barrier -			 * "close" to the current seqids.  It needs to be -			 * within 2**31 to count as "behind", so if it -			 * gets too near that limit, give us a litle leeway -			 * and bring it to within 2**30. -			 * NOTE - and yes, this is all unsigned arithmetic. +			 * "close" to the current seqids.  			 */ -			if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) -				lo->plh_barrier = newseq - (1 << 30); +			new_barrier = newseq - atomic_read(&lo->plh_outstanding);  		} +		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) +			lo->plh_barrier = new_barrier;  	}  } +static bool +pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, +		const nfs4_stateid *stateid) +{ +	u32 seqid = be32_to_cpu(stateid->seqid); + +	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); +} +  /* lget is set to 1 if called from inside send_layoutget call chain */  static bool -pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, -			int lget) +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget)  { -	if ((stateid) && -	    (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) -		return true;  	return lo->plh_block_lgets || -		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||  		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||  		(list_empty(&lo->plh_segs) &&  		 (atomic_read(&lo->plh_outstanding) > lget)); @@ -551,7 +599,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,  	dprintk("--> %s\n", __func__);  	spin_lock(&lo->plh_inode->i_lock); -	if (pnfs_layoutgets_blocked(lo, NULL, 1)) { +	if (pnfs_layoutgets_blocked(lo, 1)) {  		status = -EAGAIN;  	} else if (list_empty(&lo->plh_segs)) {  		int seq; @@ -582,11 +630,10 @@ send_layoutget(struct pnfs_layout_hdr *lo,  	struct inode *ino = lo->plh_inode;  	struct nfs_server *server = NFS_SERVER(ino);  	struct nfs4_layoutget *lgp; -	struct pnfs_layout_segment *lseg = NULL; +	struct pnfs_layout_segment *lseg;  	dprintk("--> %s\n", __func__); -	BUG_ON(ctx == NULL);  	lgp = kzalloc(sizeof(*lgp), gfp_flags);  	if (lgp == NULL)  		return NULL; @@ -599,16 +646,22 @@ send_layoutget(struct pnfs_layout_hdr *lo,  	lgp->args.type = server->pnfs_curr_ld->id;  	lgp->args.inode = ino;  	lgp->args.ctx = get_nfs_open_context(ctx); -	lgp->lsegpp = &lseg;  	lgp->gfp_flags = gfp_flags;  	/* Synchronously retrieve layout information from server and  	 * store in lseg.  	 */ -	nfs4_proc_layoutget(lgp, gfp_flags); -	if (!lseg) { -		/* remember that LAYOUTGET failed and suspend trying */ -		set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); +	lseg = nfs4_proc_layoutget(lgp, gfp_flags); +	if (IS_ERR(lseg)) { +		switch (PTR_ERR(lseg)) { +		case -ENOMEM: +		case -ERESTARTSYS: +			break; +		default: +			/* remember that LAYOUTGET failed and suspend trying */ +			pnfs_layout_io_set_failed(lo, range->iomode); +		} +		return NULL;  	}  	return lseg; @@ -636,25 +689,24 @@ _pnfs_return_layout(struct inode *ino)  	spin_lock(&ino->i_lock);  	lo = nfsi->layout; -	if (!lo || pnfs_test_layout_returned(lo)) { +	if (!lo) {  		spin_unlock(&ino->i_lock);  		dprintk("NFS: %s no layout to return\n", __func__);  		goto out;  	}  	stateid = nfsi->layout->plh_stateid;  	/* Reference matched in nfs4_layoutreturn_release */ -	get_layout_hdr(lo); +	pnfs_get_layout_hdr(lo);  	empty = list_empty(&lo->plh_segs); -	mark_matching_lsegs_invalid(lo, &tmp_list, NULL); +	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);  	/* Don't send a LAYOUTRETURN if list was initially empty */  	if (empty) {  		spin_unlock(&ino->i_lock); -		put_layout_hdr(lo); +		pnfs_put_layout_hdr(lo);  		dprintk("NFS: %s no layout segments to return\n", __func__);  		goto out;  	}  	lo->plh_block_lgets++; -	pnfs_mark_layout_returned(lo);  	spin_unlock(&ino->i_lock);  	pnfs_free_lseg_list(&tmp_list); @@ -663,10 +715,10 @@ _pnfs_return_layout(struct inode *ino)  	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);  	if (unlikely(lrp == NULL)) {  		status = -ENOMEM; -		set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); -		set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); -		pnfs_clear_layout_returned(lo); -		put_layout_hdr(lo); +		spin_lock(&ino->i_lock); +		lo->plh_block_lgets--; +		spin_unlock(&ino->i_lock); +		pnfs_put_layout_hdr(lo);  		goto out;  	} @@ -703,7 +755,7 @@ bool pnfs_roc(struct inode *ino)  	if (!found)  		goto out_nolayout;  	lo->plh_block_lgets++; -	get_layout_hdr(lo); /* matched in pnfs_roc_release */ +	pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */  	spin_unlock(&ino->i_lock);  	pnfs_free_lseg_list(&tmp_list);  	return true; @@ -720,8 +772,12 @@ void pnfs_roc_release(struct inode *ino)  	spin_lock(&ino->i_lock);  	lo = NFS_I(ino)->layout;  	lo->plh_block_lgets--; -	put_layout_hdr_locked(lo); -	spin_unlock(&ino->i_lock); +	if (atomic_dec_and_test(&lo->plh_refcount)) { +		pnfs_detach_layout_hdr(lo); +		spin_unlock(&ino->i_lock); +		pnfs_free_layout_hdr(lo); +	} else +		spin_unlock(&ino->i_lock);  }  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) @@ -730,32 +786,34 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)  	spin_lock(&ino->i_lock);  	lo = NFS_I(ino)->layout; -	if ((int)(barrier - lo->plh_barrier) > 0) +	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))  		lo->plh_barrier = barrier;  	spin_unlock(&ino->i_lock);  } -bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)  {  	struct nfs_inode *nfsi = NFS_I(ino); +	struct pnfs_layout_hdr *lo;  	struct pnfs_layout_segment *lseg; +	u32 current_seqid;  	bool found = false;  	spin_lock(&ino->i_lock);  	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)  		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { +			rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);  			found = true; -			break; +			goto out;  		} -	if (!found) { -		struct pnfs_layout_hdr *lo = nfsi->layout; -		u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); +	lo = nfsi->layout; +	current_seqid = be32_to_cpu(lo->plh_stateid.seqid); -		/* Since close does not return a layout stateid for use as -		 * a barrier, we choose the worst-case barrier. -		 */ -		*barrier = current_seqid + atomic_read(&lo->plh_outstanding); -	} +	/* Since close does not return a layout stateid for use as +	 * a barrier, we choose the worst-case barrier. +	 */ +	*barrier = current_seqid + atomic_read(&lo->plh_outstanding); +out:  	spin_unlock(&ino->i_lock);  	return found;  } @@ -786,14 +844,13 @@ cmp_layout(struct pnfs_layout_range *l1,  }  static void -pnfs_insert_layout(struct pnfs_layout_hdr *lo, +pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,  		   struct pnfs_layout_segment *lseg)  {  	struct pnfs_layout_segment *lp;  	dprintk("%s:Begin\n", __func__); -	assert_spin_locked(&lo->plh_inode->i_lock);  	list_for_each_entry(lp, &lo->plh_segs, pls_list) {  		if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)  			continue; @@ -813,7 +870,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,  		__func__, lseg, lseg->pls_range.iomode,  		lseg->pls_range.offset, lseg->pls_range.length);  out: -	get_layout_hdr(lo); +	pnfs_get_layout_hdr(lo);  	dprintk("%s:Return\n", __func__);  } @@ -847,21 +904,19 @@ pnfs_find_alloc_layout(struct inode *ino,  	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); -	assert_spin_locked(&ino->i_lock); -	if (nfsi->layout) { -		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) -			return NULL; -		else -			return nfsi->layout; -	} +	if (nfsi->layout != NULL) +		goto out_existing;  	spin_unlock(&ino->i_lock);  	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);  	spin_lock(&ino->i_lock); -	if (likely(nfsi->layout == NULL))	/* Won the race? */ +	if (likely(nfsi->layout == NULL)) {	/* Won the race? */  		nfsi->layout = new; -	else +		return new; +	} else if (new != NULL)  		pnfs_free_layout_hdr(new); +out_existing: +	pnfs_get_layout_hdr(nfsi->layout);  	return nfsi->layout;  } @@ -904,11 +959,10 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,  	dprintk("%s:Begin\n", __func__); -	assert_spin_locked(&lo->plh_inode->i_lock);  	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {  		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&  		    is_matching_lseg(&lseg->pls_range, range)) { -			ret = get_lseg(lseg); +			ret = pnfs_get_lseg(lseg);  			break;  		}  		if (lseg->pls_range.offset > range->offset) @@ -1013,7 +1067,6 @@ pnfs_update_layout(struct inode *ino,  		.length = count,  	};  	unsigned pg_offset; -	struct nfs_inode *nfsi = NFS_I(ino);  	struct nfs_server *server = NFS_SERVER(ino);  	struct nfs_client *clp = server->nfs_client;  	struct pnfs_layout_hdr *lo; @@ -1021,16 +1074,16 @@ pnfs_update_layout(struct inode *ino,  	bool first = false;  	if (!pnfs_enabled_sb(NFS_SERVER(ino))) -		return NULL; +		goto out;  	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) -		return NULL; +		goto out;  	spin_lock(&ino->i_lock);  	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);  	if (lo == NULL) { -		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); -		goto out_unlock; +		spin_unlock(&ino->i_lock); +		goto out;  	}  	/* Do we even need to bother with this? */ @@ -1040,7 +1093,7 @@ pnfs_update_layout(struct inode *ino,  	}  	/* if LAYOUTGET already failed once we don't try again */ -	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) +	if (pnfs_layout_io_test_failed(lo, iomode))  		goto out_unlock;  	/* Check to see if the layout for the given range already exists */ @@ -1048,24 +1101,19 @@ pnfs_update_layout(struct inode *ino,  	if (lseg)  		goto out_unlock; -	if (pnfs_layoutgets_blocked(lo, NULL, 0)) +	if (pnfs_layoutgets_blocked(lo, 0))  		goto out_unlock;  	atomic_inc(&lo->plh_outstanding); -	get_layout_hdr(lo);  	if (list_empty(&lo->plh_segs))  		first = true; -	/* Enable LAYOUTRETURNs */ -	pnfs_clear_layout_returned(lo); -  	spin_unlock(&ino->i_lock);  	if (first) {  		/* The lo must be on the clp list if there is any  		 * chance of a CB_LAYOUTRECALL(FILE) coming in.  		 */  		spin_lock(&clp->cl_lock); -		BUG_ON(!list_empty(&lo->plh_layouts));  		list_add_tail(&lo->plh_layouts, &server->layouts);  		spin_unlock(&clp->cl_lock);  	} @@ -1079,24 +1127,26 @@ pnfs_update_layout(struct inode *ino,  		arg.length = PAGE_CACHE_ALIGN(arg.length);  	lseg = send_layoutget(lo, ctx, &arg, gfp_flags); -	if (!lseg && first) { -		spin_lock(&clp->cl_lock); -		list_del_init(&lo->plh_layouts); -		spin_unlock(&clp->cl_lock); -	}  	atomic_dec(&lo->plh_outstanding); -	put_layout_hdr(lo); +out_put_layout_hdr: +	pnfs_put_layout_hdr(lo);  out: -	dprintk("%s end, state 0x%lx lseg %p\n", __func__, -		nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); +	dprintk("%s: inode %s/%llu pNFS layout segment %s for " +			"(%s, offset: %llu, length: %llu)\n", +			__func__, ino->i_sb->s_id, +			(unsigned long long)NFS_FILEID(ino), +			lseg == NULL ? "not found" : "found", +			iomode==IOMODE_RW ?  "read/write" : "read-only", +			(unsigned long long)pos, +			(unsigned long long)count);  	return lseg;  out_unlock:  	spin_unlock(&ino->i_lock); -	goto out; +	goto out_put_layout_hdr;  }  EXPORT_SYMBOL_GPL(pnfs_update_layout); -int +struct pnfs_layout_segment *  pnfs_layout_process(struct nfs4_layoutget *lgp)  {  	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; @@ -1123,25 +1173,29 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)  		goto out_forget_reply;  	} -	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { +	if (pnfs_layoutgets_blocked(lo, 1) || +	    pnfs_layout_stateid_blocked(lo, &res->stateid)) {  		dprintk("%s forget reply due to state\n", __func__);  		goto out_forget_reply;  	} + +	/* Done processing layoutget. Set the layout stateid */ +	pnfs_set_layout_stateid(lo, &res->stateid, false); +  	init_lseg(lo, lseg);  	lseg->pls_range = res->range; -	*lgp->lsegpp = get_lseg(lseg); -	pnfs_insert_layout(lo, lseg); +	pnfs_get_lseg(lseg); +	pnfs_layout_insert_lseg(lo, lseg);  	if (res->return_on_close) {  		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);  		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);  	} -	/* Done processing layoutget. Set the layout stateid */ -	pnfs_set_layout_stateid(lo, &res->stateid, false);  	spin_unlock(&ino->i_lock); +	return lseg;  out: -	return status; +	return ERR_PTR(status);  out_forget_reply:  	spin_unlock(&ino->i_lock); @@ -1153,16 +1207,24 @@ out_forget_reply:  void  pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)  { -	BUG_ON(pgio->pg_lseg != NULL); +	u64 rd_size = req->wb_bytes; + +	WARN_ON_ONCE(pgio->pg_lseg != NULL);  	if (req->wb_offset != req->wb_pgbase) {  		nfs_pageio_reset_read_mds(pgio);  		return;  	} + +	if (pgio->pg_dreq == NULL) +		rd_size = i_size_read(pgio->pg_inode) - req_offset(req); +	else +		rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); +  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   req_offset(req), -					   req->wb_bytes, +					   rd_size,  					   IOMODE_READ,  					   GFP_KERNEL);  	/* If no lseg, fall back to read through mds */ @@ -1173,18 +1235,20 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r  EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);  void -pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, +			   struct nfs_page *req, u64 wb_size)  { -	BUG_ON(pgio->pg_lseg != NULL); +	WARN_ON_ONCE(pgio->pg_lseg != NULL);  	if (req->wb_offset != req->wb_pgbase) {  		nfs_pageio_reset_write_mds(pgio);  		return;  	} +  	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,  					   req->wb_context,  					   req_offset(req), -					   req->wb_bytes, +					   wb_size,  					   IOMODE_RW,  					   GFP_NOFS);  	/* If no lseg, fall back to write through mds */ @@ -1362,12 +1426,12 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he  		if (trypnfs == PNFS_NOT_ATTEMPTED)  			pnfs_write_through_mds(desc, data);  	} -	put_lseg(lseg); +	pnfs_put_lseg(lseg);  }  static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)  { -	put_lseg(hdr->lseg); +	pnfs_put_lseg(hdr->lseg);  	nfs_writehdr_free(hdr);  }  EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1382,17 +1446,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)  	whdr = nfs_writehdr_alloc();  	if (!whdr) {  		desc->pg_completion_ops->error_cleanup(&desc->pg_list); -		put_lseg(desc->pg_lseg); +		pnfs_put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL;  		return -ENOMEM;  	}  	hdr = &whdr->header;  	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); -	hdr->lseg = get_lseg(desc->pg_lseg); +	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);  	atomic_inc(&hdr->refcnt);  	ret = nfs_generic_flush(desc, hdr);  	if (ret != 0) { -		put_lseg(desc->pg_lseg); +		pnfs_put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL;  	} else  		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); @@ -1517,12 +1581,12 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea  		if (trypnfs == PNFS_NOT_ATTEMPTED)  			pnfs_read_through_mds(desc, data);  	} -	put_lseg(lseg); +	pnfs_put_lseg(lseg);  }  static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)  { -	put_lseg(hdr->lseg); +	pnfs_put_lseg(hdr->lseg);  	nfs_readhdr_free(hdr);  }  EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1538,17 +1602,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)  	if (!rhdr) {  		desc->pg_completion_ops->error_cleanup(&desc->pg_list);  		ret = -ENOMEM; -		put_lseg(desc->pg_lseg); +		pnfs_put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL;  		return ret;  	}  	hdr = &rhdr->header;  	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); -	hdr->lseg = get_lseg(desc->pg_lseg); +	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);  	atomic_inc(&hdr->refcnt);  	ret = nfs_generic_pagein(desc, hdr);  	if (ret != 0) { -		put_lseg(desc->pg_lseg); +		pnfs_put_lseg(desc->pg_lseg);  		desc->pg_lseg = NULL;  	} else  		pnfs_do_multiple_reads(desc, &hdr->rpc_list); @@ -1574,13 +1638,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)  void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)  { -	if (lseg->pls_range.iomode == IOMODE_RW) { -		dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); -		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); -	} else { -		dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); -		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); -	} +	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);  }  EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); @@ -1601,7 +1659,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)  	}  	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {  		/* references matched in nfs4_layoutcommit_release */ -		get_lseg(hdr->lseg); +		pnfs_get_lseg(hdr->lseg);  	}  	if (end_pos > nfsi->layout->plh_lwb)  		nfsi->layout->plh_lwb = end_pos;  |