Diffstat (limited to 'fs/xfs')
33 files changed, 486 insertions, 1390 deletions
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 76e4266d2e7..ac702a6eab9 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)  	struct posix_acl_entry *acl_e;  	struct posix_acl *acl;  	struct xfs_acl_entry *ace; -	int count, i; +	unsigned int count, i;  	count = be32_to_cpu(aclp->acl_cnt);  	if (count > XFS_ACL_MAX_ENTRIES) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cf0ac056815..4dff85c7d7e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1370,7 +1370,7 @@ restart:  			goto restart;  		}  		/* -		 * clear the LRU reference count so the bufer doesn't get +		 * clear the LRU reference count so the buffer doesn't get  		 * ignored in xfs_buf_rele().  		 */  		atomic_set(&bp->b_lru_ref, 0); @@ -1701,12 +1701,8 @@ xfsbufd(  		struct list_head tmp;  		struct blk_plug plug; -		if (unlikely(freezing(current))) { -			set_bit(XBT_FORCE_SLEEP, &target->bt_flags); -			refrigerator(); -		} else { -			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); -		} +		if (unlikely(freezing(current))) +			try_to_freeze();  		/* sleep for a long time if there is nothing to do. */  		if (list_empty(&target->bt_delwri_queue)) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5bab046e859..df7ffb0affe 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;  	{ _XBF_DELWRI_Q,	"DELWRI_Q" }  typedef enum { -	XBT_FORCE_SLEEP = 0, -	XBT_FORCE_FLUSH = 1, +	XBT_FORCE_FLUSH = 0,  } xfs_buftarg_flags_t;  typedef struct xfs_buftarg { diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 25d7280e9f6..b4ff40b5f91 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -39,20 +39,19 @@  #include "xfs_qm.h"  #include "xfs_trace.h" -  /* -   LOCK ORDER - -   inode lock		    (ilock) -   dquot hash-chain lock    (hashlock) -   xqm dquot freelist lock  (freelistlock -   mount's dquot list lock  (mplistlock) -   user dquot lock - lock ordering among dquots is based on the uid or gid -   group dquot lock - similar to udquots. Between the two dquots, the udquot -		      has to be locked first. -   pin lock - the dquot lock must be held to take this lock. -   flush lock - ditto. -*/ + * Lock order: + * + * ip->i_lock + *   qh->qh_lock + *     qi->qi_dqlist_lock + *       dquot->q_qlock (xfs_dqlock() and friends) + *         dquot->q_flush (xfs_dqflock() and friends) + *         xfs_Gqm->qm_dqfrlist_lock + * + * If two dquots need to be locked the order is user before group/project, + * otherwise by the lowest id first, see xfs_dqlock2. + */  #ifdef DEBUG  xfs_buftarg_t *xfs_dqerror_target; @@ -155,24 +154,6 @@ xfs_qm_dqdestroy(  }  /* - * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. - */ -STATIC void -xfs_qm_dqinit_core( -	xfs_dqid_t	id, -	uint		type, -	xfs_dqblk_t	*d) -{ -	/* -	 * Caller has zero'd the entire dquot 'chunk' already. -	 */ -	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); -	d->dd_diskdq.d_version = XFS_DQUOT_VERSION; -	d->dd_diskdq.d_id = cpu_to_be32(id); -	d->dd_diskdq.d_flags = type; -} - -/*   * If default limits are in force, push them into the dquot now.   * We overwrite the dquot limits only if they are zero and this   * is not the root dquot. 
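The lock-order comment rewritten above also spells out how xfs_dqlock2() must order two dquots: user before group/project, otherwise lowest id first. A minimal sketch of that ordering rule, using a hypothetical dq type and pthread mutexes rather than the kernel's struct xfs_dquot:

#include <pthread.h>

/* Hypothetical stand-in for struct xfs_dquot; only the fields the rule needs. */
struct dq {
	unsigned int	id;		/* uid, gid or project id */
	int		is_user;	/* user dquots order before group/project */
	pthread_mutex_t	qlock;
};

/*
 * Lock two dquots in a stable global order so concurrent callers cannot
 * deadlock: user before group/project, otherwise lowest id first
 * (cf. xfs_dqlock2 referenced in the comment above).
 */
static void dqlock2(struct dq *a, struct dq *b)
{
	if (b->is_user > a->is_user ||
	    (b->is_user == a->is_user && b->id < a->id)) {
		struct dq *tmp = a;	/* swap so 'a' is the one that orders first */
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->qlock);
	pthread_mutex_lock(&b->qlock);
}

In the kernel the second acquisition would use mutex_lock_nested() with the XFS_QLOCK_NESTED subclass (visible in the xfs_dquot.h hunks below) so lockdep accepts taking two locks of the same class.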
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(  	curid = id - (id % q->qi_dqperchunk);  	ASSERT(curid >= 0);  	memset(d, 0, BBTOB(q->qi_dqchunklen)); -	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) -		xfs_qm_dqinit_core(curid, type, d); +	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { +		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); +		d->dd_diskdq.d_version = XFS_DQUOT_VERSION; +		d->dd_diskdq.d_id = cpu_to_be32(curid); +		d->dd_diskdq.d_flags = type; +	} +  	xfs_trans_dquot_buf(tp, bp,  			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :  			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : @@ -564,36 +550,62 @@ xfs_qm_dqtobp(   * Read in the ondisk dquot using dqtobp() then copy it to an incore version,   * and release the buffer immediately.   * + * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.   */ -/* ARGSUSED */ -STATIC int +int  xfs_qm_dqread( -	xfs_trans_t	**tpp, -	xfs_dqid_t	id, -	xfs_dquot_t	*dqp,	/* dquot to get filled in */ -	uint		flags) +	struct xfs_mount	*mp, +	xfs_dqid_t		id, +	uint			type, +	uint			flags, +	struct xfs_dquot	**O_dqpp)  { -	xfs_disk_dquot_t *ddqp; -	xfs_buf_t	 *bp; -	int		 error; -	xfs_trans_t	 *tp; +	struct xfs_dquot	*dqp; +	struct xfs_disk_dquot	*ddqp; +	struct xfs_buf		*bp; +	struct xfs_trans	*tp = NULL; +	int			error; +	int			cancelflags = 0; -	ASSERT(tpp);  	dqp = xfs_qm_dqinit(mp, id, type);  	trace_xfs_dqread(dqp); +	if (flags & XFS_QMOPT_DQALLOC) { +		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); +		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), +				XFS_WRITE_LOG_RES(mp) + +				/* +				 * Round the chunklen up to the next multiple +				 * of 128 (buf log item chunk size). +				 */ +				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128, +				0, +				XFS_TRANS_PERM_LOG_RES, +				XFS_WRITE_LOG_COUNT); +		if (error) +			goto error1; +		cancelflags = XFS_TRANS_RELEASE_LOG_RES; +	} +  	/*  	 * get a pointer to the on-disk dquot and the buffer containing it  	 * dqp already knows its own type (GROUP/USER).  	 */ -	if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { -		return (error); +	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags); +	if (error) { +		/* +		 * This can happen if quotas got turned off (ESRCH), +		 * or if the dquot didn't exist on disk and we ask to +		 * allocate (ENOENT). +		 */ +		trace_xfs_dqread_fail(dqp); +		cancelflags |= XFS_TRANS_ABORT; +		goto error1;  	} -	tp = *tpp;  	/* copy everything from disk dquot to the incore dquot */  	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); -	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);  	xfs_qm_dquot_logitem_init(dqp);  	/* @@ -622,77 +634,22 @@ xfs_qm_dqread(  	ASSERT(xfs_buf_islocked(bp));  	xfs_trans_brelse(tp, bp); -	return (error); -} - - -/* - * allocate an incore dquot from the kernel heap, - * and fill its core with quota information kept on disk. - * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk - * if it wasn't already allocated. 
- */ -STATIC int -xfs_qm_idtodq( -	xfs_mount_t	*mp, -	xfs_dqid_t	id,	 /* gid or uid, depending on type */ -	uint		type,	 /* UDQUOT or GDQUOT */ -	uint		flags,	 /* DQALLOC, DQREPAIR */ -	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */ -{ -	xfs_dquot_t	*dqp; -	int		error; -	xfs_trans_t	*tp; -	int		cancelflags=0; - -	dqp = xfs_qm_dqinit(mp, id, type); -	tp = NULL; -	if (flags & XFS_QMOPT_DQALLOC) { -		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); -		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), -				XFS_WRITE_LOG_RES(mp) + -				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + -				128, -				0, -				XFS_TRANS_PERM_LOG_RES, -				XFS_WRITE_LOG_COUNT); -		if (error) { -			cancelflags = 0; -			goto error0; -		} -		cancelflags = XFS_TRANS_RELEASE_LOG_RES; -	} - -	/* -	 * Read it from disk; xfs_dqread() takes care of -	 * all the necessary initialization of dquot's fields (locks, etc) -	 */ -	if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { -		/* -		 * This can happen if quotas got turned off (ESRCH), -		 * or if the dquot didn't exist on disk and we ask to -		 * allocate (ENOENT). -		 */ -		trace_xfs_dqread_fail(dqp); -		cancelflags |= XFS_TRANS_ABORT; -		goto error0; -	}  	if (tp) { -		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) -			goto error1; +		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +		if (error) +			goto error0;  	}  	*O_dqpp = dqp; -	return (0); +	return error; - error0: -	ASSERT(error); +error1:  	if (tp)  		xfs_trans_cancel(tp, cancelflags); - error1: +error0:  	xfs_qm_dqdestroy(dqp);  	*O_dqpp = NULL; -	return (error); +	return error;  }  /* @@ -710,12 +667,9 @@ xfs_qm_dqlookup(  	xfs_dquot_t		**O_dqpp)  {  	xfs_dquot_t		*dqp; -	uint			flist_locked;  	ASSERT(mutex_is_locked(&qh->qh_lock)); -	flist_locked = B_FALSE; -  	/*  	 * Traverse the hashchain looking for a match  	 */ @@ -725,70 +679,31 @@ xfs_qm_dqlookup(  		 * dqlock to look at the id field of the dquot, since the  		 * id can't be modified without the hashlock anyway.  		 */ -		if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { -			trace_xfs_dqlookup_found(dqp); - -			/* -			 * All in core dquots must be on the dqlist of mp -			 */ -			ASSERT(!list_empty(&dqp->q_mplist)); - -			xfs_dqlock(dqp); -			if (dqp->q_nrefs == 0) { -				ASSERT(!list_empty(&dqp->q_freelist)); -				if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { -					trace_xfs_dqlookup_want(dqp); - -					/* -					 * We may have raced with dqreclaim_one() -					 * (and lost). So, flag that we don't -					 * want the dquot to be reclaimed. 
-					 */ -					dqp->dq_flags |= XFS_DQ_WANT; -					xfs_dqunlock(dqp); -					mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -					xfs_dqlock(dqp); -					dqp->dq_flags &= ~(XFS_DQ_WANT); -				} -				flist_locked = B_TRUE; -			} +		if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp) +			continue; -			/* -			 * id couldn't have changed; we had the hashlock all -			 * along -			 */ -			ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); +		trace_xfs_dqlookup_found(dqp); -			if (flist_locked) { -				if (dqp->q_nrefs != 0) { -					mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -					flist_locked = B_FALSE; -				} else { -					/* take it off the freelist */ -					trace_xfs_dqlookup_freelist(dqp); -					list_del_init(&dqp->q_freelist); -					xfs_Gqm->qm_dqfrlist_cnt--; -				} -			} +		xfs_dqlock(dqp); +		if (dqp->dq_flags & XFS_DQ_FREEING) { +			*O_dqpp = NULL; +			xfs_dqunlock(dqp); +			return -1; +		} -			XFS_DQHOLD(dqp); +		dqp->q_nrefs++; -			if (flist_locked) -				mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -			/* -			 * move the dquot to the front of the hashchain -			 */ -			ASSERT(mutex_is_locked(&qh->qh_lock)); -			list_move(&dqp->q_hashlist, &qh->qh_list); -			trace_xfs_dqlookup_done(dqp); -			*O_dqpp = dqp; -			return 0; -		} +		/* +		 * move the dquot to the front of the hashchain +		 */ +		list_move(&dqp->q_hashlist, &qh->qh_list); +		trace_xfs_dqlookup_done(dqp); +		*O_dqpp = dqp; +		return 0;  	}  	*O_dqpp = NULL; -	ASSERT(mutex_is_locked(&qh->qh_lock)); -	return (1); +	return 1;  }  /* @@ -829,11 +744,7 @@ xfs_qm_dqget(  			return (EIO);  		}  	} -#endif - - again: -#ifdef DEBUG  	ASSERT(type == XFS_DQ_USER ||  	       type == XFS_DQ_PROJ ||  	       type == XFS_DQ_GROUP); @@ -845,13 +756,21 @@ xfs_qm_dqget(  			ASSERT(ip->i_gdquot == NULL);  	}  #endif + +restart:  	mutex_lock(&h->qh_lock);  	/*  	 * Look in the cache (hashtable).  	 * The chain is kept locked during lookup.  	 */ -	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { +	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) { +	case -1: +		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); +		mutex_unlock(&h->qh_lock); +		delay(1); +		goto restart; +	case 0:  		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);  		/*  		 * The dquot was found, moved to the front of the chain, @@ -862,9 +781,11 @@ xfs_qm_dqget(  		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));  		mutex_unlock(&h->qh_lock);  		trace_xfs_dqget_hit(*O_dqpp); -		return (0);	/* success */ +		return 0;	/* success */ +	default: +		XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); +		break;  	} -	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);  	/*  	 * Dquot cache miss. We don't want to keep the inode lock across @@ -882,41 +803,18 @@ xfs_qm_dqget(  	version = h->qh_version;  	mutex_unlock(&h->qh_lock); -	/* -	 * Allocate the dquot on the kernel heap, and read the ondisk -	 * portion off the disk. Also, do all the necessary initialization -	 * This can return ENOENT if dquot didn't exist on disk and we didn't -	 * ask it to allocate; ESRCH if quotas got turned off suddenly. -	 */ -	if ((error = xfs_qm_idtodq(mp, id, type, -				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| -					   XFS_QMOPT_DOWARN), -				  &dqp))) { -		if (ip) -			xfs_ilock(ip, XFS_ILOCK_EXCL); -		return (error); -	} +	error = xfs_qm_dqread(mp, id, type, flags, &dqp); -	/* -	 * See if this is mount code calling to look at the overall quota limits -	 * which are stored in the id == 0 user or group's dquot. 
-	 * Since we may not have done a quotacheck by this point, just return -	 * the dquot without attaching it to any hashtables, lists, etc, or even -	 * taking a reference. -	 * The caller must dqdestroy this once done. -	 */ -	if (flags & XFS_QMOPT_DQSUSER) { -		ASSERT(id == 0); -		ASSERT(! ip); -		goto dqret; -	} +	if (ip) +		xfs_ilock(ip, XFS_ILOCK_EXCL); + +	if (error) +		return error;  	/*  	 * Dquot lock comes after hashlock in the lock ordering  	 */  	if (ip) { -		xfs_ilock(ip, XFS_ILOCK_EXCL); -  		/*  		 * A dquot could be attached to this inode by now, since  		 * we had dropped the ilock. @@ -961,16 +859,21 @@ xfs_qm_dqget(  		 * lock order between the two dquots here since dqp isn't  		 * on any findable lists yet.  		 */ -		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { +		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) { +		case 0: +		case -1:  			/* -			 * Duplicate found. Just throw away the new dquot -			 * and start over. +			 * Duplicate found, either in cache or on its way out. +			 * Just throw away the new dquot and start over.  			 */ -			xfs_qm_dqput(tmpdqp); +			if (tmpdqp) +				xfs_qm_dqput(tmpdqp);  			mutex_unlock(&h->qh_lock);  			xfs_qm_dqdestroy(dqp);  			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); -			goto again; +			goto restart; +		default: +			break;  		}  	} @@ -1015,67 +918,49 @@ xfs_qm_dqget(   */  void  xfs_qm_dqput( -	xfs_dquot_t	*dqp) +	struct xfs_dquot	*dqp)  { -	xfs_dquot_t	*gdqp; +	struct xfs_dquot	*gdqp;  	ASSERT(dqp->q_nrefs > 0);  	ASSERT(XFS_DQ_IS_LOCKED(dqp));  	trace_xfs_dqput(dqp); -	if (dqp->q_nrefs != 1) { -		dqp->q_nrefs--; +recurse: +	if (--dqp->q_nrefs > 0) {  		xfs_dqunlock(dqp);  		return;  	} +	trace_xfs_dqput_free(dqp); + +	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); +	if (list_empty(&dqp->q_freelist)) { +		list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); +		xfs_Gqm->qm_dqfrlist_cnt++; +	} +	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); +  	/* -	 * drop the dqlock and acquire the freelist and dqlock -	 * in the right order; but try to get it out-of-order first +	 * If we just added a udquot to the freelist, then we want to release +	 * the gdquot reference that it (probably) has. Otherwise it'll keep +	 * the gdquot from getting reclaimed.  	 */ -	if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { -		trace_xfs_dqput_wait(dqp); -		xfs_dqunlock(dqp); -		mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -		xfs_dqlock(dqp); +	gdqp = dqp->q_gdquot; +	if (gdqp) { +		xfs_dqlock(gdqp); +		dqp->q_gdquot = NULL;  	} +	xfs_dqunlock(dqp); -	while (1) { -		gdqp = NULL; - -		/* We can't depend on nrefs being == 1 here */ -		if (--dqp->q_nrefs == 0) { -			trace_xfs_dqput_free(dqp); - -			list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); -			xfs_Gqm->qm_dqfrlist_cnt++; - -			/* -			 * If we just added a udquot to the freelist, then -			 * we want to release the gdquot reference that -			 * it (probably) has. Otherwise it'll keep the -			 * gdquot from getting reclaimed. -			 */ -			if ((gdqp = dqp->q_gdquot)) { -				/* -				 * Avoid a recursive dqput call -				 */ -				xfs_dqlock(gdqp); -				dqp->q_gdquot = NULL; -			} -		} -		xfs_dqunlock(dqp); - -		/* -		 * If we had a group quota inside the user quota as a hint, -		 * release it now. -		 */ -		if (! gdqp) -			break; +	/* +	 * If we had a group quota hint, release it now. +	 */ +	if (gdqp) {  		dqp = gdqp; +		goto recurse;  	} -	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);  }  /* @@ -1169,7 +1054,7 @@ xfs_qm_dqflush(  	 * If not dirty, or it's pinned and we are not supposed to block, nada.  	 
*/  	if (!XFS_DQ_IS_DIRTY(dqp) || -	    (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { +	    ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {  		xfs_dqfunlock(dqp);  		return 0;  	} @@ -1257,40 +1142,17 @@ xfs_qm_dqflush(  } -int -xfs_qm_dqlock_nowait( -	xfs_dquot_t *dqp) -{ -	return mutex_trylock(&dqp->q_qlock); -} - -void -xfs_dqlock( -	xfs_dquot_t *dqp) -{ -	mutex_lock(&dqp->q_qlock); -} -  void  xfs_dqunlock(  	xfs_dquot_t *dqp)  { -	mutex_unlock(&(dqp->q_qlock)); +	xfs_dqunlock_nonotify(dqp);  	if (dqp->q_logitem.qli_dquot == dqp) { -		/* Once was dqp->q_mount, but might just have been cleared */  		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, -					(xfs_log_item_t*)&(dqp->q_logitem)); +					&dqp->q_logitem.qli_item);  	}  } - -void -xfs_dqunlock_nonotify( -	xfs_dquot_t *dqp) -{ -	mutex_unlock(&(dqp->q_qlock)); -} -  /*   * Lock two xfs_dquot structures.   * @@ -1319,43 +1181,18 @@ xfs_dqlock2(  	}  } -  /* - * Take a dquot out of the mount's dqlist as well as the hashlist. - * This is called via unmount as well as quotaoff, and the purge - * will always succeed unless there are soft (temp) references - * outstanding. - * - * This returns 0 if it was purged, 1 if it wasn't. It's not an error code - * that we're returning! XXXsup - not cool. + * Take a dquot out of the mount's dqlist as well as the hashlist.  This is + * called via unmount as well as quotaoff, and the purge will always succeed.   */ -/* ARGSUSED */ -int +void  xfs_qm_dqpurge( -	xfs_dquot_t	*dqp) +	struct xfs_dquot	*dqp)  { -	xfs_dqhash_t	*qh = dqp->q_hash; -	xfs_mount_t	*mp = dqp->q_mount; - -	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); -	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); +	struct xfs_mount	*mp = dqp->q_mount; +	struct xfs_dqhash	*qh = dqp->q_hash;  	xfs_dqlock(dqp); -	/* -	 * We really can't afford to purge a dquot that is -	 * referenced, because these are hard refs. -	 * It shouldn't happen in general because we went thru _all_ inodes in -	 * dqrele_all_inodes before calling this and didn't let the mountlock go. -	 * However it is possible that we have dquots with temporary -	 * references that are not attached to an inode. e.g. see xfs_setattr(). -	 */ -	if (dqp->q_nrefs != 0) { -		xfs_dqunlock(dqp); -		mutex_unlock(&dqp->q_hash->qh_lock); -		return (1); -	} - -	ASSERT(!list_empty(&dqp->q_freelist));  	/*  	 * If we're turning off quotas, we have to make sure that, for @@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(  		 * Block on the flush lock after nudging dquot buffer,  		 * if it is incore.  		 */ -		xfs_qm_dqflock_pushbuf_wait(dqp); +		xfs_dqflock_pushbuf_wait(dqp);  	}  	/* -	 * XXXIf we're turning this type of quotas off, we don't care +	 * If we are turning this type of quotas off, we don't care  	 * about the dirty metadata sitting in this dquot. OTOH, if  	 * we're unmounting, we do care, so we flush it and wait.  	 */  	if (XFS_DQ_IS_DIRTY(dqp)) {  		int	error; -		/* dqflush unlocks dqflock */  		/* -		 * Given that dqpurge is a very rare occurrence, it is OK -		 * that we're holding the hashlist and mplist locks -		 * across the disk write. But, ... XXXsup -		 *  		 * We don't care about getting disk errors here. We need  		 * to purge this dquot anyway, so we go ahead regardless.  		 
*/ @@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(  				__func__, dqp);  		xfs_dqflock(dqp);  	} +  	ASSERT(atomic_read(&dqp->q_pincount) == 0);  	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||  	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); +	xfs_dqfunlock(dqp); +	xfs_dqunlock(dqp); + +	mutex_lock(&qh->qh_lock);  	list_del_init(&dqp->q_hashlist);  	qh->qh_version++; +	mutex_unlock(&qh->qh_lock); + +	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);  	list_del_init(&dqp->q_mplist);  	mp->m_quotainfo->qi_dqreclaims++;  	mp->m_quotainfo->qi_dquots--; +	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); +  	/* -	 * XXX Move this to the front of the freelist, if we can get the -	 * freelist lock. +	 * We move dquots to the freelist as soon as their reference count +	 * hits zero, so it really should be on the freelist here.  	 */ +	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);  	ASSERT(!list_empty(&dqp->q_freelist)); +	list_del_init(&dqp->q_freelist); +	xfs_Gqm->qm_dqfrlist_cnt--; +	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -	dqp->q_mount = NULL; -	dqp->q_hash = NULL; -	dqp->dq_flags = XFS_DQ_INACTIVE; -	memset(&dqp->q_core, 0, sizeof(dqp->q_core)); -	xfs_dqfunlock(dqp); -	xfs_dqunlock(dqp); -	mutex_unlock(&qh->qh_lock); -	return (0); +	xfs_qm_dqdestroy(dqp);  } -  /*   * Give the buffer a little push if it is incore and   * wait on the flush lock.   */  void -xfs_qm_dqflock_pushbuf_wait( +xfs_dqflock_pushbuf_wait(  	xfs_dquot_t	*dqp)  {  	xfs_mount_t	*mp = dqp->q_mount; diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 34b7e945dbf..a1d91d8f180 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -80,8 +80,6 @@ enum {  	XFS_QLOCK_NESTED,  }; -#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++) -  /*   * Manage the q_flush completion queue embedded in the dquot.  This completion   * queue synchronizes processes attempting to flush the in-core dquot back to @@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)  	complete(&dqp->q_flush);  } +static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp) +{ +	return mutex_trylock(&dqp->q_qlock); +} + +static inline void xfs_dqlock(struct xfs_dquot *dqp) +{ +	mutex_lock(&dqp->q_qlock); +} + +static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp) +{ +	mutex_unlock(&dqp->q_qlock); +} +  #define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))  #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)  #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER) @@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)  				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \  				     (XFS_IS_OQUOTA_ON((d)->q_mount)))) +extern int		xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, +					uint, struct xfs_dquot	**);  extern void		xfs_qm_dqdestroy(xfs_dquot_t *);  extern int		xfs_qm_dqflush(xfs_dquot_t *, uint); -extern int		xfs_qm_dqpurge(xfs_dquot_t *); +extern void		xfs_qm_dqpurge(xfs_dquot_t *);  extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *); -extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *); -extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);  extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,  					xfs_disk_dquot_t *);  extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *, @@ -129,9 +142,17 @@ extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,  extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,  					xfs_dqid_t, uint, uint, xfs_dquot_t **);  extern void		xfs_qm_dqput(xfs_dquot_t *); -extern void		xfs_dqlock(xfs_dquot_t *); -extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); -extern void		
xfs_dqunlock(xfs_dquot_t *); -extern void		xfs_dqunlock_nonotify(xfs_dquot_t *); + +extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); +extern void		xfs_dqunlock(struct xfs_dquot *); +extern void		xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp); + +static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) +{ +	xfs_dqlock(dqp); +	dqp->q_nrefs++; +	xfs_dqunlock(dqp); +	return dqp; +}  #endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 0dee0b71029..34baeae4526 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(  	logvec->i_len  = sizeof(xfs_disk_dquot_t);  	logvec->i_type = XLOG_REG_TYPE_DQUOT; -	ASSERT(2 == lip->li_desc->lid_size);  	qlip->qli_format.qlf_size = 2;  } @@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(  	 * lock without sleeping, then there must not have been  	 * anyone in the process of flushing the dquot.  	 */ -	error = xfs_qm_dqflush(dqp, 0); +	error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);  	if (error)  		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",  			__func__, error, dqp); @@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(  	if (atomic_read(&dqp->q_pincount) > 0)  		return XFS_ITEM_PINNED; -	if (!xfs_qm_dqlock_nowait(dqp)) +	if (!xfs_dqlock_nowait(dqp))  		return XFS_ITEM_LOCKED;  	if (!xfs_dqflock_nowait(dqp)) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 753ed9b5c70..f675f3d9d7b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -209,10 +209,10 @@ xfs_file_fsync(  	/*  	 * First check if the VFS inode is marked dirty.  All the dirtying -	 * of non-transactional updates no goes through mark_inode_dirty*, -	 * which allows us to distinguish beteeen pure timestamp updates +	 * of non-transactional updates now goes through mark_inode_dirty*, +	 * which allows us to distinguish between pure timestamp updates  	 * and i_size updates which need to be caught for fdatasync. -	 * After that also theck for the dirty state in the XFS inode, which +	 * After that also check for the dirty state in the XFS inode, which  	 * might gets cleared when the inode gets written out via the AIL  	 * or xfs_iflush_cluster.  	 
*/ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 169380e6605..dad1a31aa4f 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -447,7 +447,7 @@ STATIC xfs_buf_t *			/* allocation group buffer */  xfs_ialloc_ag_select(  	xfs_trans_t	*tp,		/* transaction pointer */  	xfs_ino_t	parent,		/* parent directory inode number */ -	mode_t		mode,		/* bits set to indicate file type */ +	umode_t		mode,		/* bits set to indicate file type */  	int		okalloc)	/* ok to allocate more space */  {  	xfs_buf_t	*agbp;		/* allocation group header buffer */ @@ -640,7 +640,7 @@ int  xfs_dialloc(  	xfs_trans_t	*tp,		/* transaction pointer */  	xfs_ino_t	parent,		/* parent inode (directory) */ -	mode_t		mode,		/* mode bits for new inode */ +	umode_t		mode,		/* mode bits for new inode */  	int		okalloc,	/* ok to allocate more space */  	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */  	boolean_t	*alloc_done,	/* true if we needed to replenish diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index bb5385475e1..666a037398d 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -81,7 +81,7 @@ int					/* error */  xfs_dialloc(  	struct xfs_trans *tp,		/* transaction pointer */  	xfs_ino_t	parent,		/* parent inode (directory) */ -	mode_t		mode,		/* mode bits for new inode */ +	umode_t		mode,		/* mode bits for new inode */  	int		okalloc,	/* ok to allocate more space */  	struct xfs_buf	**agbp,		/* buf for a.g. inode header */  	boolean_t	*alloc_done,	/* an allocation was done to replenish diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0fa98b1c70e..3960a066d7f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -107,7 +107,6 @@ xfs_inode_free_callback(  	struct inode		*inode = container_of(head, struct inode, i_rcu);  	struct xfs_inode	*ip = XFS_I(inode); -	INIT_LIST_HEAD(&inode->i_dentry);  	kmem_zone_free(xfs_inode_zone, ip);  } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 755ee816488..9dda7cc3284 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -961,7 +961,7 @@ int  xfs_ialloc(  	xfs_trans_t	*tp,  	xfs_inode_t	*pip, -	mode_t		mode, +	umode_t		mode,  	xfs_nlink_t	nlink,  	xfs_dev_t	rdev,  	prid_t		prid, @@ -1002,7 +1002,7 @@ xfs_ialloc(  		return error;  	ASSERT(ip != NULL); -	ip->i_d.di_mode = (__uint16_t)mode; +	ip->i_d.di_mode = mode;  	ip->i_d.di_onlink = 0;  	ip->i_d.di_nlink = nlink;  	ASSERT(ip->i_d.di_nlink == nlink); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b4cd4739f98..f0e6b151ba3 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -481,7 +481,7 @@ void		xfs_inode_free(struct xfs_inode *ip);  /*   * xfs_inode.c prototypes.   */ -int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, +int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,  			   xfs_nlink_t, xfs_dev_t, prid_t, int,  			   struct xfs_buf **, boolean_t *, xfs_inode_t **); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index abaafdbb3e6..cfd6c7f8cc3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -437,7 +437,6 @@ xfs_inode_item_format(  	 * Assert that no attribute-related log flags are set.  	 
*/  	if (!XFS_IFORK_Q(ip)) { -		ASSERT(nvecs == lip->li_desc->lid_size);  		iip->ili_format.ilf_size = nvecs;  		ASSERT(!(iip->ili_format.ilf_fields &  			 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); @@ -521,7 +520,6 @@ xfs_inode_item_format(  		break;  	} -	ASSERT(nvecs == lip->li_desc->lid_size);  	iip->ili_format.ilf_size = nvecs;  } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d99a9051890..76f3ca5cfc3 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -559,23 +559,23 @@ xfs_attrmulti_by_handle(  					ops[i].am_flags);  			break;  		case ATTR_OP_SET: -			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); +			ops[i].am_error = mnt_want_write_file(parfilp);  			if (ops[i].am_error)  				break;  			ops[i].am_error = xfs_attrmulti_attr_set(  					dentry->d_inode, attr_name,  					ops[i].am_attrvalue, ops[i].am_length,  					ops[i].am_flags); -			mnt_drop_write(parfilp->f_path.mnt); +			mnt_drop_write_file(parfilp);  			break;  		case ATTR_OP_REMOVE: -			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); +			ops[i].am_error = mnt_want_write_file(parfilp);  			if (ops[i].am_error)  				break;  			ops[i].am_error = xfs_attrmulti_attr_remove(  					dentry->d_inode, attr_name,  					ops[i].am_flags); -			mnt_drop_write(parfilp->f_path.mnt); +			mnt_drop_write_file(parfilp);  			break;  		default:  			ops[i].am_error = EINVAL; diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 54e623bfbb8..f9ccb7b7c04 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -454,23 +454,23 @@ xfs_compat_attrmulti_by_handle(  					&ops[i].am_length, ops[i].am_flags);  			break;  		case ATTR_OP_SET: -			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); +			ops[i].am_error = mnt_want_write_file(parfilp);  			if (ops[i].am_error)  				break;  			ops[i].am_error = xfs_attrmulti_attr_set(  					dentry->d_inode, attr_name,  					compat_ptr(ops[i].am_attrvalue),  					ops[i].am_length, ops[i].am_flags); -			mnt_drop_write(parfilp->f_path.mnt); +			mnt_drop_write_file(parfilp);  			break;  		case ATTR_OP_REMOVE: -			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); +			ops[i].am_error = mnt_want_write_file(parfilp);  			if (ops[i].am_error)  				break;  			ops[i].am_error = xfs_attrmulti_attr_remove(  					dentry->d_inode, attr_name,  					ops[i].am_flags); -			mnt_drop_write(parfilp->f_path.mnt); +			mnt_drop_write_file(parfilp);  			break;  		default:  			ops[i].am_error = EINVAL; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 23ce927973a..f9babd17922 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -168,7 +168,7 @@ STATIC int  xfs_vn_mknod(  	struct inode	*dir,  	struct dentry	*dentry, -	int		mode, +	umode_t		mode,  	dev_t		rdev)  {  	struct inode	*inode; @@ -231,7 +231,7 @@ STATIC int  xfs_vn_create(  	struct inode	*dir,  	struct dentry	*dentry, -	int		mode, +	umode_t		mode,  	struct nameidata *nd)  {  	return xfs_vn_mknod(dir, dentry, mode, 0); @@ -241,7 +241,7 @@ STATIC int  xfs_vn_mkdir(  	struct inode	*dir,  	struct dentry	*dentry, -	int		mode) +	umode_t		mode)  {  	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);  } @@ -366,7 +366,7 @@ xfs_vn_symlink(  	struct xfs_inode *cip = NULL;  	struct xfs_name	name;  	int		error; -	mode_t		mode; +	umode_t		mode;  	mode = S_IFLNK |  		(irix_symlink_mode ? 
0777 & ~current_umask() : S_IRWXUGO); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 34817adf4b9..e2cc3568c29 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -760,38 +760,6 @@ xfs_log_item_init(  	INIT_LIST_HEAD(&item->li_cil);  } -/* - * Write region vectors to log.  The write happens using the space reservation - * of the ticket (tic).  It is not a requirement that all writes for a given - * transaction occur with one call to xfs_log_write(). However, it is important - * to note that the transaction reservation code makes an assumption about the - * number of log headers a transaction requires that may be violated if you - * don't pass all the transaction vectors in one call.... - */ -int -xfs_log_write( -	struct xfs_mount	*mp, -	struct xfs_log_iovec	reg[], -	int			nentries, -	struct xlog_ticket	*tic, -	xfs_lsn_t		*start_lsn) -{ -	struct log		*log = mp->m_log; -	int			error; -	struct xfs_log_vec	vec = { -		.lv_niovecs = nentries, -		.lv_iovecp = reg, -	}; - -	if (XLOG_FORCED_SHUTDOWN(log)) -		return XFS_ERROR(EIO); - -	error = xlog_write(log, &vec, tic, start_lsn, NULL, 0); -	if (error) -		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); -	return error; -} -  void  xfs_log_move_tail(xfs_mount_t	*mp,  		  xfs_lsn_t	tail_lsn) @@ -1685,7 +1653,7 @@ xlog_print_tic_res(  	};  	xfs_warn(mp, -		"xfs_log_write: reservation summary:\n" +		"xlog_write: reservation summary:\n"  		"  trans type  = %s (%u)\n"  		"  unit res    = %d bytes\n"  		"  current res = %d bytes\n" @@ -1714,7 +1682,7 @@ xlog_print_tic_res(  	}  	xfs_alert_tag(mp, XFS_PTAG_LOGRES, -		"xfs_log_write: reservation ran out. Need to up reservation"); +		"xlog_write: reservation ran out. Need to up reservation");  	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);  } @@ -1968,23 +1936,21 @@ xlog_write(  	*start_lsn = 0;  	len = xlog_write_calc_vec_length(ticket, log_vector); -	if (log->l_cilp) { -		/* -		 * Region headers and bytes are already accounted for. -		 * We only need to take into account start records and -		 * split regions in this function. -		 */ -		if (ticket->t_flags & XLOG_TIC_INITED) -			ticket->t_curr_res -= sizeof(xlog_op_header_t); -		/* -		 * Commit record headers need to be accounted for. These -		 * come in as separate writes so are easy to detect. -		 */ -		if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) -			ticket->t_curr_res -= sizeof(xlog_op_header_t); -	} else -		ticket->t_curr_res -= len; +	/* +	 * Region headers and bytes are already accounted for. +	 * We only need to take into account start records and +	 * split regions in this function. +	 */ +	if (ticket->t_flags & XLOG_TIC_INITED) +		ticket->t_curr_res -= sizeof(xlog_op_header_t); + +	/* +	 * Commit record headers need to be accounted for. These +	 * come in as separate writes so are easy to detect. 
+	 */ +	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) +		ticket->t_curr_res -= sizeof(xlog_op_header_t);  	if (ticket->t_curr_res < 0)  		xlog_print_tic_res(log->l_mp, ticket); @@ -2931,8 +2897,7 @@ _xfs_log_force(  	XFS_STATS_INC(xs_log_force); -	if (log->l_cilp) -		xlog_cil_force(log); +	xlog_cil_force(log);  	spin_lock(&log->l_icloglock); @@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(  	XFS_STATS_INC(xs_log_force); -	if (log->l_cilp) { -		lsn = xlog_cil_force_lsn(log, lsn); -		if (lsn == NULLCOMMITLSN) -			return 0; -	} +	lsn = xlog_cil_force_lsn(log, lsn); +	if (lsn == NULLCOMMITLSN) +		return 0;  try_again:  	spin_lock(&log->l_icloglock); @@ -3653,7 +3616,7 @@ xfs_log_force_umount(  	 * completed transactions are flushed to disk with the xfs_log_force()  	 * call below.  	 */ -	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) +	if (!logerror)  		xlog_cil_force(log);  	/* diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3f7bf451c03..2aee3b22d29 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -174,11 +174,6 @@ int	  xfs_log_reserve(struct xfs_mount *mp,  			  __uint8_t	   clientid,  			  uint		   flags,  			  uint		   t_type); -int	  xfs_log_write(struct xfs_mount *mp, -			xfs_log_iovec_t  region[], -			int		 nentries, -			struct xlog_ticket *ticket, -			xfs_lsn_t	 *start_lsn);  int	  xfs_log_unmount_write(struct xfs_mount *mp);  void      xfs_log_unmount(struct xfs_mount *mp);  int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror); @@ -189,8 +184,7 @@ void	  xlog_iodone(struct xfs_buf *);  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);  void	  xfs_log_ticket_put(struct xlog_ticket *ticket); -void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, -				struct xfs_log_vec *log_vector, +int	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,  				xfs_lsn_t *commit_lsn, int flags);  bool	xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index c7755d5a5fb..d4fadbe8ac9 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -32,10 +32,7 @@  #include "xfs_discard.h"  /* - * Perform initial CIL structure initialisation. If the CIL is not - * enabled in this filesystem, ensure the log->l_cilp is null so - * we can check this conditional to determine if we are doing delayed - * logging or not. + * Perform initial CIL structure initialisation.   */  int  xlog_cil_init( @@ -44,10 +41,6 @@ xlog_cil_init(  	struct xfs_cil	*cil;  	struct xfs_cil_ctx *ctx; -	log->l_cilp = NULL; -	if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG)) -		return 0; -  	cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);  	if (!cil)  		return ENOMEM; @@ -80,9 +73,6 @@ void  xlog_cil_destroy(  	struct log	*log)  { -	if (!log->l_cilp) -		return; -  	if (log->l_cilp->xc_ctx) {  		if (log->l_cilp->xc_ctx->ticket)  			xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); @@ -137,9 +127,6 @@ void  xlog_cil_init_post_recovery(  	struct log	*log)  { -	if (!log->l_cilp) -		return; -  	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);  	log->l_cilp->xc_ctx->sequence = 1;  	log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, @@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(   * format the regions into the iclog as though they are being formatted   * directly out of the objects themselves.   
*/ -static void -xlog_cil_format_items( -	struct log		*log, -	struct xfs_log_vec	*log_vector) +static struct xfs_log_vec * +xlog_cil_prepare_log_vecs( +	struct xfs_trans	*tp)  { -	struct xfs_log_vec *lv; +	struct xfs_log_item_desc *lidp; +	struct xfs_log_vec	*lv = NULL; +	struct xfs_log_vec	*ret_lv = NULL; -	ASSERT(log_vector); -	for (lv = log_vector; lv; lv = lv->lv_next) { + +	/* Bail out if we didn't find a log item.  */ +	if (list_empty(&tp->t_items)) { +		ASSERT(0); +		return NULL; +	} + +	list_for_each_entry(lidp, &tp->t_items, lid_trans) { +		struct xfs_log_vec *new_lv;  		void	*ptr;  		int	index;  		int	len = 0; +		uint	niovecs; + +		/* Skip items which aren't dirty in this transaction. */ +		if (!(lidp->lid_flags & XFS_LID_DIRTY)) +			continue; + +		/* Skip items that do not have any vectors for writing */ +		niovecs = IOP_SIZE(lidp->lid_item); +		if (!niovecs) +			continue; + +		new_lv = kmem_zalloc(sizeof(*new_lv) + +				niovecs * sizeof(struct xfs_log_iovec), +				KM_SLEEP); + +		/* The allocated iovec region lies beyond the log vector. */ +		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; +		new_lv->lv_niovecs = niovecs; +		new_lv->lv_item = lidp->lid_item;  		/* build the vector array and calculate it's length */ -		IOP_FORMAT(lv->lv_item, lv->lv_iovecp); -		for (index = 0; index < lv->lv_niovecs; index++) -			len += lv->lv_iovecp[index].i_len; +		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); +		for (index = 0; index < new_lv->lv_niovecs; index++) +			len += new_lv->lv_iovecp[index].i_len; -		lv->lv_buf_len = len; -		lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); -		ptr = lv->lv_buf; +		new_lv->lv_buf_len = len; +		new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len, +				KM_SLEEP|KM_NOFS); +		ptr = new_lv->lv_buf; -		for (index = 0; index < lv->lv_niovecs; index++) { -			struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; +		for (index = 0; index < new_lv->lv_niovecs; index++) { +			struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];  			memcpy(ptr, vec->i_addr, vec->i_len);  			vec->i_addr = ptr;  			ptr += vec->i_len;  		} -		ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); +		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); + +		if (!ret_lv) +			ret_lv = new_lv; +		else +			lv->lv_next = new_lv; +		lv = new_lv;  	} + +	return ret_lv;  }  /* @@ -256,7 +279,7 @@ xfs_cil_prepare_item(   * Insert the log items into the CIL and calculate the difference in space   * consumed by the item. Add the space to the checkpoint ticket and calculate   * if the change requires additional log metadata. If it does, take that space - * as well. Remove the amount of space we addded to the checkpoint ticket from + * as well. Remove the amount of space we added to the checkpoint ticket from   * the current transaction ticket so that the accounting works out correctly.   */  static void @@ -635,28 +658,30 @@ out_abort:   * background commit, returns without it held once background commits are   * allowed again.   */ -void +int  xfs_log_commit_cil(  	struct xfs_mount	*mp,  	struct xfs_trans	*tp, -	struct xfs_log_vec	*log_vector,  	xfs_lsn_t		*commit_lsn,  	int			flags)  {  	struct log		*log = mp->m_log;  	int			log_flags = 0;  	int			push = 0; +	struct xfs_log_vec	*log_vector;  	if (flags & XFS_TRANS_RELEASE_LOG_RES)  		log_flags = XFS_LOG_REL_PERM_RESERV;  	/* -	 * do all the hard work of formatting items (including memory +	 * Do all the hard work of formatting items (including memory  	 * allocation) outside the CIL context lock. 
This prevents stalling CIL  	 * pushes when we are low on memory and a transaction commit spends a  	 * lot of time in memory reclaim.  	 */ -	xlog_cil_format_items(log, log_vector); +	log_vector = xlog_cil_prepare_log_vecs(tp); +	if (!log_vector) +		return ENOMEM;  	/* lock out background commit */  	down_read(&log->l_cilp->xc_ctx_lock); @@ -709,6 +734,7 @@ xfs_log_commit_cil(  	 */  	if (push)  		xlog_cil_push(log, 0); +	return 0;  }  /* @@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(  {  	struct xfs_cil_ctx *ctx; -	if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG)) -		return false;  	if (list_empty(&lip->li_cil))  		return false; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index bb24dac42a2..19f69e23250 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -219,7 +219,6 @@ typedef struct xfs_mount {  #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops  						   must be synchronous except  						   for space allocations */ -#define XFS_MOUNT_DELAYLOG	(1ULL << 1)	/* delayed logging is enabled */  #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)  #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem  						   operations, typically for diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 0bbb1a41998..671f37eae1c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -154,12 +154,17 @@ STATIC void  xfs_qm_destroy(  	struct xfs_qm	*xqm)  { -	struct xfs_dquot *dqp, *n;  	int		hsize, i;  	ASSERT(xqm != NULL);  	ASSERT(xqm->qm_nrefs == 0); +  	unregister_shrinker(&xfs_qm_shaker); + +	mutex_lock(&xqm->qm_dqfrlist_lock); +	ASSERT(list_empty(&xqm->qm_dqfrlist)); +	mutex_unlock(&xqm->qm_dqfrlist_lock); +  	hsize = xqm->qm_dqhashmask + 1;  	for (i = 0; i < hsize; i++) {  		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); @@ -171,17 +176,6 @@ xfs_qm_destroy(  	xqm->qm_grp_dqhtable = NULL;  	xqm->qm_dqhashmask = 0; -	/* frlist cleanup */ -	mutex_lock(&xqm->qm_dqfrlist_lock); -	list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { -		xfs_dqlock(dqp); -		list_del_init(&dqp->q_freelist); -		xfs_Gqm->qm_dqfrlist_cnt--; -		xfs_dqunlock(dqp); -		xfs_qm_dqdestroy(dqp); -	} -	mutex_unlock(&xqm->qm_dqfrlist_lock); -	mutex_destroy(&xqm->qm_dqfrlist_lock);  	kmem_free(xqm);  } @@ -232,34 +226,10 @@ STATIC void  xfs_qm_rele_quotafs_ref(  	struct xfs_mount *mp)  { -	xfs_dquot_t	*dqp, *n; -  	ASSERT(xfs_Gqm);  	ASSERT(xfs_Gqm->qm_nrefs > 0);  	/* -	 * Go thru the freelist and destroy all inactive dquots. -	 */ -	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - -	list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { -		xfs_dqlock(dqp); -		if (dqp->dq_flags & XFS_DQ_INACTIVE) { -			ASSERT(dqp->q_mount == NULL); -			ASSERT(! XFS_DQ_IS_DIRTY(dqp)); -			ASSERT(list_empty(&dqp->q_hashlist)); -			ASSERT(list_empty(&dqp->q_mplist)); -			list_del_init(&dqp->q_freelist); -			xfs_Gqm->qm_dqfrlist_cnt--; -			xfs_dqunlock(dqp); -			xfs_qm_dqdestroy(dqp); -		} else { -			xfs_dqunlock(dqp); -		} -	} -	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - -	/*  	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll  	 * be restarted.  	 */ @@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(   */  STATIC int  xfs_qm_dqflush_all( -	struct xfs_mount	*mp, -	int			sync_mode) +	struct xfs_mount	*mp)  {  	struct xfs_quotainfo	*q = mp->m_quotainfo;  	int			recl; @@ -429,7 +398,8 @@ again:  	mutex_lock(&q->qi_dqlist_lock);  	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {  		xfs_dqlock(dqp); -		if (! 
XFS_DQ_IS_DIRTY(dqp)) { +		if ((dqp->dq_flags & XFS_DQ_FREEING) || +		    !XFS_DQ_IS_DIRTY(dqp)) {  			xfs_dqunlock(dqp);  			continue;  		} @@ -444,14 +414,14 @@ again:  			 * out immediately.  We'll be able to acquire  			 * the flush lock when the I/O completes.  			 */ -			xfs_qm_dqflock_pushbuf_wait(dqp); +			xfs_dqflock_pushbuf_wait(dqp);  		}  		/*  		 * Let go of the mplist lock. We don't want to hold it  		 * across a disk write.  		 */  		mutex_unlock(&q->qi_dqlist_lock); -		error = xfs_qm_dqflush(dqp, sync_mode); +		error = xfs_qm_dqflush(dqp, 0);  		xfs_dqunlock(dqp);  		if (error)  			return error; @@ -468,6 +438,7 @@ again:  	/* return ! busy */  	return 0;  } +  /*   * Release the group dquot pointers the user dquots may be   * carrying around as a hint. mplist is locked on entry and exit. @@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(  {  	struct xfs_quotainfo	*q = mp->m_quotainfo;  	struct xfs_dquot	*dqp, *gdqp; -	int			nrecl;   again:  	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));  	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {  		xfs_dqlock(dqp); -		if ((gdqp = dqp->q_gdquot)) { -			xfs_dqlock(gdqp); -			dqp->q_gdquot = NULL; -		} -		xfs_dqunlock(dqp); - -		if (gdqp) { -			/* -			 * Can't hold the mplist lock across a dqput. -			 * XXXmust convert to marker based iterations here. -			 */ -			nrecl = q->qi_dqreclaims; +		if (dqp->dq_flags & XFS_DQ_FREEING) { +			xfs_dqunlock(dqp);  			mutex_unlock(&q->qi_dqlist_lock); -			xfs_qm_dqput(gdqp); - +			delay(1);  			mutex_lock(&q->qi_dqlist_lock); -			if (nrecl != q->qi_dqreclaims) -				goto again; +			goto again;  		} + +		gdqp = dqp->q_gdquot; +		if (gdqp) +			dqp->q_gdquot = NULL; +		xfs_dqunlock(dqp); + +		if (gdqp) +			xfs_qm_dqrele(gdqp);  	}  } @@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(  	struct xfs_quotainfo	*q = mp->m_quotainfo;  	struct xfs_dquot	*dqp, *n;  	uint			dqtype; -	int			nrecl; -	int			nmisses; +	int			nmisses = 0; +	LIST_HEAD		(dispose_list);  	if (!q)  		return 0; @@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(  	 */  	xfs_qm_detach_gdquots(mp); -      again: -	nmisses = 0; -	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));  	/* -	 * Try to get rid of all of the unwanted dquots. The idea is to -	 * get them off mplist and hashlist, but leave them on freelist. +	 * Try to get rid of all of the unwanted dquots.  	 */  	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { -		/* -		 * It's OK to look at the type without taking dqlock here. -		 * We're holding the mplist lock here, and that's needed for -		 * a dqreclaim. -		 */ -		if ((dqp->dq_flags & dqtype) == 0) -			continue; - -		if (!mutex_trylock(&dqp->q_hash->qh_lock)) { -			nrecl = q->qi_dqreclaims; -			mutex_unlock(&q->qi_dqlist_lock); -			mutex_lock(&dqp->q_hash->qh_lock); -			mutex_lock(&q->qi_dqlist_lock); - -			/* -			 * XXXTheoretically, we can get into a very long -			 * ping pong game here. -			 * No one can be adding dquots to the mplist at -			 * this point, but somebody might be taking things off. -			 */ -			if (nrecl != q->qi_dqreclaims) { -				mutex_unlock(&dqp->q_hash->qh_lock); -				goto again; -			} +		xfs_dqlock(dqp); +		if ((dqp->dq_flags & dqtype) != 0 && +		    !(dqp->dq_flags & XFS_DQ_FREEING)) { +			if (dqp->q_nrefs == 0) { +				dqp->dq_flags |= XFS_DQ_FREEING; +				list_move_tail(&dqp->q_mplist, &dispose_list); +			} else +				nmisses++;  		} - -		/* -		 * Take the dquot off the mplist and hashlist. It may remain on -		 * freelist in INACTIVE state. 
-		 */ -		nmisses += xfs_qm_dqpurge(dqp); +		xfs_dqunlock(dqp);  	}  	mutex_unlock(&q->qi_dqlist_lock); + +	list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist) +		xfs_qm_dqpurge(dqp); +  	return nmisses;  } @@ -648,12 +593,9 @@ xfs_qm_dqattach_one(  		 */  		dqp = udqhint->q_gdquot;  		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { -			xfs_dqlock(dqp); -			XFS_DQHOLD(dqp);  			ASSERT(*IO_idqpp == NULL); -			*IO_idqpp = dqp; -			xfs_dqunlock(dqp); +			*IO_idqpp = xfs_qm_dqhold(dqp);  			xfs_dqunlock(udqhint);  			return 0;  		} @@ -693,11 +635,7 @@ xfs_qm_dqattach_one(  /*   * Given a udquot and gdquot, attach a ptr to the group dquot in the - * udquot as a hint for future lookups. The idea sounds simple, but the - * execution isn't, because the udquot might have a group dquot attached - * already and getting rid of that gets us into lock ordering constraints. - * The process is complicated more by the fact that the dquots may or may not - * be locked on entry. + * udquot as a hint for future lookups.   */  STATIC void  xfs_qm_dqattach_grouphint( @@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(  	xfs_dqlock(udq); -	if ((tmp = udq->q_gdquot)) { -		if (tmp == gdq) { -			xfs_dqunlock(udq); -			return; -		} +	tmp = udq->q_gdquot; +	if (tmp) { +		if (tmp == gdq) +			goto done;  		udq->q_gdquot = NULL; -		/* -		 * We can't keep any dqlocks when calling dqrele, -		 * because the freelist lock comes before dqlocks. -		 */ -		xfs_dqunlock(udq); -		/* -		 * we took a hard reference once upon a time in dqget, -		 * so give it back when the udquot no longer points at it -		 * dqput() does the unlocking of the dquot. -		 */  		xfs_qm_dqrele(tmp); - -		xfs_dqlock(udq); -		xfs_dqlock(gdq); - -	} else { -		ASSERT(XFS_DQ_IS_LOCKED(udq)); -		xfs_dqlock(gdq); -	} - -	ASSERT(XFS_DQ_IS_LOCKED(udq)); -	ASSERT(XFS_DQ_IS_LOCKED(gdq)); -	/* -	 * Somebody could have attached a gdquot here, -	 * when we dropped the uqlock. If so, just do nothing. -	 */ -	if (udq->q_gdquot == NULL) { -		XFS_DQHOLD(gdq); -		udq->q_gdquot = gdq;  	} -	xfs_dqunlock(gdq); +	udq->q_gdquot = xfs_qm_dqhold(gdq); +done:  	xfs_dqunlock(udq);  } @@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(  		ASSERT(ip->i_gdquot);  		/* -		 * We may or may not have the i_udquot locked at this point, -		 * but this check is OK since we don't depend on the i_gdquot to -		 * be accurate 100% all the time. It is just a hint, and this -		 * will succeed in general. -		 */ -		if (ip->i_udquot->q_gdquot == ip->i_gdquot) -			goto done; -		/* -		 * Attach i_gdquot to the gdquot hint inside the i_udquot. +		 * We do not have i_udquot locked at this point, but this check +		 * is OK since we don't depend on the i_gdquot to be accurate +		 * 100% all the time. It is just a hint, and this will +		 * succeed in general.  		 */ -		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); +		if (ip->i_udquot->q_gdquot != ip->i_gdquot) +			xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);  	}   done: @@ -879,100 +785,6 @@ xfs_qm_dqdetach(  	}  } -int -xfs_qm_sync( -	struct xfs_mount	*mp, -	int			flags) -{ -	struct xfs_quotainfo	*q = mp->m_quotainfo; -	int			recl, restarts; -	struct xfs_dquot	*dqp; -	int			error; - -	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) -		return 0; - -	restarts = 0; - -  again: -	mutex_lock(&q->qi_dqlist_lock); -	/* -	 * dqpurge_all() also takes the mplist lock and iterate thru all dquots -	 * in quotaoff. 
However, if the QUOTA_ACTIVE bits are not cleared -	 * when we have the mplist lock, we know that dquots will be consistent -	 * as long as we have it locked. -	 */ -	if (!XFS_IS_QUOTA_ON(mp)) { -		mutex_unlock(&q->qi_dqlist_lock); -		return 0; -	} -	ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); -	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { -		/* -		 * If this is vfs_sync calling, then skip the dquots that -		 * don't 'seem' to be dirty. ie. don't acquire dqlock. -		 * This is very similar to what xfs_sync does with inodes. -		 */ -		if (flags & SYNC_TRYLOCK) { -			if (!XFS_DQ_IS_DIRTY(dqp)) -				continue; -			if (!xfs_qm_dqlock_nowait(dqp)) -				continue; -		} else { -			xfs_dqlock(dqp); -		} - -		/* -		 * Now, find out for sure if this dquot is dirty or not. -		 */ -		if (! XFS_DQ_IS_DIRTY(dqp)) { -			xfs_dqunlock(dqp); -			continue; -		} - -		/* XXX a sentinel would be better */ -		recl = q->qi_dqreclaims; -		if (!xfs_dqflock_nowait(dqp)) { -			if (flags & SYNC_TRYLOCK) { -				xfs_dqunlock(dqp); -				continue; -			} -			/* -			 * If we can't grab the flush lock then if the caller -			 * really wanted us to give this our best shot, so -			 * see if we can give a push to the buffer before we wait -			 * on the flush lock. At this point, we know that -			 * even though the dquot is being flushed, -			 * it has (new) dirty data. -			 */ -			xfs_qm_dqflock_pushbuf_wait(dqp); -		} -		/* -		 * Let go of the mplist lock. We don't want to hold it -		 * across a disk write -		 */ -		mutex_unlock(&q->qi_dqlist_lock); -		error = xfs_qm_dqflush(dqp, flags); -		xfs_dqunlock(dqp); -		if (error && XFS_FORCED_SHUTDOWN(mp)) -			return 0;	/* Need to prevent umount failure */ -		else if (error) -			return error; - -		mutex_lock(&q->qi_dqlist_lock); -		if (recl != q->qi_dqreclaims) { -			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) -				break; - -			mutex_unlock(&q->qi_dqlist_lock); -			goto again; -		} -	} - -	mutex_unlock(&q->qi_dqlist_lock); -	return 0; -} -  /*   * The hash chains and the mplist use the same xfs_dqhash structure as   * their list head, but we can take the mplist qh_lock and one of the @@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(  	/*  	 * We try to get the limits from the superuser's limits fields.  	 * This is quite hacky, but it is standard quota practice. +	 *  	 * We look at the USR dquot with id == 0 first, but if user quotas  	 * are not enabled we goto the GRP dquot with id == 0.  	 * We don't really care to keep separate default limits for user  	 * and group quotas, at least not at this point. +	 * +	 * Since we may not have done a quotacheck by this point, just read +	 * the dquot without attaching it to any hashtables or lists.  	 */ -	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, -			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :  -			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : -				XFS_DQ_PROJ), -			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, -			     &dqp); -	if (! error) { +	error = xfs_qm_dqread(mp, 0, +			XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : +			 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : +			  XFS_DQ_PROJ), +			XFS_QMOPT_DOWARN, &dqp); +	if (!error) {  		xfs_disk_dquot_t	*ddqp = &dqp->q_core;  		/* @@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(  		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);  		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); -		/* -		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because -		 * we don't want this dquot cached. We haven't done a -		 * quotacheck yet, and quotacheck doesn't like incore dquots. 
-		 */  		xfs_qm_dqdestroy(dqp);  	} else {  		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(  	 * successfully.  	 */  	if (!error) -		error = xfs_qm_dqflush_all(mp, 0); +		error = xfs_qm_dqflush_all(mp);  	/*  	 * We can get this error if we couldn't do a dquot allocation inside @@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(  /* - * Just pop the least recently used dquot off the freelist and - * recycle it. The returned dquot is locked. + * Pop the least recently used dquot off the freelist and recycle it.   */ -STATIC xfs_dquot_t * +STATIC struct xfs_dquot *  xfs_qm_dqreclaim_one(void)  { -	xfs_dquot_t	*dqpout; -	xfs_dquot_t	*dqp; -	int		restarts; -	int		startagain; - -	restarts = 0; -	dqpout = NULL; +	struct xfs_dquot	*dqp; +	int			restarts = 0; -	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ -again: -	startagain = 0;  	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - +restart:  	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {  		struct xfs_mount *mp = dqp->q_mount; -		xfs_dqlock(dqp); + +		if (!xfs_dqlock_nowait(dqp)) +			continue;  		/* -		 * We are racing with dqlookup here. Naturally we don't -		 * want to reclaim a dquot that lookup wants. We release the -		 * freelist lock and start over, so that lookup will grab -		 * both the dquot and the freelistlock. +		 * This dquot has already been grabbed by dqlookup. +		 * Remove it from the freelist and try again.  		 */ -		if (dqp->dq_flags & XFS_DQ_WANT) { -			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - +		if (dqp->q_nrefs) {  			trace_xfs_dqreclaim_want(dqp);  			XQM_STATS_INC(xqmstats.xs_qm_dqwants); -			restarts++; -			startagain = 1; -			goto dqunlock; -		} -		/* -		 * If the dquot is inactive, we are assured that it is -		 * not on the mplist or the hashlist, and that makes our -		 * life easier. -		 */ -		if (dqp->dq_flags & XFS_DQ_INACTIVE) { -			ASSERT(mp == NULL); -			ASSERT(! XFS_DQ_IS_DIRTY(dqp)); -			ASSERT(list_empty(&dqp->q_hashlist)); -			ASSERT(list_empty(&dqp->q_mplist));  			list_del_init(&dqp->q_freelist);  			xfs_Gqm->qm_dqfrlist_cnt--; -			dqpout = dqp; -			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); +			restarts++;  			goto dqunlock;  		} @@ -1874,64 +1658,49 @@ again:  			 * We flush it delayed write, so don't bother  			 * releasing the freelist lock.  			 */ -			error = xfs_qm_dqflush(dqp, 0); +			error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);  			if (error) {  				xfs_warn(mp, "%s: dquot %p flush failed",  					__func__, dqp);  			}  			goto dqunlock;  		} +		xfs_dqfunlock(dqp);  		/* -		 * We're trying to get the hashlock out of order. This races -		 * with dqlookup; so, we giveup and goto the next dquot if -		 * we couldn't get the hashlock. This way, we won't starve -		 * a dqlookup process that holds the hashlock that is -		 * waiting for the freelist lock. +		 * Prevent lookup now that we are going to reclaim the dquot. +		 * Once XFS_DQ_FREEING is set lookup won't touch the dquot, +		 * thus we can drop the lock now.  		 */ -		if (!mutex_trylock(&dqp->q_hash->qh_lock)) { -			restarts++; -			goto dqfunlock; -		} +		dqp->dq_flags |= XFS_DQ_FREEING; +		xfs_dqunlock(dqp); -		/* -		 * This races with dquot allocation code as well as dqflush_all -		 * and reclaim code. So, if we failed to grab the mplist lock, -		 * giveup everything and start over. 
-		 */ -		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { -			restarts++; -			startagain = 1; -			goto qhunlock; -		} +		mutex_lock(&dqp->q_hash->qh_lock); +		list_del_init(&dqp->q_hashlist); +		dqp->q_hash->qh_version++; +		mutex_unlock(&dqp->q_hash->qh_lock); -		ASSERT(dqp->q_nrefs == 0); +		mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);  		list_del_init(&dqp->q_mplist);  		mp->m_quotainfo->qi_dquots--;  		mp->m_quotainfo->qi_dqreclaims++; -		list_del_init(&dqp->q_hashlist); -		dqp->q_hash->qh_version++; +		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + +		ASSERT(dqp->q_nrefs == 0);  		list_del_init(&dqp->q_freelist);  		xfs_Gqm->qm_dqfrlist_cnt--; -		dqpout = dqp; -		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); -qhunlock: -		mutex_unlock(&dqp->q_hash->qh_lock); -dqfunlock: -		xfs_dqfunlock(dqp); + +		mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); +		return dqp;  dqunlock:  		xfs_dqunlock(dqp); -		if (dqpout) -			break;  		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)  			break; -		if (startagain) { -			mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -			goto again; -		} +		goto restart;  	} +  	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -	return dqpout; +	return NULL;  }  /* @@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(  			 * this to caller  			 */  			ASSERT(ip->i_udquot); -			uq = ip->i_udquot; -			xfs_dqlock(uq); -			XFS_DQHOLD(uq); -			xfs_dqunlock(uq); +			uq = xfs_qm_dqhold(ip->i_udquot);  		}  	}  	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { @@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(  			xfs_ilock(ip, lockflags);  		} else {  			ASSERT(ip->i_gdquot); -			gq = ip->i_gdquot; -			xfs_dqlock(gq); -			XFS_DQHOLD(gq); -			xfs_dqunlock(gq); +			gq = xfs_qm_dqhold(ip->i_gdquot);  		}  	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {  		if (xfs_get_projid(ip) != prid) { @@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(  			xfs_ilock(ip, lockflags);  		} else {  			ASSERT(ip->i_gdquot); -			gq = ip->i_gdquot; -			xfs_dqlock(gq); -			XFS_DQHOLD(gq); -			xfs_dqunlock(gq); +			gq = xfs_qm_dqhold(ip->i_gdquot);  		}  	}  	if (uq) @@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(  	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);  	/* -	 * Take an extra reference, because the inode -	 * is going to keep this dquot pointer even -	 * after the trans_commit. +	 * Take an extra reference, because the inode is going to keep +	 * this dquot pointer even after the trans_commit.  	 */ -	xfs_dqlock(newdq); -	XFS_DQHOLD(newdq); -	xfs_dqunlock(newdq); -	*IO_olddq = newdq; +	*IO_olddq = xfs_qm_dqhold(newdq);  	return prevdq;  } @@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(  	ASSERT(XFS_IS_QUOTA_RUNNING(mp));  	if (udqp) { -		xfs_dqlock(udqp); -		XFS_DQHOLD(udqp); -		xfs_dqunlock(udqp);  		ASSERT(ip->i_udquot == NULL); -		ip->i_udquot = udqp;  		ASSERT(XFS_IS_UQUOTA_ON(mp));  		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); + +		ip->i_udquot = xfs_qm_dqhold(udqp);  		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);  	}  	if (gdqp) { -		xfs_dqlock(gdqp); -		XFS_DQHOLD(gdqp); -		xfs_dqunlock(gdqp);  		ASSERT(ip->i_gdquot == NULL); -		ip->i_gdquot = gdqp;  		ASSERT(XFS_IS_OQUOTA_ON(mp));  		ASSERT((XFS_IS_GQUOTA_ON(mp) ?  			
ip->i_d.di_gid : xfs_get_projid(ip)) ==
 				be32_to_cpu(gdqp->q_core.d_id));
+
+		ip->i_gdquot = xfs_qm_dqhold(gdqp);
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
 }
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052..9b4f3adefbc 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t	*qm_dqzone;
 extern kmem_zone_t	*qm_dqtrxzone;
 
 /*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS	7
-
-/*
  * Ditto, for xfs_qm_dqreclaim_one.
  */
 #define XFS_QM_RECLAIM_MAX_RESTARTS	4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567f..8a0807e0f97 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
 #define XFS_DQ_PROJ		0x0002		/* project quota */
 #define XFS_DQ_GROUP		0x0004		/* a group quota */
 #define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
-#define XFS_DQ_WANT		0x0010		/* for lookup/reclaim race */
-#define XFS_DQ_INACTIVE		0x0020		/* dq off mplist & hashlist */
+#define XFS_DQ_FREEING		0x0010		/* dquot is being torn down */
 
 #define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
 
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
 	{ XFS_DQ_PROJ,		"PROJ" }, \
 	{ XFS_DQ_GROUP,		"GROUP" }, \
 	{ XFS_DQ_DIRTY,		"DIRTY" }, \
-	{ XFS_DQ_WANT,		"WANT" }, \
-	{ XFS_DQ_INACTIVE,	"INACTIVE" }
+	{ XFS_DQ_FREEING,	"FREEING" }
 
 /*
  * In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
 #define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
-#define XFS_QMOPT_DQSUSER	0x0000020 /* don't cache super users dquot */
 #define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
 extern void xfs_qm_dqdetach(struct xfs_inode *);
 extern void xfs_qm_dqrele(struct xfs_dquot *);
 extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
-extern int xfs_qm_sync(struct xfs_mount *, int);
 extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
 extern void xfs_qm_mount_quotas(struct xfs_mount *);
 extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
 #define xfs_qm_statvfs(ip, s)
-static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
-{
-	return 0;
-}
 #define xfs_qm_newmount(mp, a, b)					(0)
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3eca58f51ae..281961c1d81 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
 	mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-	mp->m_flags |= XFS_MOUNT_DELAYLOG;
 
 	/*
 	 * These can be overridden by the mount option parsing.
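The hunks above that touch xfs_qm_vop_dqalloc(), xfs_qm_vop_chown() and xfs_qm_vop_create_dqattach() all fold the open-coded xfs_dqlock()/XFS_DQHOLD()/xfs_dqunlock() triplet into a single xfs_qm_dqhold() call. The helper's definition is not part of the hunks shown here; a minimal sketch of what it presumably looks like, assuming q_nrefs is the reference count guarded by the dquot lock:

static inline struct xfs_dquot *
xfs_qm_dqhold(
	struct xfs_dquot	*dqp)
{
	xfs_dqlock(dqp);	/* q_nrefs is guarded by the dquot lock */
	dqp->q_nrefs++;
	xfs_dqunlock(dqp);
	return dqp;
}

Returning the dquot is what lets the callers collapse grab-and-assign into one expression, e.g. ip->i_udquot = xfs_qm_dqhold(udqp), which is why the converted hunks read so compactly.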
@@ -353,11 +352,11 @@ xfs_parseargs(
 			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
 		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-			mp->m_flags |= XFS_MOUNT_DELAYLOG;
+			xfs_warn(mp,
+	"delaylog is the default now, option is deprecated.");
 		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
 			xfs_warn(mp,
-	"nodelaylog is deprecated and will be removed in Linux 3.3");
+	"nodelaylog support has been removed, option is deprecated.");
 		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
 			mp->m_flags |= XFS_MOUNT_DISCARD;
 		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
 		return EINVAL;
 	}
 
-	if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-	    !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-		xfs_warn(mp,
-	"the discard option is incompatible with the nodelaylog option");
-		return EINVAL;
-	}
-
 #ifndef CONFIG_XFS_QUOTA
 	if (XFS_IS_QUOTA_RUNNING(mp)) {
 		xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
 		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
-		{ XFS_MOUNT_DELAYLOG,		"," MNTOPT_DELAYLOG },
 		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
 		{ 0, NULL }
 	};
@@ -869,27 +860,6 @@ xfs_fs_dirty_inode(
 }
 
 STATIC int
-xfs_log_inode(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	int			error;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	return xfs_trans_commit(tp, 0);
-}
-
-STATIC int
 xfs_fs_write_inode(
 	struct inode		*inode,
 	struct writeback_control *wbc)
@@ -902,10 +872,8 @@ xfs_fs_write_inode(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -XFS_ERROR(EIO);
 
-	if (!ip->i_update_core)
-		return 0;
-
-	if (wbc->sync_mode == WB_SYNC_ALL) {
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
 		/*
 		 * Make sure the inode has made it into the log.  Instead
 		 * of forcing it all the way to stable storage using a
@@ -913,11 +881,14 @@ xfs_fs_write_inode(
 		 * ->sync_fs call do that for us, which reduces the number
 		 * of synchronous log forces dramatically.
 		 */
-		error = xfs_log_inode(ip);
+		error = xfs_log_dirty_inode(ip, NULL, 0);
 		if (error)
 			goto out;
 		return 0;
 	} else {
+		if (!ip->i_update_core)
+			return 0;
+
 		/*
 		 * We make this non-blocking if the inode is contended, return
 		 * EAGAIN to indicate to the caller that they did not succeed.
@@ -1034,17 +1005,10 @@ xfs_fs_sync_fs(
 	int			error;
 
 	/*
-	 * Not much we can do for the first async pass.  Writing out the
-	 * superblock would be counter-productive as we are going to redirty
-	 * when writing out other data and metadata (and writing out a single
-	 * block is quite fast anyway).
-	 *
-	 * Try to asynchronously kick off quota syncing at least.
+	 * Doing anything during the async pass would be counterproductive.
 	 */
-	if (!wait) {
-		xfs_qm_sync(mp, SYNC_TRYLOCK);
+	if (!wait)
 		return 0;
-	}
 
 	error = xfs_quiesce_data(mp);
 	if (error)
@@ -1258,9 +1222,9 @@ xfs_fs_unfreeze(
 STATIC int
 xfs_fs_show_options(
 	struct seq_file		*m,
-	struct vfsmount		*mnt)
+	struct dentry		*root)
 {
-	return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+	return -xfs_showargs(XFS_M(root->d_sb), m);
 }
 
 /*
@@ -1641,12 +1605,12 @@ STATIC int __init
 xfs_init_workqueues(void)
 {
 	/*
-	 * max_active is set to 8 to give enough concurency to allow
-	 * multiple work operations on each CPU to run. This allows multiple
-	 * filesystems to be running sync work concurrently, and scales with
-	 * the number of CPUs in the system.
+	 * We never want the same work item to run twice: reclaiming inodes
+	 * or idling the log is not going to get any faster with multiple CPUs
+	 * competing for resources.  Use the default large max_active value
+	 * so that even lots of filesystems can perform these tasks in parallel.
 	 */
-	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
 	if (!xfs_syncd_wq)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index be5c51d8f75..72c01a1c16e 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
 	return error;
 }
 
+int
+xfs_log_dirty_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	if (!ip->i_update_core)
+		return 0;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	return xfs_trans_commit(tp, 0);
+}
+
 /*
  * When remounting a filesystem read-only or freezing the filesystem, we have
  * two phases to execute. This first phase is syncing the data before we
@@ -359,10 +385,17 @@ xfs_quiesce_data(
 {
 	int			error, error2 = 0;
 
-	xfs_qm_sync(mp, SYNC_TRYLOCK);
-	xfs_qm_sync(mp, SYNC_WAIT);
+	/*
+	 * Log all pending size and timestamp updates.  The vfs writeback
+	 * code is supposed to do this, but due to its overaggressive
+	 * livelock detection it will skip inodes where appending writes
+	 * were written out in the first non-blocking sync phase if their
+	 * completion took long enough that it happened after taking the
+	 * timestamp for the cut-off in the blocking phase.
+	 */ +	xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0); -	/* force out the newly dirtied log buffers */ +	/* force out the log */  	xfs_log_force(mp, XFS_LOG_SYNC);  	/* write superblock and hoover up shutdown errors */ @@ -470,7 +503,6 @@ xfs_sync_worker(  			error = xfs_fs_log_dummy(mp);  		else  			xfs_log_force(mp, 0); -		error = xfs_qm_sync(mp, SYNC_TRYLOCK);  		/* start pushing all the metadata that is currently dirty */  		xfs_ail_push_all(mp->m_ail); diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h index 941202e7ac6..fa965479d78 100644 --- a/fs/xfs/xfs_sync.h +++ b/fs/xfs/xfs_sync.h @@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);  void xfs_flush_inodes(struct xfs_inode *ip); +int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags); +  int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);  int xfs_reclaim_inodes_count(struct xfs_mount *mp);  void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 49403579887..a9d5b1e06ef 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);  DEFINE_DQUOT_EVENT(xfs_dqread);  DEFINE_DQUOT_EVENT(xfs_dqread_fail);  DEFINE_DQUOT_EVENT(xfs_dqlookup_found); -DEFINE_DQUOT_EVENT(xfs_dqlookup_want); -DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);  DEFINE_DQUOT_EVENT(xfs_dqlookup_done);  DEFINE_DQUOT_EVENT(xfs_dqget_hit);  DEFINE_DQUOT_EVENT(xfs_dqget_miss); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1f35b2feca9..329b06aba1c 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1158,7 +1158,6 @@ xfs_trans_add_item(  	lidp->lid_item = lip;  	lidp->lid_flags = 0; -	lidp->lid_size = 0;  	list_add_tail(&lidp->lid_trans, &tp->t_items);  	lip->li_desc = lidp; @@ -1210,219 +1209,6 @@ xfs_trans_free_items(  	}  } -/* - * Unlock the items associated with a transaction. - * - * Items which were not logged should be freed.  Those which were logged must - * still be tracked so they can be unpinned when the transaction commits. - */ -STATIC void -xfs_trans_unlock_items( -	struct xfs_trans	*tp, -	xfs_lsn_t		commit_lsn) -{ -	struct xfs_log_item_desc *lidp, *next; - -	list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { -		struct xfs_log_item	*lip = lidp->lid_item; - -		lip->li_desc = NULL; - -		if (commit_lsn != NULLCOMMITLSN) -			IOP_COMMITTING(lip, commit_lsn); -		IOP_UNLOCK(lip); - -		/* -		 * Free the descriptor if the item is not dirty -		 * within this transaction. -		 */ -		if (!(lidp->lid_flags & XFS_LID_DIRTY)) -			xfs_trans_free_item_desc(lidp); -	} -} - -/* - * Total up the number of log iovecs needed to commit this - * transaction.  The transaction itself needs one for the - * transaction header.  Ask each dirty item in turn how many - * it needs to get the total. - */ -static uint -xfs_trans_count_vecs( -	struct xfs_trans	*tp) -{ -	int			nvecs; -	struct xfs_log_item_desc *lidp; - -	nvecs = 1; - -	/* In the non-debug case we need to start bailing out if we -	 * didn't find a log_item here, return zero and let trans_commit -	 * deal with it. -	 */ -	if (list_empty(&tp->t_items)) { -		ASSERT(0); -		return 0; -	} - -	list_for_each_entry(lidp, &tp->t_items, lid_trans) { -		/* -		 * Skip items which aren't dirty in this transaction. 
-		 */ -		if (!(lidp->lid_flags & XFS_LID_DIRTY)) -			continue; -		lidp->lid_size = IOP_SIZE(lidp->lid_item); -		nvecs += lidp->lid_size; -	} - -	return nvecs; -} - -/* - * Fill in the vector with pointers to data to be logged - * by this transaction.  The transaction header takes - * the first vector, and then each dirty item takes the - * number of vectors it indicated it needed in xfs_trans_count_vecs(). - * - * As each item fills in the entries it needs, also pin the item - * so that it cannot be flushed out until the log write completes. - */ -static void -xfs_trans_fill_vecs( -	struct xfs_trans	*tp, -	struct xfs_log_iovec	*log_vector) -{ -	struct xfs_log_item_desc *lidp; -	struct xfs_log_iovec	*vecp; -	uint			nitems; - -	/* -	 * Skip over the entry for the transaction header, we'll -	 * fill that in at the end. -	 */ -	vecp = log_vector + 1; - -	nitems = 0; -	ASSERT(!list_empty(&tp->t_items)); -	list_for_each_entry(lidp, &tp->t_items, lid_trans) { -		/* Skip items which aren't dirty in this transaction. */ -		if (!(lidp->lid_flags & XFS_LID_DIRTY)) -			continue; - -		/* -		 * The item may be marked dirty but not log anything.  This can -		 * be used to get called when a transaction is committed. -		 */ -		if (lidp->lid_size) -			nitems++; -		IOP_FORMAT(lidp->lid_item, vecp); -		vecp += lidp->lid_size; -		IOP_PIN(lidp->lid_item); -	} - -	/* -	 * Now that we've counted the number of items in this transaction, fill -	 * in the transaction header. Note that the transaction header does not -	 * have a log item. -	 */ -	tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; -	tp->t_header.th_type = tp->t_type; -	tp->t_header.th_num_items = nitems; -	log_vector->i_addr = (xfs_caddr_t)&tp->t_header; -	log_vector->i_len = sizeof(xfs_trans_header_t); -	log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; -} - -/* - * The committed item processing consists of calling the committed routine of - * each logged item, updating the item's position in the AIL if necessary, and - * unpinning each item.  If the committed routine returns -1, then do nothing - * further with the item because it may have been freed. - * - * Since items are unlocked when they are copied to the incore log, it is - * possible for two transactions to be completing and manipulating the same - * item simultaneously.  The AIL lock will protect the lsn field of each item. - * The value of this field can never go backwards. - * - * We unpin the items after repositioning them in the AIL, because otherwise - * they could be immediately flushed and we'd have to race with the flusher - * trying to pull the item from the AIL as we add it. - */ -static void -xfs_trans_item_committed( -	struct xfs_log_item	*lip, -	xfs_lsn_t		commit_lsn, -	int			aborted) -{ -	xfs_lsn_t		item_lsn; -	struct xfs_ail		*ailp; - -	if (aborted) -		lip->li_flags |= XFS_LI_ABORTED; -	item_lsn = IOP_COMMITTED(lip, commit_lsn); - -	/* item_lsn of -1 means the item needs no further processing */ -	if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) -		return; - -	/* -	 * If the returned lsn is greater than what it contained before, update -	 * the location of the item in the AIL.  If it is not, then do nothing. -	 * Items can never move backwards in the AIL. -	 * -	 * While the new lsn should usually be greater, it is possible that a -	 * later transaction completing simultaneously with an earlier one -	 * using the same item could complete first with a higher lsn.  This -	 * would cause the earlier transaction to fail the test below. 
-	 */ -	ailp = lip->li_ailp; -	spin_lock(&ailp->xa_lock); -	if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { -		/* -		 * This will set the item's lsn to item_lsn and update the -		 * position of the item in the AIL. -		 * -		 * xfs_trans_ail_update() drops the AIL lock. -		 */ -		xfs_trans_ail_update(ailp, lip, item_lsn); -	} else { -		spin_unlock(&ailp->xa_lock); -	} - -	/* -	 * Now that we've repositioned the item in the AIL, unpin it so it can -	 * be flushed. Pass information about buffer stale state down from the -	 * log item flags, if anyone else stales the buffer we do not want to -	 * pay any attention to it. -	 */ -	IOP_UNPIN(lip, 0); -} - -/* - * This is typically called by the LM when a transaction has been fully - * committed to disk.  It needs to unpin the items which have - * been logged by the transaction and update their positions - * in the AIL if necessary. - * - * This also gets called when the transactions didn't get written out - * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then. - */ -STATIC void -xfs_trans_committed( -	void			*arg, -	int			abortflag) -{ -	struct xfs_trans	*tp = arg; -	struct xfs_log_item_desc *lidp, *next; - -	list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { -		xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); -		xfs_trans_free_item_desc(lidp); -	} - -	xfs_trans_free(tp); -} -  static inline void  xfs_log_item_batch_insert(  	struct xfs_ail		*ailp, @@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(  }  /* - * Called from the trans_commit code when we notice that the filesystem is in - * the middle of a forced shutdown. - * - * When we are called here, we have already pinned all the items in the - * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called - * so we can simply walk the items in the transaction, unpin them with an abort - * flag and then free the items. Note that unpinning the items can result in - * them being freed immediately, so we need to use a safe list traversal method - * here. - */ -STATIC void -xfs_trans_uncommit( -	struct xfs_trans	*tp, -	uint			flags) -{ -	struct xfs_log_item_desc *lidp, *n; - -	list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { -		if (lidp->lid_flags & XFS_LID_DIRTY) -			IOP_UNPIN(lidp->lid_item, 1); -	} - -	xfs_trans_unreserve_and_mod_sb(tp); -	xfs_trans_unreserve_and_mod_dquots(tp); - -	xfs_trans_free_items(tp, NULLCOMMITLSN, flags); -	xfs_trans_free(tp); -} - -/* - * Format the transaction direct to the iclog. This isolates the physical - * transaction commit operation from the logical operation and hence allows - * other methods to be introduced without affecting the existing commit path. - */ -static int -xfs_trans_commit_iclog( -	struct xfs_mount	*mp, -	struct xfs_trans	*tp, -	xfs_lsn_t		*commit_lsn, -	int			flags) -{ -	int			shutdown; -	int			error; -	int			log_flags = 0; -	struct xlog_in_core	*commit_iclog; -#define XFS_TRANS_LOGVEC_COUNT  16 -	struct xfs_log_iovec	log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; -	struct xfs_log_iovec	*log_vector; -	uint			nvec; - - -	/* -	 * Ask each log item how many log_vector entries it will -	 * need so we can figure out how many to allocate. -	 * Try to avoid the kmem_alloc() call in the common case -	 * by using a vector from the stack when it fits. -	 */ -	nvec = xfs_trans_count_vecs(tp); -	if (nvec == 0) { -		return ENOMEM;	/* triggers a shutdown! 
*/ -	} else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { -		log_vector = log_vector_fast; -	} else { -		log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec * -						   sizeof(xfs_log_iovec_t), -						   KM_SLEEP); -	} - -	/* -	 * Fill in the log_vector and pin the logged items, and -	 * then write the transaction to the log. -	 */ -	xfs_trans_fill_vecs(tp, log_vector); - -	if (flags & XFS_TRANS_RELEASE_LOG_RES) -		log_flags = XFS_LOG_REL_PERM_RESERV; - -	error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); - -	/* -	 * The transaction is committed incore here, and can go out to disk -	 * at any time after this call.  However, all the items associated -	 * with the transaction are still locked and pinned in memory. -	 */ -	*commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); - -	tp->t_commit_lsn = *commit_lsn; -	trace_xfs_trans_commit_lsn(tp); - -	if (nvec > XFS_TRANS_LOGVEC_COUNT) -		kmem_free(log_vector); - -	/* -	 * If we got a log write error. Unpin the logitems that we -	 * had pinned, clean up, free trans structure, and return error. -	 */ -	if (error || *commit_lsn == -1) { -		current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); -		xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); -		return XFS_ERROR(EIO); -	} - -	/* -	 * Once the transaction has committed, unused -	 * reservations need to be released and changes to -	 * the superblock need to be reflected in the in-core -	 * version.  Do that now. -	 */ -	xfs_trans_unreserve_and_mod_sb(tp); - -	/* -	 * Tell the LM to call the transaction completion routine -	 * when the log write with LSN commit_lsn completes (e.g. -	 * when the transaction commit really hits the on-disk log). -	 * After this call we cannot reference tp, because the call -	 * can happen at any time and the call will free the transaction -	 * structure pointed to by tp.  The only case where we call -	 * the completion routine (xfs_trans_committed) directly is -	 * if the log is turned off on a debug kernel or we're -	 * running in simulation mode (the log is explicitly turned -	 * off). -	 */ -	tp->t_logcb.cb_func = xfs_trans_committed; -	tp->t_logcb.cb_arg = tp; - -	/* -	 * We need to pass the iclog buffer which was used for the -	 * transaction commit record into this function, and attach -	 * the callback to it. The callback must be attached before -	 * the items are unlocked to avoid racing with other threads -	 * waiting for an item to unlock. -	 */ -	shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb)); - -	/* -	 * Mark this thread as no longer being in a transaction -	 */ -	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - -	/* -	 * Once all the items of the transaction have been copied -	 * to the in core log and the callback is attached, the -	 * items can be unlocked. -	 * -	 * This will free descriptors pointing to items which were -	 * not logged since there is nothing more to do with them. -	 * For items which were logged, we will keep pointers to them -	 * so they can be unpinned after the transaction commits to disk. -	 * This will also stamp each modified meta-data item with -	 * the commit lsn of this transaction for dependency tracking -	 * purposes. -	 */ -	xfs_trans_unlock_items(tp, *commit_lsn); - -	/* -	 * If we detected a log error earlier, finish committing -	 * the transaction now (unpin log items, etc). -	 * -	 * Order is critical here, to avoid using the transaction -	 * pointer after its been freed (by xfs_trans_committed -	 * either here now, or as a callback).  
We cannot do this -	 * step inside xfs_log_notify as was done earlier because -	 * of this issue. -	 */ -	if (shutdown) -		xfs_trans_committed(tp, XFS_LI_ABORTED); - -	/* -	 * Now that the xfs_trans_committed callback has been attached, -	 * and the items are released we can finally allow the iclog to -	 * go to disk. -	 */ -	return xfs_log_release_iclog(mp, commit_iclog); -} - -/* - * Walk the log items and allocate log vector structures for - * each item large enough to fit all the vectors they require. - * Note that this format differs from the old log vector format in - * that there is no transaction header in these log vectors. - */ -STATIC struct xfs_log_vec * -xfs_trans_alloc_log_vecs( -	xfs_trans_t	*tp) -{ -	struct xfs_log_item_desc *lidp; -	struct xfs_log_vec	*lv = NULL; -	struct xfs_log_vec	*ret_lv = NULL; - - -	/* Bail out if we didn't find a log item.  */ -	if (list_empty(&tp->t_items)) { -		ASSERT(0); -		return NULL; -	} - -	list_for_each_entry(lidp, &tp->t_items, lid_trans) { -		struct xfs_log_vec *new_lv; - -		/* Skip items which aren't dirty in this transaction. */ -		if (!(lidp->lid_flags & XFS_LID_DIRTY)) -			continue; - -		/* Skip items that do not have any vectors for writing */ -		lidp->lid_size = IOP_SIZE(lidp->lid_item); -		if (!lidp->lid_size) -			continue; - -		new_lv = kmem_zalloc(sizeof(*new_lv) + -				lidp->lid_size * sizeof(struct xfs_log_iovec), -				KM_SLEEP); - -		/* The allocated iovec region lies beyond the log vector. */ -		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; -		new_lv->lv_niovecs = lidp->lid_size; -		new_lv->lv_item = lidp->lid_item; -		if (!ret_lv) -			ret_lv = new_lv; -		else -			lv->lv_next = new_lv; -		lv = new_lv; -	} - -	return ret_lv; -} - -static int -xfs_trans_commit_cil( -	struct xfs_mount	*mp, -	struct xfs_trans	*tp, -	xfs_lsn_t		*commit_lsn, -	int			flags) -{ -	struct xfs_log_vec	*log_vector; - -	/* -	 * Get each log item to allocate a vector structure for -	 * the log item to to pass to the log write code. The -	 * CIL commit code will format the vector and save it away. -	 */ -	log_vector = xfs_trans_alloc_log_vecs(tp); -	if (!log_vector) -		return ENOMEM; - -	xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); - -	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); -	xfs_trans_free(tp); -	return 0; -} - -/*   * Commit the given transaction to the log.   *   * XFS disk error handling mechanism is not based on a typical @@ -1845,17 +1379,16 @@ xfs_trans_commit(  		xfs_trans_apply_sb_deltas(tp);  	xfs_trans_apply_dquot_deltas(tp); -	if (mp->m_flags & XFS_MOUNT_DELAYLOG) -		error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags); -	else -		error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags); - +	error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);  	if (error == ENOMEM) {  		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);  		error = XFS_ERROR(EIO);  		goto out_unreserve;  	} +	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); +	xfs_trans_free(tp); +  	/*  	 * If the transaction needs to be synchronous, then force the  	 * log out now and wait for it. 
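The xfs_trans_alloc_log_vecs() function removed above carved the log vector header and its iovec array out of a single allocation, pointing lv_iovecp just past the header; the CIL commit path that now handles every transaction is not shown in these hunks, but presumably keeps an equivalent layout on its side. A self-contained userspace sketch of the idiom, with hypothetical names standing in for the kernel types:

#include <stdlib.h>
#include <sys/uio.h>

struct log_vec {			/* hypothetical stand-in for struct xfs_log_vec */
	struct log_vec	*lv_next;
	int		lv_niovecs;
	struct iovec	*lv_iovecp;	/* points into the same allocation */
};

static struct log_vec *
log_vec_alloc(int niovecs)
{
	/* one zeroed allocation: the header immediately followed by the array */
	struct log_vec	*lv = calloc(1, sizeof(*lv) +
					niovecs * sizeof(struct iovec));

	if (!lv)
		return NULL;
	lv->lv_iovecp = (struct iovec *)&lv[1];	/* array starts right after the header */
	lv->lv_niovecs = niovecs;
	return lv;
}

A single allocation means a single free on teardown and keeps the header adjacent to its vectors, which matters on a path that runs for every transaction commit.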
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3ae713c0abd..f6118703f20 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
  */
 struct xfs_log_item_desc {
 	struct xfs_log_item	*lid_item;
-	ushort			lid_size;
-	unsigned char		lid_flags;
 	struct list_head	lid_trans;
+	unsigned char		lid_flags;
 };
 
 #define XFS_LID_DIRTY		0x1
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 8b32d1a4c5a..89dbb4a5087 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -53,7 +53,7 @@ xfs_dir_ialloc(
 					   output: may be a new transaction. */
 	xfs_inode_t	*dp,		/* directory within whose allocate
 					   the inode. */
-	mode_t		mode,
+	umode_t		mode,
 	xfs_nlink_t	nlink,
 	xfs_dev_t	rdev,
 	prid_t		prid,		/* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 456fca31493..5eeab4690cf 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,7 @@
 #ifndef __XFS_UTILS_H__
 #define __XFS_UTILS_H__
 
-extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
+extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
 				xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
 extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
 extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ce9268a2f56..f2fea868d4d 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -822,7 +822,7 @@ int
 xfs_create(
 	xfs_inode_t		*dp,
 	struct xfs_name		*name,
-	mode_t			mode,
+	umode_t			mode,
 	xfs_dev_t		rdev,
 	xfs_inode_t		**ipp)
 {
@@ -1481,7 +1481,7 @@ xfs_symlink(
 	xfs_inode_t		*dp,
 	struct xfs_name		*link_name,
 	const char		*target_path,
-	mode_t			mode,
+	umode_t			mode,
 	xfs_inode_t		**ipp)
 {
 	xfs_mount_t		*mp = dp->i_mount;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e..0c877cbde14 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -26,7 +26,7 @@ int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 		struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
+int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
 		xfs_dev_t rdev, struct xfs_inode **ipp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
 		struct xfs_inode *ip);
@@ -35,7 +35,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-		const char *target_path, mode_t mode, struct xfs_inode **ipp);
+		const char *target_path, umode_t mode, struct xfs_inode **ipp);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 		xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
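The mode_t to umode_t conversions that close out this diff track the VFS, which passes inode modes as the kernel's umode_t (a 16-bit type in include/linux/types.h) rather than the wider userspace mode_t. A small userspace sketch, assuming the kernel's definition of umode_t, showing that the narrower type still holds a complete mode:

#include <assert.h>
#include <sys/stat.h>

typedef unsigned short umode_t;		/* the kernel's 16-bit definition */

int main(void)
{
	umode_t	mode = S_IFDIR | 0755;	/* file-type bits plus permission bits */

	/*
	 * The S_IFMT type field and the 07777 permission bits together fit
	 * in 16 bits, so narrowing from mode_t loses no information.
	 */
	assert(S_ISDIR(mode));
	assert((mode & 07777) == 0755);
	return 0;
}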