diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--  fs/xfs/xfs_inode.c | 207
1 file changed, 84 insertions(+), 123 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8cd6e8d8fe9..68415cb4f23 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -27,13 +27,10 @@  #include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h"  #include "xfs_mount.h"  #include "xfs_bmap_btree.h"  #include "xfs_alloc_btree.h"  #include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h"  #include "xfs_attr_sf.h"  #include "xfs_dinode.h"  #include "xfs_inode.h" @@ -44,7 +41,6 @@  #include "xfs_alloc.h"  #include "xfs_ialloc.h"  #include "xfs_bmap.h" -#include "xfs_rw.h"  #include "xfs_error.h"  #include "xfs_utils.h"  #include "xfs_quota.h" @@ -177,7 +173,7 @@ xfs_imap_to_bp(  		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,  						XFS_ERRTAG_ITOBP_INOTOBP,  						XFS_RANDOM_ITOBP_INOTOBP))) { -			if (iget_flags & XFS_IGET_BULKSTAT) { +			if (iget_flags & XFS_IGET_UNTRUSTED) {  				xfs_trans_brelse(tp, bp);  				return XFS_ERROR(EINVAL);  			} @@ -426,7 +422,7 @@ xfs_iformat(  	if (!XFS_DFORK_Q(dip))  		return 0;  	ASSERT(ip->i_afp == NULL); -	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); +	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);  	ip->i_afp->if_ext_max =  		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);  	switch (dip->di_aformat) { @@ -509,7 +505,7 @@ xfs_iformat_local(  		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;  	else {  		real_size = roundup(size, 4); -		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); +		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);  	}  	ifp->if_bytes = size;  	ifp->if_real_bytes = real_size; @@ -636,7 +632,7 @@ xfs_iformat_btree(  	}  	ifp->if_broot_bytes = size; -	ifp->if_broot = kmem_alloc(size, KM_SLEEP); +	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);  	ASSERT(ifp->if_broot != NULL);  	/*  	 * Copy and convert from the on-disk structure @@ -787,7 +783,6 @@ xfs_iread(  	xfs_mount_t	*mp,  	xfs_trans_t	*tp,  	xfs_inode_t	*ip, -	xfs_daddr_t	
bno,  	uint		iget_flags)  {  	xfs_buf_t	*bp; @@ -797,11 +792,9 @@ xfs_iread(  	/*  	 * Fill in the location information in the in-core inode.  	 */ -	ip->i_imap.im_blkno = bno;  	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);  	if (error)  		return error; -	ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);  	/*  	 * Get pointers to the on-disk inode and the buffer containing it. @@ -925,7 +918,6 @@ xfs_iread_extents(  	int		error;  	xfs_ifork_t	*ifp;  	xfs_extnum_t	nextents; -	size_t		size;  	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {  		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, @@ -933,7 +925,6 @@ xfs_iread_extents(  		return XFS_ERROR(EFSCORRUPTED);  	}  	nextents = XFS_IFORK_NEXTENTS(ip, whichfork); -	size = nextents * sizeof(xfs_bmbt_rec_t);  	ifp = XFS_IFORK_PTR(ip, whichfork);  	/* @@ -1229,7 +1220,7 @@ xfs_isize_check(  				       (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -  			  map_first),  			 XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, -			 NULL, NULL)) +			 NULL))  	    return;  	ASSERT(nimaps == 1);  	ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); @@ -1463,7 +1454,7 @@ xfs_itruncate_finish(  	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);  	ASSERT(ip->i_transp == *tp);  	ASSERT(ip->i_itemp != NULL); -	ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); +	ASSERT(ip->i_itemp->ili_lock_flags == 0);  	ntp = *tp; @@ -1592,11 +1583,10 @@ xfs_itruncate_finish(  		xfs_bmap_init(&free_list, &first_block);  		error = xfs_bunmapi(ntp, ip,  				    first_unmap_block, unmap_len, -				    xfs_bmapi_aflag(fork) | -				      (sync ? 
0 : XFS_BMAPI_ASYNC), +				    xfs_bmapi_aflag(fork),  				    XFS_ITRUNC_MAX_EXTENTS,  				    &first_block, &free_list, -				    NULL, &done); +				    &done);  		if (error) {  			/*  			 * If the bunmapi call encounters an error, @@ -1615,12 +1605,8 @@ xfs_itruncate_finish(  		 */  		error = xfs_bmap_finish(tp, &free_list, &committed);  		ntp = *tp; -		if (committed) { -			/* link the inode into the next xact in the chain */ -			xfs_trans_ijoin(ntp, ip, -					XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -			xfs_trans_ihold(ntp, ip); -		} +		if (committed) +			xfs_trans_ijoin(ntp, ip);  		if (error) {  			/* @@ -1649,9 +1635,7 @@ xfs_itruncate_finish(  		error = xfs_trans_commit(*tp, 0);  		*tp = ntp; -		/* link the inode into the next transaction in the chain */ -		xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -		xfs_trans_ihold(ntp, ip); +		xfs_trans_ijoin(ntp, ip);  		if (error)  			return error; @@ -1940,10 +1924,10 @@ xfs_ifree_cluster(  	int			blks_per_cluster;  	int			nbufs;  	int			ninodes; -	int			i, j, found, pre_flushed; +	int			i, j;  	xfs_daddr_t		blkno;  	xfs_buf_t		*bp; -	xfs_inode_t		*ip, **ip_found; +	xfs_inode_t		*ip;  	xfs_inode_log_item_t	*iip;  	xfs_log_item_t		*lip;  	struct xfs_perag	*pag; @@ -1960,114 +1944,97 @@ xfs_ifree_cluster(  		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;  	} -	ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); -  	for (j = 0; j < nbufs; j++, inum += ninodes) { +		int	found = 0; +  		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),  					 XFS_INO_TO_AGBNO(mp, inum)); +		/* +		 * We obtain and lock the backing buffer first in the process +		 * here, as we have to ensure that any dirty inode that we +		 * can't get the flush lock on is attached to the buffer. +		 * If we scan the in-memory inodes first, then buffer IO can +		 * complete before we get a lock on it, and hence we may fail +		 * to mark all the active inodes on the buffer stale. 
+		 */ +		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, +					mp->m_bsize * blks_per_cluster, +					XBF_LOCK); + +		/* +		 * Walk the inodes already attached to the buffer and mark them +		 * stale. These will all have the flush locks held, so an +		 * in-memory inode walk can't lock them. +		 */ +		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); +		while (lip) { +			if (lip->li_type == XFS_LI_INODE) { +				iip = (xfs_inode_log_item_t *)lip; +				ASSERT(iip->ili_logged == 1); +				lip->li_cb = xfs_istale_done; +				xfs_trans_ail_copy_lsn(mp->m_ail, +							&iip->ili_flush_lsn, +							&iip->ili_item.li_lsn); +				xfs_iflags_set(iip->ili_inode, XFS_ISTALE); +				found++; +			} +			lip = lip->li_bio_list; +		}  		/* -		 * Look for each inode in memory and attempt to lock it, -		 * we can be racing with flush and tail pushing here. -		 * any inode we get the locks on, add to an array of -		 * inode items to process later. +		 * For each inode in memory attempt to add it to the inode +		 * buffer and set it up for being staled on buffer IO +		 * completion.  This is safe as we've locked out tail pushing +		 * and flushing by locking the buffer.  		 * -		 * The get the buffer lock, we could beat a flush -		 * or tail pushing thread to the lock here, in which -		 * case they will go looking for the inode buffer -		 * and fail, we need some other form of interlock -		 * here. +		 * We have already marked every inode that was part of a +		 * transaction stale above, which means there is no point in +		 * even trying to lock them.  		 
*/ -		found = 0;  		for (i = 0; i < ninodes; i++) {  			read_lock(&pag->pag_ici_lock);  			ip = radix_tree_lookup(&pag->pag_ici_root,  					XFS_INO_TO_AGINO(mp, (inum + i))); -			/* Inode not in memory or we found it already, -			 * nothing to do -			 */ +			/* Inode not in memory or stale, nothing to do */  			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {  				read_unlock(&pag->pag_ici_lock);  				continue;  			} -			if (xfs_inode_clean(ip)) { +			/* don't try to lock/unlock the current inode */ +			if (ip != free_ip && +			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {  				read_unlock(&pag->pag_ici_lock);  				continue;  			} +			read_unlock(&pag->pag_ici_lock); -			/* If we can get the locks then add it to the -			 * list, otherwise by the time we get the bp lock -			 * below it will already be attached to the -			 * inode buffer. -			 */ - -			/* This inode will already be locked - by us, lets -			 * keep it that way. -			 */ - -			if (ip == free_ip) { -				if (xfs_iflock_nowait(ip)) { -					xfs_iflags_set(ip, XFS_ISTALE); -					if (xfs_inode_clean(ip)) { -						xfs_ifunlock(ip); -					} else { -						ip_found[found++] = ip; -					} -				} -				read_unlock(&pag->pag_ici_lock); -				continue; -			} - -			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { -				if (xfs_iflock_nowait(ip)) { -					xfs_iflags_set(ip, XFS_ISTALE); - -					if (xfs_inode_clean(ip)) { -						xfs_ifunlock(ip); -						xfs_iunlock(ip, XFS_ILOCK_EXCL); -					} else { -						ip_found[found++] = ip; -					} -				} else { +			if (!xfs_iflock_nowait(ip)) { +				if (ip != free_ip)  					xfs_iunlock(ip, XFS_ILOCK_EXCL); -				} +				continue;  			} -			read_unlock(&pag->pag_ici_lock); -		} - -		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,  -					mp->m_bsize * blks_per_cluster, -					XBF_LOCK); -		pre_flushed = 0; -		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); -		while (lip) { -			if (lip->li_type == XFS_LI_INODE) { -				iip = (xfs_inode_log_item_t *)lip; -				ASSERT(iip->ili_logged == 1); -				
lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; -				xfs_trans_ail_copy_lsn(mp->m_ail, -							&iip->ili_flush_lsn, -							&iip->ili_item.li_lsn); -				xfs_iflags_set(iip->ili_inode, XFS_ISTALE); -				pre_flushed++; +			xfs_iflags_set(ip, XFS_ISTALE); +			if (xfs_inode_clean(ip)) { +				ASSERT(ip != free_ip); +				xfs_ifunlock(ip); +				xfs_iunlock(ip, XFS_ILOCK_EXCL); +				continue;  			} -			lip = lip->li_bio_list; -		} -		for (i = 0; i < found; i++) { -			ip = ip_found[i];  			iip = ip->i_itemp; -  			if (!iip) { +				/* inode with unlogged changes only */ +				ASSERT(ip != free_ip);  				ip->i_update_core = 0;  				xfs_ifunlock(ip);  				xfs_iunlock(ip, XFS_ILOCK_EXCL);  				continue;  			} +			found++;  			iip->ili_last_fields = iip->ili_format.ilf_fields;  			iip->ili_format.ilf_fields = 0; @@ -2075,20 +2042,18 @@ xfs_ifree_cluster(  			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,  						&iip->ili_item.li_lsn); -			xfs_buf_attach_iodone(bp, -				(void(*)(xfs_buf_t*,xfs_log_item_t*)) -				xfs_istale_done, (xfs_log_item_t *)iip); -			if (ip != free_ip) { +			xfs_buf_attach_iodone(bp, xfs_istale_done, +						  &iip->ili_item); + +			if (ip != free_ip)  				xfs_iunlock(ip, XFS_ILOCK_EXCL); -			}  		} -		if (found || pre_flushed) +		if (found)  			xfs_trans_stale_inode_buf(tp, bp);  		xfs_trans_binval(tp, bp);  	} -	kmem_free(ip_found);  	xfs_perag_put(pag);  } @@ -2224,7 +2189,7 @@ xfs_iroot_realloc(  		 */  		if (ifp->if_broot_bytes == 0) {  			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); -			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); +			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);  			ifp->if_broot_bytes = (int)new_size;  			return;  		} @@ -2240,7 +2205,7 @@ xfs_iroot_realloc(  		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);  		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,  				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ -				KM_SLEEP); +				KM_SLEEP | KM_NOFS);  		op 
= (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,  						     ifp->if_broot_bytes);  		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -2266,7 +2231,7 @@ xfs_iroot_realloc(  	else  		new_size = 0;  	if (new_size > 0) { -		new_broot = kmem_alloc(new_size, KM_SLEEP); +		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);  		/*  		 * First copy over the btree block header.  		 */ @@ -2370,7 +2335,8 @@ xfs_idata_realloc(  		real_size = roundup(new_size, 4);  		if (ifp->if_u1.if_data == NULL) {  			ASSERT(ifp->if_real_bytes == 0); -			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); +			ifp->if_u1.if_data = kmem_alloc(real_size, +							KM_SLEEP | KM_NOFS);  		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {  			/*  			 * Only do the realloc if the underlying size @@ -2381,11 +2347,12 @@ xfs_idata_realloc(  					kmem_realloc(ifp->if_u1.if_data,  							real_size,  							ifp->if_real_bytes, -							KM_SLEEP); +							KM_SLEEP | KM_NOFS);  			}  		} else {  			ASSERT(ifp->if_real_bytes == 0); -			ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); +			ifp->if_u1.if_data = kmem_alloc(real_size, +							KM_SLEEP | KM_NOFS);  			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,  				ifp->if_bytes);  		} @@ -2649,8 +2616,6 @@ xfs_iflush_cluster(  	int			i;  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); -	ASSERT(pag->pagi_inodeok); -	ASSERT(pag->pag_ici_init);  	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;  	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); @@ -2754,7 +2719,6 @@ cluster_corrupt_out:  		 * mark it as stale and brelse.  		 */  		if (XFS_BUF_IODONE_FUNC(bp)) { -			XFS_BUF_CLR_BDSTRAT_FUNC(bp);  			XFS_BUF_UNDONE(bp);  			XFS_BUF_STALE(bp);  			XFS_BUF_ERROR(bp,EIO); @@ -3092,8 +3056,7 @@ xfs_iflush_int(  		 * and unlock the inode's flush lock when the inode is  		 * completely written to disk.  		 
*/ -		xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) -				      xfs_iflush_done, (xfs_log_item_t *)iip); +		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);  		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);  		ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); @@ -3537,13 +3500,11 @@ xfs_iext_remove_indirect(  	xfs_extnum_t	ext_diff;	/* extents to remove in current list */  	xfs_extnum_t	nex1;		/* number of extents before idx */  	xfs_extnum_t	nex2;		/* extents after idx + count */ -	int		nlists;		/* entries in indirection array */  	int		page_idx = idx;	/* index in target extent list */  	ASSERT(ifp->if_flags & XFS_IFEXTIREC);  	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);  	ASSERT(erp != NULL); -	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;  	nex1 = page_idx;  	ext_cnt = count;  	while (ext_cnt) {  |