62 files changed, 883 insertions, 1326 deletions
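The common thread in this series is the removal of XFS's IRIX-derived semaphore wrappers (sema_t, psema/vsema/cpsema, initnsema) in favour of native Linux primitives: struct completion for the buffer b_iodonesema and the dquot flush lock, IHOLD/IRELE on the Linux inode for reference counting, and fls()-based bit helpers. A minimal sketch of the counting-completion flush-lock idiom that the xfs_dquot hunks below introduce (an illustration only, not code from the patch; it assumes just a struct completion field and the standard <linux/completion.h> API):

	struct completion q_flush;	/* the dquot "flush lock" */

	/*
	 * Init: a completion starts at count 0 ("locked"), so complete()
	 * it once to make exactly one flush lock available.
	 */
	init_completion(&q_flush);
	complete(&q_flush);

	/*
	 * Trylock (xfs_dqflock_nowait): try_wait_for_completion() consumes
	 * the count and returns non-zero if it was available, never blocking.
	 */
	if (try_wait_for_completion(&q_flush)) {
		/* ... dquot is flush-locked here ... */
		complete(&q_flush);	/* unlock (xfs_dqfunlock) */
	}

wait_for_completion() gives the blocking lock (xfs_dqflock), and completion_done() replaces the old issemalocked() assertions.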
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 3abe7e9ceb3..00000000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_SEMA_H__
-#define __XFS_SUPPORT_SEMA_H__
-
-#include <linux/time.h>
-#include <linux/wait.h>
-#include <linux/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * sema_t structure just maps to struct semaphore in Linux kernel.
- */
-
-typedef struct semaphore sema_t;
-
-#define initnsema(sp, val, name)	sema_init(sp, val)
-#define psema(sp, b)			down(sp)
-#define vsema(sp)			up(sp)
-#define freesema(sema)			do { } while (0)
-
-static inline int issemalocked(sema_t *sp)
-{
-	return down_trylock(sp) || (up(sp), 0);
-}
-
-/*
- * Map cpsema (try to get the sema) to down_trylock. We need to switch
- * the return values since cpsema returns 1 (acquired) 0 (failed) and
- * down_trylock returns the reverse 0 (acquired) 1 (failed).
- */
-static inline int cpsema(sema_t *sp)
-{
-	return down_trylock(sp) ? 0 : 1;
-}
-
-#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fa47e43b8b4..f42f80a3b1f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -73,7 +73,6 @@ xfs_page_trace(
 	unsigned long	pgoff)
 {
 	xfs_inode_t	*ip;
-	bhv_vnode_t	*vp = vn_from_inode(inode);
 	loff_t		isize = i_size_read(inode);
 	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
@@ -81,7 +80,7 @@ xfs_page_trace(
 	if (page_has_buffers(page))
 		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
-	ip = xfs_vtoi(vp);
+	ip = XFS_I(inode);
 	if (!ip->i_rwtrace)
 		return;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9cc8f021309..986061ae1b9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -58,7 +58,7 @@ xfs_buf_trace(
 		bp, id,
 		(void *)(unsigned long)bp->b_flags,
 		(void *)(unsigned long)bp->b_hold.counter,
-		(void *)(unsigned long)bp->b_sema.count.counter,
+		(void *)(unsigned long)bp->b_sema.count,
 		(void *)current,
 		data, ra,
 		(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
@@ -253,7 +253,7 @@ _xfs_buf_initialize(
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
-	init_MUTEX_LOCKED(&bp->b_iodonesema);
+	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_list);
 	INIT_LIST_HEAD(&bp->b_hash_list);
 	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -838,6 +838,7 @@ xfs_buf_rele(
 		return;
 	}
 
+	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
@@ -851,11 +852,6 @@ xfs_buf_rele(
 			spin_unlock(&hash->bh_lock);
 			xfs_buf_free(bp);
 		}
-	} else {
-		/*
-		 * Catch reference count leaks
-		 */
-		ASSERT(atomic_read(&bp->b_hold) >= 0);
 	}
 }
@@ -1037,7 +1033,7 @@ xfs_buf_ioend(
 			xfs_buf_iodone_work(&bp->b_iodone_work);
 		}
 	} else {
-		up(&bp->b_iodonesema);
+		complete(&bp->b_iowait);
 	}
 }
@@ -1275,7 +1271,7 @@ xfs_buf_iowait(
 	XB_TRACE(bp, "iowait", 0);
 	if (atomic_read(&bp->b_io_remaining))
 		blk_run_address_space(bp->b_target->bt_mapping);
-	down(&bp->b_iodonesema);
+	wait_for_completion(&bp->b_iowait);
 	XB_TRACE(bp, "iowaited", (long)bp->b_error);
 	return bp->b_error;
 }
@@ -1799,7 +1795,7 @@ int __init
 xfs_buf_init(void)
 {
 #ifdef XFS_BUF_TRACE
-	xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
+	xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
 #endif
 
 	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 29d1d4adc07..fe010995665 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -157,7 +157,7 @@ typedef struct xfs_buf {
 	xfs_buf_iodone_t	b_iodone;	/* I/O completion function */
 	xfs_buf_relse_t		b_relse;	/* releasing function */
 	xfs_buf_bdstrat_t	b_strat;	/* pre-write function */
-	struct semaphore	b_iodonesema;	/* Semaphore for I/O waiters */
+	struct completion	b_iowait;	/* queue for I/O waiters */
 	void			*b_fspriv;
 	void			*b_fspriv2;
 	void			*b_fspriv3;
@@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define XFS_BUF_CPSEMA(bp)	(xfs_buf_cond_lock(bp) == 0)
 #define XFS_BUF_VSEMA(bp)	xfs_buf_unlock(bp)
 #define XFS_BUF_PSEMA(bp,x)	xfs_buf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
+#define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait);
 
 #define XFS_BUF_SET_TARGET(bp, target)	((bp)->b_target = (target))
 #define XFS_BUF_TARGET(bp)		((bp)->b_target)
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 987fe84f7b1..24fd598af84 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -139,7 +139,7 @@ xfs_nfs_get_inode(
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	return ip->i_vnode;
+	return VFS_I(ip);
 }
 
 STATIC struct dentry *
@@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	if (!inode)
 		return NULL;
 	if (IS_ERR(inode))
-		return ERR_PTR(PTR_ERR(inode));
+		return ERR_CAST(inode);
 	result = d_alloc_anon(inode);
 	if (!result) {
 		iput(inode);
@@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
 	if (!inode)
 		return NULL;
 	if (IS_ERR(inode))
-		return ERR_PTR(PTR_ERR(inode));
+		return ERR_CAST(inode);
 	result = d_alloc_anon(inode);
 	if (!result) {
 		iput(inode);
@@ -219,9 +219,9 @@ xfs_fs_get_parent(
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
-	parent = d_alloc_anon(cip->i_vnode);
+	parent = d_alloc_anon(VFS_I(cip));
 	if (unlikely(!parent)) {
-		iput(cip->i_vnode);
+		iput(VFS_I(cip));
 		return ERR_PTR(-ENOMEM);
 	}
 	return parent;
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1eefe61f0e1..36caa6d957d 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -31,7 +31,7 @@ xfs_tosspages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	struct address_space *mapping = ip->i_vnode->i_mapping;
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
 
 	if (mapping->nrpages)
 		truncate_inode_pages(mapping, first);
@@ -44,7 +44,7 @@ xfs_flushinval_pages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	struct address_space *mapping = ip->i_vnode->i_mapping;
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
 	int		ret = 0;
 
 	if (mapping->nrpages) {
@@ -64,7 +64,7 @@ xfs_flush_pages(
 	uint64_t	flags,
 	int		fiopt)
 {
-	struct address_space *mapping = ip->i_vnode->i_mapping;
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
 	int		ret = 0;
 	int		ret2;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acb978d9d08..48799ba7e3e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq(
 
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	*inode = XFS_ITOV(ip);
+	*inode = VFS_I(ip);
 	return 0;
 }
@@ -927,7 +927,7 @@ STATIC void
 xfs_diflags_to_linux(
 	struct xfs_inode	*ip)
 {
-	struct inode		*inode = XFS_ITOV(ip);
+	struct inode		*inode = VFS_I(ip);
 	unsigned int		xflags = xfs_ip2xflags(ip);
 
 	if (xflags & XFS_XFLAG_IMMUTABLE)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e88f5102808..91bcd979242 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,7 +62,7 @@ void
 xfs_synchronize_atime(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = ip->i_vnode;
+	struct inode	*inode = VFS_I(ip);
 
 	if (inode) {
 		ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
@@ -79,7 +79,7 @@ void
 xfs_mark_inode_dirty_sync(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = ip->i_vnode;
+	struct inode	*inode = VFS_I(ip);
 
 	if (inode)
 		mark_inode_dirty_sync(inode);
@@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync(
  * Change the requested timestamp in the given inode.
  * We don't lock across timestamp updates, and we don't log them but
  * we do record the fact that there is dirty information in core.
- *
- * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
- *		with XFS_ICHGTIME_ACC to be sure that access time
- *		update will take.  Calling first with XFS_ICHGTIME_ACC
- *		and then XFS_ICHGTIME_MOD may fail to modify the access
- *		timestamp if the filesystem is mounted noacctm.
 */
 void
 xfs_ichgtime(
 	xfs_inode_t	*ip,
 	int		flags)
 {
-	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
+	struct inode	*inode = VFS_I(ip);
 	timespec_t	tv;
+	int		sync_it = 0;
+
+	tv = current_fs_time(inode->i_sb);
 
-	nanotime(&tv);
-	if (flags & XFS_ICHGTIME_MOD) {
+	if ((flags & XFS_ICHGTIME_MOD) &&
+	    !timespec_equal(&inode->i_mtime, &tv)) {
 		inode->i_mtime = tv;
 		ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
 		ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+		sync_it = 1;
 	}
-	if (flags & XFS_ICHGTIME_ACC) {
-		inode->i_atime = tv;
-		ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
-	}
-	if (flags & XFS_ICHGTIME_CHG) {
+	if ((flags & XFS_ICHGTIME_CHG) &&
+	    !timespec_equal(&inode->i_ctime, &tv)) {
 		inode->i_ctime = tv;
 		ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
 		ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
+		sync_it = 1;
 	}
 
 	/*
@@ -130,55 +125,11 @@ xfs_ichgtime(
 	 * ensure that the compiler does not reorder the update
 	 * of i_update_core above the timestamp updates above.
 	 */
-	SYNCHRONIZE();
-	ip->i_update_core = 1;
-	if (!(inode->i_state & I_NEW))
+	if (sync_it) {
+		SYNCHRONIZE();
+		ip->i_update_core = 1;
 		mark_inode_dirty_sync(inode);
-}
-
-/*
- * Variant on the above which avoids querying the system clock
- * in situations where we know the Linux inode timestamps have
- * just been updated (and so we can update our inode cheaply).
- */
-void
-xfs_ichgtime_fast(
-	xfs_inode_t	*ip,
-	struct inode	*inode,
-	int		flags)
-{
-	timespec_t	*tvp;
-
-	/*
-	 * Atime updates for read() & friends are handled lazily now, and
-	 * explicit updates must go through xfs_ichgtime()
-	 */
-	ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-
-	if (flags & XFS_ICHGTIME_MOD) {
-		tvp = &inode->i_mtime;
-		ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
-		ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
 	}
-	if (flags & XFS_ICHGTIME_CHG) {
-		tvp = &inode->i_ctime;
-		ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
-		ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
-	}
-
-	/*
-	 * We update the i_update_core field _after_ changing
-	 * the timestamps in order to coordinate properly with
-	 * xfs_iflush() so that we don't lose timestamp updates.
-	 * This keeps us from having to hold the inode lock
-	 * while doing this.  We use the SYNCHRONIZE macro to
-	 * ensure that the compiler does not reorder the update
-	 * of i_update_core above the timestamp updates above.
-	 */
-	SYNCHRONIZE();
-	ip->i_update_core = 1;
-	if (!(inode->i_state & I_NEW))
-		mark_inode_dirty_sync(inode);
 }
 
 /*
@@ -299,7 +250,7 @@ xfs_vn_mknod(
 	if (unlikely(error))
 		goto out_free_acl;
 
-	inode = ip->i_vnode;
+	inode = VFS_I(ip);
 
 	error = xfs_init_security(inode, dir);
 	if (unlikely(error))
@@ -366,7 +317,7 @@ xfs_vn_lookup(
 		return NULL;
 	}
 
-	return d_splice_alias(cip->i_vnode, dentry);
+	return d_splice_alias(VFS_I(cip), dentry);
 }
 
 STATIC struct dentry *
@@ -399,12 +350,12 @@ xfs_vn_ci_lookup(
 
 	/* if exact match, just splice and exit */
 	if (!ci_name.name)
-		return d_splice_alias(ip->i_vnode, dentry);
+		return d_splice_alias(VFS_I(ip), dentry);
 
 	/* else case-insensitive match... */
 	dname.name = ci_name.name;
 	dname.len = ci_name.len;
-	dentry = d_add_ci(ip->i_vnode, dentry, &dname);
+	dentry = d_add_ci(VFS_I(ip), dentry, &dname);
 	kmem_free(ci_name.name);
 	return dentry;
 }
@@ -478,7 +429,7 @@ xfs_vn_symlink(
 	if (unlikely(error))
 		goto out;
 
-	inode = cip->i_vnode;
+	inode = VFS_I(cip);
 
 	error = xfs_init_security(inode, dir);
 	if (unlikely(error))
@@ -710,7 +661,7 @@ out_error:
 	return error;
 }
 
-const struct inode_operations xfs_inode_operations = {
+static const struct inode_operations xfs_inode_operations = {
 	.permission		= xfs_vn_permission,
 	.truncate		= xfs_vn_truncate,
 	.getattr		= xfs_vn_getattr,
@@ -722,7 +673,7 @@ const struct inode_operations xfs_inode_operations = {
 	.fallocate		= xfs_vn_fallocate,
 };
 
-const struct inode_operations xfs_dir_inode_operations = {
+static const struct inode_operations xfs_dir_inode_operations = {
 	.create			= xfs_vn_create,
 	.lookup			= xfs_vn_lookup,
 	.link			= xfs_vn_link,
@@ -747,7 +698,7 @@ const struct inode_operations xfs_dir_inode_operations = {
 	.listxattr		= xfs_vn_listxattr,
 };
 
-const struct inode_operations xfs_dir_ci_inode_operations = {
+static const struct inode_operations xfs_dir_ci_inode_operations = {
 	.create			= xfs_vn_create,
 	.lookup			= xfs_vn_ci_lookup,
 	.link			= xfs_vn_link,
@@ -772,7 +723,7 @@ const struct inode_operations xfs_dir_ci_inode_operations = {
 	.listxattr		= xfs_vn_listxattr,
 };
 
-const struct inode_operations xfs_symlink_inode_operations = {
+static const struct inode_operations xfs_symlink_inode_operations = {
 	.readlink		= generic_readlink,
 	.follow_link		= xfs_vn_follow_link,
 	.put_link		= xfs_vn_put_link,
@@ -784,3 +735,98 @@ const struct inode_operations xfs_symlink_inode_operations = {
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
 };
+
+STATIC void
+xfs_diflags_to_iflags(
+	struct inode		*inode,
+	struct xfs_inode	*ip)
+{
+	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	else
+		inode->i_flags &= ~S_IMMUTABLE;
+	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+		inode->i_flags |= S_APPEND;
+	else
+		inode->i_flags &= ~S_APPEND;
+	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+		inode->i_flags |= S_SYNC;
+	else
+		inode->i_flags &= ~S_SYNC;
+	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	else
+		inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ */
+void
+xfs_setup_inode(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = ip->i_vnode;
+
+	inode->i_mode	= ip->i_d.di_mode;
+	inode->i_nlink	= ip->i_d.di_nlink;
+	inode->i_uid	= ip->i_d.di_uid;
+	inode->i_gid	= ip->i_d.di_gid;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		inode->i_rdev =
+			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+			      sysv_minor(ip->i_df.if_u2.if_rdev));
+		break;
+	default:
+		inode->i_rdev = 0;
+		break;
+	}
+
+	inode->i_generation = ip->i_d.di_gen;
+	i_size_write(inode, ip->i_d.di_size);
+	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
+	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
+	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
+	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
+	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
+	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
+	xfs_diflags_to_iflags(inode, ip);
+	xfs_iflags_clear(ip, XFS_IMODIFIED);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &xfs_inode_operations;
+		inode->i_fop = &xfs_file_operations;
+		inode->i_mapping->a_ops = &xfs_address_space_operations;
+		break;
+	case S_IFDIR:
+		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+			inode->i_op = &xfs_dir_ci_inode_operations;
+		else
+			inode->i_op = &xfs_dir_inode_operations;
+		inode->i_fop = &xfs_dir_file_operations;
+		break;
+	case S_IFLNK:
+		inode->i_op = &xfs_symlink_inode_operations;
+		if (!(ip->i_df.if_flags & XFS_IFINLINE))
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
+		break;
+	default:
+		inode->i_op = &xfs_inode_operations;
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
+	}
+
+	xfs_iflags_clear(ip, XFS_INEW);
+	barrier();
+
+	unlock_new_inode(inode);
+}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index d97ba934a2a..8b1a1e31dc2 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,10 +18,7 @@
 #ifndef __XFS_IOPS_H__
 #define __XFS_IOPS_H__
 
-extern const struct inode_operations xfs_inode_operations;
-extern const struct inode_operations xfs_dir_inode_operations;
-extern const struct inode_operations xfs_dir_ci_inode_operations;
-extern const struct inode_operations xfs_symlink_inode_operations;
+struct xfs_inode;
 
 extern const struct file_operations xfs_file_operations;
 extern const struct file_operations xfs_dir_file_operations;
@@ -29,14 +26,6 @@ extern const struct file_operations xfs_invis_file_operations;
 
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
-struct xfs_inode;
-extern void xfs_ichgtime(struct xfs_inode *, int);
-extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int);
-
-#define xfs_vtoi(vp) \
-	((struct xfs_inode *)vn_to_inode(vp)->i_private)
-
-#define XFS_I(inode) \
-	((struct xfs_inode *)(inode)->i_private)
+extern void xfs_setup_inode(struct xfs_inode *);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4d45d9351a6..3b7c4ff48ba 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -45,13 +45,13 @@
 #include <mrlock.h>
 #include <sv.h>
 #include <mutex.h>
-#include <sema.h>
 #include <time.h>
 
 #include <support/ktrace.h>
 #include <support/debug.h>
 #include <support/uuid.h>
 
+#include <linux/semaphore.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
@@ -180,7 +180,7 @@
 #define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 #define xfs_stack_trace()	dump_stack()
 #define xfs_itruncate_data(ip, off)	\
-	(-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off)))
+	(-vmtruncate(VFS_I(ip), (off)))
 
 
 /* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 82333b3e118..1957e5357d0 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -137,7 +137,7 @@ xfs_iozero(
 	struct address_space	*mapping;
 	int			status;
 
-	mapping = ip->i_vnode->i_mapping;
+	mapping = VFS_I(ip)->i_mapping;
 	do {
 		unsigned offset, bytes;
 		void *fsdata;
@@ -674,9 +674,7 @@ start:
 	 */
 	if (likely(!(ioflags & IO_INVIS) &&
 		   !mnt_want_write(file->f_path.mnt))) {
-		file_update_time(file);
-		xfs_ichgtime_fast(xip, inode,
-				  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 		mnt_drop_write(file->f_path.mnt);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 30ae96397e3..73c65f19e54 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -581,118 +581,6 @@ xfs_max_file_offset(
 	return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
-STATIC_INLINE void
-xfs_set_inodeops(
-	struct inode		*inode)
-{
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFREG:
-		inode->i_op = &xfs_inode_operations;
-		inode->i_fop = &xfs_file_operations;
-		inode->i_mapping->a_ops = &xfs_address_space_operations;
-		break;
-	case S_IFDIR:
-		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-			inode->i_op = &xfs_dir_ci_inode_operations;
-		else
-			inode->i_op = &xfs_dir_inode_operations;
-		inode->i_fop = &xfs_dir_file_operations;
-		break;
-	case S_IFLNK:
-		inode->i_op = &xfs_symlink_inode_operations;
-		if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
-			inode->i_mapping->a_ops = &xfs_address_space_operations;
-		break;
-	default:
-		inode->i_op = &xfs_inode_operations;
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-		break;
-	}
-}
-
-STATIC_INLINE void
-xfs_revalidate_inode(
-	xfs_mount_t		*mp,
-	bhv_vnode_t		*vp,
-	xfs_inode_t		*ip)
-{
-	struct inode		*inode = vn_to_inode(vp);
-
-	inode->i_mode	= ip->i_d.di_mode;
-	inode->i_nlink	= ip->i_d.di_nlink;
-	inode->i_uid	= ip->i_d.di_uid;
-	inode->i_gid	= ip->i_d.di_gid;
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		inode->i_rdev =
-			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-			      sysv_minor(ip->i_df.if_u2.if_rdev));
-		break;
-	default:
-		inode->i_rdev = 0;
-		break;
-	}
-
-	inode->i_generation = ip->i_d.di_gen;
-	i_size_write(inode, ip->i_d.di_size);
-	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
-	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
-	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
-	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
-	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
-	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
-	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
-	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-		inode->i_flags |= S_SYNC;
-	else
-		inode->i_flags &= ~S_SYNC;
-	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-		inode->i_flags |= S_NOATIME;
-	else
-		inode->i_flags &= ~S_NOATIME;
-	xfs_iflags_clear(ip, XFS_IMODIFIED);
-}
-
-void
-xfs_initialize_vnode(
-	struct xfs_mount	*mp,
-	bhv_vnode_t		*vp,
-	struct xfs_inode	*ip)
-{
-	struct inode		*inode = vn_to_inode(vp);
-
-	if (!ip->i_vnode) {
-		ip->i_vnode = vp;
-		inode->i_private = ip;
-	}
-
-	/*
-	 * We need to set the ops vectors, and unlock the inode, but if
-	 * we have been called during the new inode create process, it is
-	 * too early to fill in the Linux inode.  We will get called a
-	 * second time once the inode is properly set up, and then we can
-	 * finish our work.
-	 */
-	if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
-		xfs_revalidate_inode(mp, vp, ip);
-		xfs_set_inodeops(inode);
-
-		xfs_iflags_clear(ip, XFS_INEW);
-		barrier();
-
-		unlock_new_inode(inode);
-	}
-}
-
 int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
@@ -982,26 +870,21 @@ STATIC struct inode *
 xfs_fs_alloc_inode(
 	struct super_block	*sb)
 {
-	bhv_vnode_t		*vp;
-
-	vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
-	if (unlikely(!vp))
-		return NULL;
-	return vn_to_inode(vp);
+	return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
 }
 
 STATIC void
 xfs_fs_destroy_inode(
 	struct inode		*inode)
 {
-	kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode));
+	kmem_zone_free(xfs_vnode_zone, inode);
 }
 
 STATIC void
 xfs_fs_inode_init_once(
 	void			*vnode)
 {
-	inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
+	inode_init_once((struct inode *)vnode);
 }
 
 /*
@@ -1106,7 +989,7 @@ void
 xfs_flush_inode(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = ip->i_vnode;
+	struct inode	*inode = VFS_I(ip);
 
 	igrab(inode);
 	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
@@ -1131,7 +1014,7 @@ void
 xfs_flush_device(
 	xfs_inode_t	*ip)
 {
-	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
+	struct inode	*inode = VFS_I(ip);
 
 	igrab(inode);
 	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
@@ -1201,6 +1084,15 @@ xfssyncd(
 }
 
 STATIC void
+xfs_free_fsname(
+	struct xfs_mount	*mp)
+{
+	kfree(mp->m_fsname);
+	kfree(mp->m_rtname);
+	kfree(mp->m_logname);
+}
+
+STATIC void
 xfs_fs_put_super(
 	struct super_block	*sb)
 {
@@ -1239,8 +1131,6 @@ xfs_fs_put_super(
 	error = xfs_unmount_flush(mp, 0);
 	WARN_ON(error);
 
-	IRELE(rip);
-
 	/*
 	 * If we're forcing a shutdown, typically because of a media error,
 	 * we want to make sure we invalidate dirty pages that belong to
@@ -1257,10 +1147,12 @@ xfs_fs_put_super(
 	}
 
 	xfs_unmountfs(mp);
+	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
+	xfs_free_fsname(mp);
 	kfree(mp);
 }
 
@@ -1517,6 +1409,8 @@ xfs_start_flags(
 	struct xfs_mount_args	*ap,
 	struct xfs_mount	*mp)
 {
+	int			error;
+
 	/* Values are in BBs */
 	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
 		/*
@@ -1549,17 +1443,27 @@ xfs_start_flags(
 			ap->logbufsize);
 		return XFS_ERROR(EINVAL);
 	}
+
+	error = ENOMEM;
+
 	mp->m_logbsize = ap->logbufsize;
 	mp->m_fsname_len = strlen(ap->fsname) + 1;
-	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
-	strcpy(mp->m_fsname, ap->fsname);
+
+	mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
+	if (!mp->m_fsname)
+		goto out;
+
 	if (ap->rtname[0]) {
-		mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
-		strcpy(mp->m_rtname, ap->rtname);
+		mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
+		if (!mp->m_rtname)
+			goto out_free_fsname;
+
 	}
+
 	if (ap->logname[0]) {
-		mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
-		strcpy(mp->m_logname, ap->logname);
+		mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
+		if (!mp->m_logname)
+			goto out_free_rtname;
 	}
 
 	if (ap->flags & XFSMNT_WSYNC)
@@ -1632,6 +1536,14 @@ xfs_start_flags(
 	if (ap->flags & XFSMNT_DMAPI)
 		mp->m_flags |= XFS_MOUNT_DMAPI;
 	return 0;
+
+
+ out_free_rtname:
+	kfree(mp->m_rtname);
+ out_free_fsname:
+	kfree(mp->m_fsname);
+ out:
+	return error;
 }
 
 /*
@@ -1792,10 +1704,10 @@ xfs_fs_fill_super(
 	 */
 	error = xfs_start_flags(args, mp);
 	if (error)
-		goto out_destroy_counters;
+		goto out_free_fsname;
 	error = xfs_readsb(mp, flags);
 	if (error)
-		goto out_destroy_counters;
+		goto out_free_fsname;
 	error = xfs_finish_flags(args, mp);
 	if (error)
 		goto out_free_sb;
@@ -1811,7 +1723,7 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_free_sb;
 
-	error = xfs_mountfs(mp, flags);
+	error = xfs_mountfs(mp);
 	if (error)
 		goto out_filestream_unmount;
 
@@ -1825,7 +1737,7 @@ xfs_fs_fill_super(
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
-	root = igrab(mp->m_rootip->i_vnode);
+	root = igrab(VFS_I(mp->m_rootip));
 	if (!root) {
 		error = ENOENT;
 		goto fail_unmount;
@@ -1857,7 +1769,8 @@ xfs_fs_fill_super(
 	xfs_filestream_unmount(mp);
  out_free_sb:
 	xfs_freesb(mp);
- out_destroy_counters:
+ out_free_fsname:
+	xfs_free_fsname(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
  out_put_qmops:
@@ -1890,10 +1803,8 @@ xfs_fs_fill_super(
 	error = xfs_unmount_flush(mp, 0);
 	WARN_ON(error);
 
-	IRELE(mp->m_rootip);
-
 	xfs_unmountfs(mp);
-	goto out_destroy_counters;
+	goto out_free_sb;
 }
 
 STATIC int
@@ -2014,7 +1925,7 @@ xfs_free_trace_bufs(void)
 STATIC int __init
 xfs_init_zones(void)
 {
-	xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
+	xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
 					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
 					KM_ZONE_SPREAD,
 					xfs_fs_inode_init_once);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index b7d13da01bd..fe2ef4e6a0f 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,9 +101,6 @@ struct block_device;
 
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
-extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
-		struct xfs_inode *ip);
-
 extern void xfs_flush_inode(struct xfs_inode *);
 extern void xfs_flush_device(struct xfs_inode *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 25488b6d988..b52528bbbff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
 
 
 /*
- * Dedicated vnode inactive/reclaim sync semaphores.
+ * Dedicated vnode inactive/reclaim sync wait queues.
  * Prime number of hash buckets since address is used as the key.
 */
 #define NVSYNC                  37
@@ -82,24 +82,6 @@ vn_ioerror(
 		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
 }
 
-
-/*
- * Add a reference to a referenced vnode.
- */
-bhv_vnode_t *
-vn_hold(
-	bhv_vnode_t	*vp)
-{
-	struct inode	*inode;
-
-	XFS_STATS_INC(vn_hold);
-
-	inode = igrab(vn_to_inode(vp));
-	ASSERT(inode);
-
-	return vp;
-}
-
 #ifdef	XFS_INODE_TRACE
 
 /*
@@ -108,7 +90,7 @@ vn_hold(
  */
 static inline int xfs_icount(struct xfs_inode *ip)
 {
-	bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
+	struct inode *vp = VFS_I(ip);
 
 	if (vp)
 		return vn_count(vp);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 41ca2cec5d3..683ce16210f 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -22,20 +22,6 @@ struct file;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
 
-typedef struct inode	bhv_vnode_t;
-
-/*
- * Vnode to Linux inode mapping.
- */
-static inline bhv_vnode_t *vn_from_inode(struct inode *inode)
-{
-	return inode;
-}
-static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
-{
-	return vnode;
-}
-
 /*
  * Return values for xfs_inactive.  A return value of
  * VN_INACTIVE_NOCACHE implies that the file system behavior
@@ -76,57 +62,52 @@ extern void	vn_iowait(struct xfs_inode *ip);
 extern void	vn_iowake(struct xfs_inode *ip);
 extern void	vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
 
-static inline int vn_count(bhv_vnode_t *vp)
+static inline int vn_count(struct inode *vp)
 {
-	return atomic_read(&vn_to_inode(vp)->i_count);
+	return atomic_read(&vp->i_count);
 }
 
-/*
- * Vnode reference counting functions (and macros for compatibility).
- */
-extern bhv_vnode_t	*vn_hold(bhv_vnode_t *);
+#define IHOLD(ip) \
+do { \
+	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
+	atomic_inc(&(VFS_I(ip)->i_count)); \
+	xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+} while (0)
 
-#if defined(XFS_INODE_TRACE)
-#define VN_HOLD(vp)		\
-	((void)vn_hold(vp),	\
-	  xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address))
-#define VN_RELE(vp)		\
-	  (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
-	   iput(vn_to_inode(vp)))
-#else
-#define VN_HOLD(vp)		((void)vn_hold(vp))
-#define VN_RELE(vp)		(iput(vn_to_inode(vp)))
-#endif
+#define IRELE(ip) \
+do { \
+	xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+	iput(VFS_I(ip)); \
+} while (0)
 
-static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
+static inline struct inode *vn_grab(struct inode *vp)
 {
-	struct inode *inode = igrab(vn_to_inode(vp));
-	return inode ? vn_from_inode(inode) : NULL;
+	return igrab(vp);
 }
 
 /*
 * Dealing with bad inodes
 */
-static inline int VN_BAD(bhv_vnode_t *vp)
+static inline int VN_BAD(struct inode *vp)
 {
-	return is_bad_inode(vn_to_inode(vp));
+	return is_bad_inode(vp);
 }
 
 /*
 * Extracting atime values in various formats
 */
-static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime)
+static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
 {
 	bs_atime->tv_sec = vp->i_atime.tv_sec;
 	bs_atime->tv_nsec = vp->i_atime.tv_nsec;
 }
 
-static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts)
+static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
 {
 	*ts = vp->i_atime;
 }
 
-static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
+static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
 {
 	*tt = vp->i_atime.tv_sec;
 }
@@ -134,9 +115,9 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 /*
 * Some useful predicates.
 */
-#define VN_MAPPED(vp)	mapping_mapped(vn_to_inode(vp)->i_mapping)
-#define VN_CACHED(vp)	(vn_to_inode(vp)->i_mapping->nrpages)
-#define VN_DIRTY(vp)	mapping_tagged(vn_to_inode(vp)->i_mapping, \
+#define VN_MAPPED(vp)	mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp)	(vp->i_mapping->nrpages)
+#define VN_DIRTY(vp)	mapping_tagged(vp->i_mapping, \
 					PAGECACHE_TAG_DIRTY)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index fc9f3fb39b7..f2705f2fd43 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,11 +101,18 @@ xfs_qm_dqinit(
 	if (brandnewdquot) {
 		dqp->dq_flnext = dqp->dq_flprev = dqp;
 		mutex_init(&dqp->q_qlock);
-		initnsema(&dqp->q_flock, 1, "fdq");
 		sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+		/*
+		 * Because we want to use a counting completion, complete
+		 * the flush completion once to allow a single access to
+		 * the flush completion without blocking.
+		 */
+		init_completion(&dqp->q_flush);
+		complete(&dqp->q_flush);
+
 #ifdef XFS_DQUOT_TRACE
-		dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
+		dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
 		xfs_dqtrace_entry(dqp, "DQINIT");
 #endif
 	} else {
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy(
 	ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
 
 	mutex_destroy(&dqp->q_qlock);
-	freesema(&dqp->q_flock);
 	sv_destroy(&dqp->q_pinwait);
 
 #ifdef XFS_DQUOT_TRACE
@@ -431,7 +437,7 @@ xfs_qm_dqalloc(
 	 * when it unlocks the inode. Since we want to keep the quota
 	 * inode around, we bump the vnode ref count now.
 	 */
-	VN_HOLD(XFS_ITOV(quotip));
+	IHOLD(quotip);
 
 	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
 	nmaps = 1;
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush(
 	int			error;
 
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+	ASSERT(!completion_done(&dqp->q_flush));
 	xfs_dqtrace_entry(dqp, "DQFLUSH");
 
 	/*
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done(
 	xfs_dqfunlock(dqp);
 }
 
-
-int
-xfs_qm_dqflock_nowait(
-	xfs_dquot_t *dqp)
-{
-	int locked;
-
-	locked = cpsema(&((dqp)->q_flock));
-
-	/* XXX ifdef these out */
-	if (locked)
-		(dqp)->dq_flags |= XFS_DQ_FLOCKED;
-	return (locked);
-}
-
-
 int
 xfs_qm_dqlock_nowait(
 	xfs_dquot_t *dqp)
 {
-	return (mutex_trylock(&((dqp)->q_qlock)));
+	return mutex_trylock(&dqp->q_qlock);
 }
 
 void
 xfs_dqlock(
 	xfs_dquot_t *dqp)
 {
-	mutex_lock(&(dqp->q_qlock));
+	mutex_lock(&dqp->q_qlock);
 }
 
 void
@@ -1468,7 +1458,7 @@ xfs_qm_dqpurge(
 	 * if we're turning off quotas.  Basically, we need this flush
 	 * lock, and are willing to block on it.
 	 */
-	if (! xfs_qm_dqflock_nowait(dqp)) {
+	if (!xfs_dqflock_nowait(dqp)) {
 		/*
 		 * Block on the flush lock after nudging dquot buffer,
 		 * if it is incore.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index f7393bba4e9..8958d0faf8d 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -82,7 +82,7 @@ typedef struct xfs_dquot {
 	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
 	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
 	mutex_t		 q_qlock;	/* quota lock */
-	sema_t		 q_flock;	/* flush lock */
+	struct completion q_flush;	/* flush completion queue */
 	uint		 q_pincount;	/* pin count for this dquot */
 	sv_t		 q_pinwait;	/* sync var for pinning */
 #ifdef XFS_DQUOT_TRACE
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 
 /*
- * The following three routines simply manage the q_flock
- * semaphore embedded in the dquot.  This semaphore synchronizes
- * processes attempting to flush the in-core dquot back to disk.
+ * Manage the q_flush completion queue embedded in the dquot.  This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
 */
-#define xfs_dqflock(dqp)	 { psema(&((dqp)->q_flock), PINOD | PRECALC);\
-				   (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp)	 { ASSERT(issemalocked(&((dqp)->q_flock))); \
-				   vsema(&((dqp)->q_flock)); \
-				   (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+	wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+	return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+	complete(&dqp->q_flush);
+}
 
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
@@ -167,7 +175,6 @@ extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
 extern int		xfs_qm_dqpurge(xfs_dquot_t *);
 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern int		xfs_qm_dqflock_nowait(xfs_dquot_t *);
 extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
 extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
 					xfs_disk_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 08d2fc89e6a..f028644caa5 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push(
 	dqp = logitem->qli_dquot;
 
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+	ASSERT(!completion_done(&dqp->q_flush));
 
 	/*
 	 * Since we were able to lock the dquot's flush lock and
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf(
 	 * inode flush completed and the inode was taken off the AIL.
 	 * So, just get out.
 	 */
-	if (!issemalocked(&(dqp->q_flock))  ||
+	if (completion_done(&dqp->q_flush)  ||
 	    ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
 		qip->qli_pushbuf_flag = 0;
 		xfs_dqunlock(dqp);
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf(
 	if (bp != NULL) {
 		if (XFS_BUF_ISDELAYWRITE(bp)) {
 			dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
-				  issemalocked(&(dqp->q_flock)));
+				  !completion_done(&dqp->q_flush));
 			qip->qli_pushbuf_flag = 0;
 			xfs_dqunlock(dqp);
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock(
 		return (XFS_ITEM_LOCKED);
 
 	retval = XFS_ITEM_SUCCESS;
-	if (! xfs_qm_dqflock_nowait(dqp)) {
+	if (!xfs_dqflock_nowait(dqp)) {
 		/*
 		 * The dquot is already being flushed.	It may have been
 		 * flushed delayed write, however, and we don't want to
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 021934a3d45..df0ffef9775 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy(
 */
 void
 xfs_qm_mount_quotas(
-	xfs_mount_t	*mp,
-	int		mfsi_flags)
+	xfs_mount_t	*mp)
 {
 	int		error = 0;
 	uint		sbf;
@@ -346,8 +345,7 @@ xfs_qm_mount_quotas(
 	/*
 	 * If any of the quotas are not consistent, do a quotacheck.
 	 */
-	if (XFS_QM_NEED_QUOTACHECK(mp) &&
-	    !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
+	if (XFS_QM_NEED_QUOTACHECK(mp)) {
 		error = xfs_qm_quotacheck(mp);
 		if (error) {
 			/* Quotacheck failed and disabled quotas. */
@@ -484,7 +482,7 @@ again:
 		xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
 		/* XXX a sentinel would be better */
 		recl = XFS_QI_MPLRECLAIMS(mp);
-		if (! xfs_qm_dqflock_nowait(dqp)) {
+		if (!xfs_dqflock_nowait(dqp)) {
 			/*
 			 * If we can't grab the flush lock then check
 			 * to see if the dquot has been flushed delayed
@@ -1062,7 +1060,7 @@ xfs_qm_sync(
 
 		/* XXX a sentinel would be better */
 		recl = XFS_QI_MPLRECLAIMS(mp);
-		if (! xfs_qm_dqflock_nowait(dqp)) {
+		if (!xfs_dqflock_nowait(dqp)) {
 			if (nowait) {
 				xfs_dqunlock(dqp);
 				continue;
@@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist(
 		 * Try to grab the flush lock. If this dquot is in the process of
 		 * getting flushed to disk, we don't want to reclaim it.
 		 */
-		if (! xfs_qm_dqflock_nowait(dqp)) {
+		if (!xfs_dqflock_nowait(dqp)) {
 			xfs_dqunlock(dqp);
 			dqp = dqp->dq_flnext;
 			continue;
@@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void)
 		 * Try to grab the flush lock. If this dquot is in the process of
 		 * getting flushed to disk, we don't want to reclaim it.
 		 */
-		if (! xfs_qm_dqflock_nowait(dqp)) {
+		if (!xfs_dqflock_nowait(dqp)) {
 			xfs_dqunlock(dqp);
 			continue;
 		}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index cd2300e374a..44f25349e47 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_RELE(xqm)	((xqm)->qm_nrefs--)
 
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void		xfs_qm_mount_quotas(xfs_mount_t *, int);
+extern void		xfs_qm_mount_quotas(xfs_mount_t *);
 extern int		xfs_qm_quotacheck(xfs_mount_t *);
 extern void		xfs_qm_unmount_quotadestroy(xfs_mount_t *);
 extern int		xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f4f6c4c861d..eea2e60b456 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -162,7 +162,7 @@ xfs_qm_newmount(
 			 * mounting, and get on with the boring life
 			 * without disk quotas.
 			 */
-			xfs_qm_mount_quotas(mp, 0);
+			xfs_qm_mount_quotas(mp);
 		} else {
 			/*
 			 * Clear the quota flags, but remember them. This
@@ -184,13 +184,12 @@ STATIC int
 xfs_qm_endmount(
 	xfs_mount_t	*mp,
 	uint		needquotamount,
-	uint		quotaflags,
-	int		mfsi_flags)
+	uint		quotaflags)
 {
 	if (needquotamount) {
 		ASSERT(mp->m_qflags == 0);
 		mp->m_qflags = quotaflags;
-		xfs_qm_mount_quotas(mp, mfsi_flags);
+		xfs_qm_mount_quotas(mp);
 	}
 
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index adfb8723f65..1a3b803dfa5 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes(
 {
 	xfs_inode_t	*ip, *topino;
 	uint		ireclaims;
-	bhv_vnode_t	*vp;
+	struct inode	*vp;
 	boolean_t	vnode_refd;
 
 	ASSERT(mp->m_quotainfo);
@@ -1059,7 +1059,7 @@ again:
 			ip = ip->i_mnext;
 			continue;
 		}
-		vp = XFS_ITOV_NULL(ip);
+		vp = VFS_I(ip);
 		if (!vp) {
 			ASSERT(ip->i_udquot == NULL);
 			ASSERT(ip->i_gdquot == NULL);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 3e4648ad9cf..b2f639a1416 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,15 +37,15 @@
 #include <linux/capability.h>
 #include <linux/posix_acl_xattr.h>
 
-STATIC int	xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *);
+STATIC int	xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
 STATIC void     xfs_acl_filter_mode(mode_t, xfs_acl_t *);
 STATIC void	xfs_acl_get_endian(xfs_acl_t *);
 STATIC int	xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
 STATIC int	xfs_acl_invalid(xfs_acl_t *);
 STATIC void	xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *);
-STATIC void	xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *);
-STATIC int	xfs_acl_allow_set(bhv_vnode_t *, int);
+STATIC void	xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
+STATIC void	xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
+STATIC int	xfs_acl_allow_set(struct inode *, int);
 
 kmem_zone_t *xfs_acl_zone;
 
@@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
 */
 int
 xfs_acl_vhasacl_access(
-	bhv_vnode_t	*vp)
+	struct inode	*vp)
 {
 	int		error;
 
@@ -68,7 +68,7 @@ xfs_acl_vhasacl_access(
 */
 int
 xfs_acl_vhasacl_default(
-	bhv_vnode_t	*vp)
+	struct inode	*vp)
 {
 	int		error;
 
@@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr(
 
 int
 xfs_acl_vget(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	void		*acl,
 	size_t		size,
 	int		kind)
@@ -217,7 +217,6 @@ xfs_acl_vget(
 	posix_acl_xattr_header	*ext_acl = acl;
 	int			flags = 0;
 
-	VN_HOLD(vp);
 	if(size) {
 		if (!(_ACL_ALLOC(xfs_acl))) {
 			error = ENOMEM;
@@ -239,11 +238,10 @@ xfs_acl_vget(
 			goto out;
 		}
 		if (kind == _ACL_TYPE_ACCESS)
-			xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
+			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
 		error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
 	}
 out:
-	VN_RELE(vp);
 	if(xfs_acl)
 		_ACL_FREE(xfs_acl);
 	return -error;
@@ -251,28 +249,26 @@ out:
 
 int
 xfs_acl_vremove(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	int		kind)
 {
 	int		error;
 
-	VN_HOLD(vp);
 	error = xfs_acl_allow_set(vp, kind);
 	if (!error) {
-		error = xfs_attr_remove(xfs_vtoi(vp),
+		error = xfs_attr_remove(XFS_I(vp),
						kind == _ACL_TYPE_DEFAULT?
						SGI_ACL_DEFAULT: SGI_ACL_FILE,
						ATTR_ROOT);
 		if (error == ENOATTR)
 			error = 0;	/* 'scool */
 	}
-	VN_RELE(vp);
 
 	return -error;
 }
 
 int
 xfs_acl_vset(
-	bhv_vnode_t		*vp,
+	struct inode		*vp,
 	void			*acl,
 	size_t			size,
 	int			kind)
@@ -298,7 +294,6 @@ xfs_acl_vset(
 		return 0;
 	}
 
-	VN_HOLD(vp);
 	error = xfs_acl_allow_set(vp, kind);
 
 	/* Incoming ACL exists, set file mode based on its value */
@@ -321,7 +316,6 @@ xfs_acl_vset(
 	}
 
 out:
-	VN_RELE(vp);
 	_ACL_FREE(xfs_acl);
 	return -error;
 }
@@ -363,7 +357,7 @@ xfs_acl_iaccess(
 
 STATIC int
 xfs_acl_allow_set(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	int		kind)
 {
 	if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
@@ -372,7 +366,7 @@ xfs_acl_allow_set(
 		return ENOTDIR;
 	if (vp->i_sb->s_flags & MS_RDONLY)
 		return EROFS;
-	if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
+	if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
 		return EPERM;
 	return 0;
 }
@@ -566,7 +560,7 @@ xfs_acl_get_endian(
 */
 STATIC void
 xfs_acl_get_attr(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	xfs_acl_t	*aclp,
 	int		kind,
 	int		flags,
@@ -576,7 +570,7 @@ xfs_acl_get_attr(
 
 	ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
 	flags |= ATTR_ROOT;
-	*error = xfs_attr_get(xfs_vtoi(vp),
+	*error = xfs_attr_get(XFS_I(vp),
					kind == _ACL_TYPE_ACCESS ?
					SGI_ACL_FILE : SGI_ACL_DEFAULT,
					(char *)aclp, &len, flags);
@@ -590,7 +584,7 @@ xfs_acl_get_attr(
 */
 STATIC void
 xfs_acl_set_attr(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	xfs_acl_t	*aclp,
 	int		kind,
 	int		*error)
@@ -615,7 +609,7 @@ xfs_acl_set_attr(
 		INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
 	}
 	INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	*error = xfs_attr_set(xfs_vtoi(vp),
+	*error = xfs_attr_set(XFS_I(vp),
				kind == _ACL_TYPE_ACCESS ?
				SGI_ACL_FILE: SGI_ACL_DEFAULT,
				(char *)newacl, len, ATTR_ROOT);
@@ -624,7 +618,7 @@ xfs_acl_set_attr(
 
 int
 xfs_acl_vtoacl(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	xfs_acl_t	*access_acl,
 	xfs_acl_t	*default_acl)
 {
@@ -639,7 +633,7 @@ xfs_acl_vtoacl(
 		if (error)
 			access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
 		else /* We have a good ACL and the file mode, synchronize. */
-			xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
+			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
 	}
 
 	if (default_acl) {
@@ -656,7 +650,7 @@ xfs_acl_vtoacl(
 */
 int
 xfs_acl_inherit(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	mode_t		mode,
 	xfs_acl_t	*pdaclp)
 {
@@ -715,7 +709,7 @@ out_error:
 */
 STATIC int
 xfs_acl_setmode(
-	bhv_vnode_t	*vp,
+	struct inode	*vp,
 	xfs_acl_t	*acl,
 	int		*basicperms)
 {
@@ -734,7 +728,7 @@ xfs_acl_setmode(
 	 * mode.  The m:: bits take precedence over the g:: bits.
 	 */
 	iattr.ia_valid = ATTR_MODE;
-	iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
+	iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
 	iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
 	ap = acl->acl_entry;
 	for (i = 0; i < acl->acl_cnt; ++i) {
@@ -764,7 +758,7 @@ xfs_acl_setmode(
 	if (gap && nomask)
 		iattr.ia_mode |= gap->ae_perm << 3;
 
-	return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
+	return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
 }
 
 /*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 323ee94cf83..a4e293b93ef 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -59,14 +59,14 @@ extern struct kmem_zone *xfs_acl_zone;
 		(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
 #define xfs_acl_zone_destroy(zone)	kmem_zone_destroy(zone)
 
-extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *);
+extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
 extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(bhv_vnode_t *);
-extern int xfs_acl_vhasacl_default(bhv_vnode_t *);
-extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
-extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
-extern int xfs_acl_vremove(bhv_vnode_t *, int);
+extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
+extern int xfs_acl_vhasacl_access(struct inode *);
+extern int xfs_acl_vhasacl_default(struct inode *);
+extern int xfs_acl_vset(struct inode *, void *, size_t, int);
+extern int xfs_acl_vget(struct inode *, void *, size_t, int);
+extern int xfs_acl_vremove(struct inode *, int);
 
 #define _ACL_PERM_INVALID(perm)	((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
 
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index f9472a2076d..0b3b5efe848 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -92,16 +92,6 @@
 	((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
     }
 
-/* define generic INT_ macros */
-
-#define INT_GET(reference,arch) \
-    (((arch) == ARCH_NOCONVERT) \
-	? \
-	    (reference) \
-	: \
-	    INT_SWAP((reference),(reference)) \
-    )
-
 /* does not return a value */
 #define INT_SET(reference,arch,valueref) \
     (__builtin_constant_p(valueref) ? \
@@ -112,64 +102,6 @@
 	) \
     )
 
-/* does not return a value */
-#define INT_MOD_EXPR(reference,arch,code) \
-    (((arch) == ARCH_NOCONVERT) \
-	? \
-	    (void)((reference) code) \
-	: \
-	    (void)( \
-		(reference) = INT_GET((reference),arch) , \
-		((reference) code), \
-		INT_SET(reference, arch, reference) \
-	    ) \
-    )
-
-/* does not return a value */
-#define INT_MOD(reference,arch,delta) \
-    (void)( \
-	INT_MOD_EXPR(reference,arch,+=(delta)) \
-    )
-
-/*
- * INT_COPY - copy a value between two locations with the
- *	      _same architecture_ but _potentially different sizes_
- *
- *	    if the types of the two parameters are equal or they are
- *		in native architecture, a simple copy is done
- *
- *	    otherwise, architecture conversions are done
- *
- */
-
-/* does not return a value */
-#define INT_COPY(dst,src,arch) \
-    ( \
-	((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
\ -		(void)((dst) = (src)) \ -	    : \ -		INT_SET(dst, arch, INT_GET(src, arch)) \ -    ) - -/* - * INT_XLATE - copy a value in either direction between two locations - *	       with different architectures - * - *		    dir < 0	- copy from memory to buffer (native to arch) - *		    dir > 0	- copy from buffer to memory (arch to native) - */ - -/* does not return a value */ -#define INT_XLATE(buf,mem,dir,arch) {\ -    ASSERT(dir); \ -    if (dir>0) { \ -	(mem)=INT_GET(buf, arch); \ -    } else { \ -	INT_SET(buf, arch, mem); \ -    } \ -} -  /*   * In directories inode numbers are stored as unaligned arrays of unsigned   * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 78de80e3caa..f7cdc28aff4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -194,6 +194,46 @@ xfs_attr_get(  	return(error);  } +/* + * Calculate how many blocks we need for the new attribute, + */ +int +xfs_attr_calc_size( +	struct xfs_inode 	*ip, +	int			namelen, +	int			valuelen, +	int			*local) +{ +	struct xfs_mount 	*mp = ip->i_mount; +	int			size; +	int			nblks; + +	/* +	 * Determine space new attribute will use, and if it would be +	 * "local" or "remote" (note: local != inline). +	 */ +	size = xfs_attr_leaf_newentsize(namelen, valuelen, +					mp->m_sb.sb_blocksize, local); + +	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); +	if (*local) { +		if (size > (mp->m_sb.sb_blocksize >> 1)) { +			/* Double split possible */ +			nblks *= 2; +		} +	} else { +		/* +		 * Out of line attribute, cannot double split, but +		 * make room for the attribute value itself. +		 */ +		uint	dblocks = XFS_B_TO_FSB(mp, valuelen); +		nblks += dblocks; +		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); +	} + +	return nblks; +} +  STATIC int  xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,  		char *value, int valuelen, int flags) @@ -202,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,  	xfs_fsblock_t	firstblock;  	xfs_bmap_free_t flist;  	int		error, err2, committed; -	int		local, size; -	uint		nblks;  	xfs_mount_t	*mp = dp->i_mount;  	int             rsvd = (flags & ATTR_ROOT) != 0; +	int		local;  	/*  	 * Attach the dquots to the inode. @@ -241,30 +280,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,  	args.whichfork = XFS_ATTR_FORK;  	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; -	/* -	 * Determine space new attribute will use, and if it would be -	 * "local" or "remote" (note: local != inline). -	 */ -	size = xfs_attr_leaf_newentsize(name->len, valuelen, -					mp->m_sb.sb_blocksize, &local); - -	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); -	if (local) { -		if (size > (mp->m_sb.sb_blocksize >> 1)) { -			/* Double split possible */ -			nblks <<= 1; -		} -	} else { -		uint	dblocks = XFS_B_TO_FSB(mp, valuelen); -		/* Out of line attribute, cannot double split, but make -		 * room for the attribute value itself. -		 */ -		nblks += dblocks; -		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); -	} -  	/* Size is now blocks for attribute data */ -	args.total = nblks; +	args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);  	/*  	 * Start our first transaction of the day. 
@@ -286,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,  	if (rsvd)  		args.trans->t_flags |= XFS_TRANS_RESERVE; -	if ((error = xfs_trans_reserve(args.trans, (uint) nblks, -				      XFS_ATTRSET_LOG_RES(mp, nblks), -				      0, XFS_TRANS_PERM_LOG_RES, -				      XFS_ATTRSET_LOG_COUNT))) { +	if ((error = xfs_trans_reserve(args.trans, args.total, +			XFS_ATTRSET_LOG_RES(mp, args.total), 0, +			XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) {  		xfs_trans_cancel(args.trans, 0);  		return(error);  	}  	xfs_ilock(dp, XFS_ILOCK_EXCL); -	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, -			 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : -				XFS_QMOPT_RES_REGBLKS); +	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, +				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : +				       XFS_QMOPT_RES_REGBLKS);  	if (error) {  		xfs_iunlock(dp, XFS_ILOCK_EXCL);  		xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); @@ -384,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,  		 * Commit the leaf transformation.  We'll need another (linked)  		 * transaction to add the new attribute to the leaf.  		 */ -		if ((error = xfs_attr_rolltrans(&args.trans, dp))) + +		error = xfs_trans_roll(&args.trans, dp); +		if (error)  			goto out;  	} @@ -964,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)  		 * Commit the current trans (including the inode) and start  		 * a new one.  		 */ -		if ((error = xfs_attr_rolltrans(&args->trans, dp))) +		error = xfs_trans_roll(&args->trans, dp); +		if (error)  			return (error);  		/* @@ -978,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)  	 * Commit the transaction that added the attr name so that  	 * later routines can manage their own transactions.  	 */ -	if ((error = xfs_attr_rolltrans(&args->trans, dp))) +	error = xfs_trans_roll(&args->trans, dp); +	if (error)  		return (error);  	/* @@ -1067,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)  		/*  		 * Commit the remove and start the next trans in series.  		 */ -		error = xfs_attr_rolltrans(&args->trans, dp); +		error = xfs_trans_roll(&args->trans, dp);  	} else if (args->rmtblkno > 0) {  		/* @@ -1298,7 +1318,8 @@ restart:  			 * Commit the node conversion and start the next  			 * trans in the chain.  			 */ -			if ((error = xfs_attr_rolltrans(&args->trans, dp))) +			error = xfs_trans_roll(&args->trans, dp); +			if (error)  				goto out;  			goto restart; @@ -1349,7 +1370,8 @@ restart:  	 * Commit the leaf addition or btree split and start the next  	 * trans in the chain.  	 */ -	if ((error = xfs_attr_rolltrans(&args->trans, dp))) +	error = xfs_trans_roll(&args->trans, dp); +	if (error)  		goto out;  	/* @@ -1449,7 +1471,8 @@ restart:  		/*  		 * Commit and start the next trans in the chain.  		 */ -		if ((error = xfs_attr_rolltrans(&args->trans, dp))) +		error = xfs_trans_roll(&args->trans, dp); +		if (error)  			goto out;  	} else if (args->rmtblkno > 0) { @@ -1581,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)  		/*  		 * Commit the Btree join operation and start a new trans.  		 */ -		if ((error = xfs_attr_rolltrans(&args->trans, dp))) +		error = xfs_trans_roll(&args->trans, dp); +		if (error)  			goto out;  	} @@ -2082,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)  		/*  		 * Start the next trans in the chain.  		 
*/ -		if ((error = xfs_attr_rolltrans(&args->trans, dp))) +		error = xfs_trans_roll(&args->trans, dp); +		if (error)  			return (error);  	} @@ -2232,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)  		/*  		 * Close out trans and start the next one in the chain.  		 */ -		if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) +		error = xfs_trans_roll(&args->trans, args->dp); +		if (error)  			return (error);  	}  	return(0); diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 8b2d31c19e4..fb3b2a68b9b 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -129,6 +129,7 @@ typedef struct xfs_attr_list_context {  /*   * Overall external interface routines.   */ +int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);  int xfs_attr_inactive(struct xfs_inode *dp);  int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);  int xfs_attr_rmtval_get(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 23ef5d7c87e..79da6b2ea99 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2498,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)  	/*  	 * Commit the flag value change and start the next trans in series.  	 */ -	error = xfs_attr_rolltrans(&args->trans, args->dp); - -	return(error); +	return xfs_trans_roll(&args->trans, args->dp);  }  /* @@ -2547,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)  	/*  	 * Commit the flag value change and start the next trans in series.  	 */ -	error = xfs_attr_rolltrans(&args->trans, args->dp); - -	return(error); +	return xfs_trans_roll(&args->trans, args->dp);  }  /* @@ -2665,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)  	/*  	 * Commit the flag value change and start the next trans in series.  	 */ -	error = xfs_attr_rolltrans(&args->trans, args->dp); +	error = xfs_trans_roll(&args->trans, args->dp);  	return(error);  } @@ -2723,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)  	/*  	 * Commit the invalidate and start the next transaction.  	 */ -	error = xfs_attr_rolltrans(trans, dp); +	error = xfs_trans_roll(trans, dp);  	return (error);  } @@ -2825,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,  		/*  		 * Atomically commit the whole invalidate stuff.  		 */ -		if ((error = xfs_attr_rolltrans(trans, dp))) +		error = xfs_trans_roll(trans, dp); +		if (error)  			return (error);  	} @@ -2964,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,  			/*  			 * Roll to next transaction.  			 */ -			if ((error = xfs_attr_rolltrans(trans, dp))) +			error = xfs_trans_roll(trans, dp); +			if (error)  				return (error);  		} @@ -2974,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,  	return(0);  } - - -/* - * Roll from one trans in the sequence of PERMANENT transactions to the next. - */ -int -xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp) -{ -	xfs_trans_t *trans; -	unsigned int logres, count; -	int	error; - -	/* -	 * Ensure that the inode is always logged. -	 */ -	trans = *transp; -	xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); - -	/* -	 * Copy the critical parameters from one trans to the next. -	 */ -	logres = trans->t_log_res; -	count = trans->t_log_count; -	*transp = xfs_trans_dup(trans); - -	/* -	 * Commit the current transaction. -	 * If this commit failed, then it'd just unlock those items that -	 * are not marked ihold. That also means that a filesystem shutdown -	 * is in progress. 
The caller takes the responsibility to cancel -	 * the duplicate transaction that gets returned. -	 */ -	if ((error = xfs_trans_commit(trans, 0))) -		return (error); - -	trans = *transp; - -	/* -	 * Reserve space in the log for th next transaction. -	 * This also pushes items in the "AIL", the list of logged items, -	 * out to disk if they are taking up space at the tail of the log -	 * that we want to use.  This requires that either nothing be locked -	 * across this call, or that anything that is locked be logged in -	 * the prior and the next transactions. -	 */ -	error = xfs_trans_reserve(trans, 0, logres, 0, -				  XFS_TRANS_PERM_LOG_RES, count); -	/* -	 *  Ensure that the inode is in the new transaction and locked. -	 */ -	if (!error) { -		xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); -		xfs_trans_ihold(trans, dp); -	} -	return (error); - -} diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 5ecf437b782..83e9af417ca 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -274,6 +274,4 @@ int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,  				   struct xfs_dabuf *leaf2_bp);  int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,  					int *local); -int	xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); -  #endif	/* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41..48228848f5a 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -25,109 +25,6 @@   * XFS bit manipulation routines, used in non-realtime code.   */ -#ifndef HAVE_ARCH_HIGHBIT -/* - * Index of high bit number in byte, -1 for none set, 0..7 otherwise. - */ -static const char xfs_highbit[256] = { -       -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */ -	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */ -	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */ -	4, 4, 4, 4, 4, 4, 4, 4,			/* 18 .. 1f */ -	5, 5, 5, 5, 5, 5, 5, 5,			/* 20 .. 27 */ -	5, 5, 5, 5, 5, 5, 5, 5,			/* 28 .. 2f */ -	5, 5, 5, 5, 5, 5, 5, 5,			/* 30 .. 37 */ -	5, 5, 5, 5, 5, 5, 5, 5,			/* 38 .. 3f */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 40 .. 47 */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 48 .. 4f */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 50 .. 57 */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 58 .. 5f */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 60 .. 67 */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 68 .. 6f */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 70 .. 77 */ -	6, 6, 6, 6, 6, 6, 6, 6,			/* 78 .. 7f */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* 80 .. 87 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* 88 .. 8f */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* 90 .. 97 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* 98 .. 9f */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* a0 .. a7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* a8 .. af */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* b0 .. b7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* b8 .. bf */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* c0 .. c7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* c8 .. cf */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* d0 .. d7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* d8 .. df */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* e0 .. e7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* e8 .. ef */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* f0 .. f7 */ -	7, 7, 7, 7, 7, 7, 7, 7,			/* f8 .. ff */ -}; -#endif - -/* - * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. 
- */ -inline int -xfs_highbit32( -	__uint32_t	v) -{ -#ifdef HAVE_ARCH_HIGHBIT -	return highbit32(v); -#else -	int		i; - -	if (v & 0xffff0000) -		if (v & 0xff000000) -			i = 24; -		else -			i = 16; -	else if (v & 0x0000ffff) -		if (v & 0x0000ff00) -			i = 8; -		else -			i = 0; -	else -		return -1; -	return i + xfs_highbit[(v >> i) & 0xff]; -#endif -} - -/* - * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_lowbit64( -	__uint64_t	v) -{ -	__uint32_t	w = (__uint32_t)v; -	int		n = 0; - -	if (w) {	/* lower bits */ -		n = ffs(w); -	} else {	/* upper bits */ -		w = (__uint32_t)(v >> 32); -		if (w && (n = ffs(w))) -			n += 32; -	} -	return n - 1; -} - -/* - * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_highbit64( -	__uint64_t	v) -{ -	__uint32_t	h = (__uint32_t)(v >> 32); - -	if (h) -		return xfs_highbit32(h) + 32; -	return xfs_highbit32((__uint32_t)v); -} - -  /*   * Return whether bitmap is empty.   * Size is number of words in the bitmap, which is padded to word boundary diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782..8e0e463dae2 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n)  }  /* Get high bit set out of 32-bit argument, -1 if none set */ -extern int xfs_highbit32(__uint32_t v); +static inline int xfs_highbit32(__uint32_t v) +{ +	return fls(v) - 1; +} + +/* Get high bit set out of 64-bit argument, -1 if none set */ +static inline int xfs_highbit64(__uint64_t v) +{ +	return fls64(v) - 1; +} + +/* Get low bit set out of 32-bit argument, -1 if none set */ +static inline int xfs_lowbit32(__uint32_t v) +{ +	unsigned long	t = v; +	return (v) ? find_first_bit(&t, 32) : -1; +}  /* Get low bit set out of 64-bit argument, -1 if none set */ -extern int xfs_lowbit64(__uint64_t v); +static inline int xfs_lowbit64(__uint64_t v) +{ +	__uint32_t	w = (__uint32_t)v; +	int		n = 0; -/* Get high bit set out of 64-bit argument, -1 if none set */ -extern int xfs_highbit64(__uint64_t); +	if (w) {	/* lower bits */ +		n = ffs(w); +	} else {	/* upper bits */ +		w = (__uint32_t)(v >> 32); +		if (w && (n = ffs(w))) +			n += 32; +	} +	return n - 1; +}  /* Return whether bitmap is empty (1 == empty) */  extern int xfs_bitmap_empty(uint *map, uint size); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3c4beb3a432..a1aab9275d5 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -384,14 +384,14 @@ xfs_bmap_count_tree(  	int             levelin,  	int		*count); -STATIC int +STATIC void  xfs_bmap_count_leaves(  	xfs_ifork_t		*ifp,  	xfs_extnum_t		idx,  	int			numrecs,  	int			*count); -STATIC int +STATIC void  xfs_bmap_disk_count_leaves(  	xfs_extnum_t		idx,  	xfs_bmbt_block_t	*block, @@ -4000,7 +4000,7 @@ xfs_bmap_add_attrfork(  		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;  	}  	ASSERT(ip->i_d.di_anextents == 0); -	VN_HOLD(XFS_ITOV(ip)); +	IHOLD(ip);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  	switch (ip->i_d.di_format) { @@ -6096,7 +6096,7 @@ xfs_bmap_get_bp(  		tp = cur->bc_tp;  		licp = &tp->t_items;  		while (!bp && licp != NULL) { -			if (XFS_LIC_ARE_ALL_FREE(licp)) { +			if (xfs_lic_are_all_free(licp)) {  				licp = licp->lic_next;  				continue;  			} @@ -6106,11 +6106,11 @@ xfs_bmap_get_bp(  				xfs_buf_log_item_t	*bip;  				xfs_buf_t		*lbp; -				if (XFS_LIC_ISFREE(licp, i)) { +				if (xfs_lic_isfree(licp, i)) {  					continue;  				} -				lidp = XFS_LIC_SLOT(licp, i); +				lidp = 
xfs_lic_slot(licp, i);  				lip = lidp->lid_item;  				if (lip->li_type != XFS_LI_BUF)  					continue; @@ -6367,13 +6367,9 @@ xfs_bmap_count_blocks(  	mp = ip->i_mount;  	ifp = XFS_IFORK_PTR(ip, whichfork);  	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { -		if (unlikely(xfs_bmap_count_leaves(ifp, 0, +		xfs_bmap_count_leaves(ifp, 0,  			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), -			count) < 0)) { -			XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)", -					 XFS_ERRLEVEL_LOW, mp); -			return XFS_ERROR(EFSCORRUPTED); -		} +			count);  		return 0;  	} @@ -6454,13 +6450,7 @@ xfs_bmap_count_tree(  		for (;;) {  			nextbno = be64_to_cpu(block->bb_rightsib);  			numrecs = be16_to_cpu(block->bb_numrecs); -			if (unlikely(xfs_bmap_disk_count_leaves(0, -					block, numrecs, count) < 0)) { -				xfs_trans_brelse(tp, bp); -				XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", -						 XFS_ERRLEVEL_LOW, mp); -				return XFS_ERROR(EFSCORRUPTED); -			} +			xfs_bmap_disk_count_leaves(0, block, numrecs, count);  			xfs_trans_brelse(tp, bp);  			if (nextbno == NULLFSBLOCK)  				break; @@ -6478,7 +6468,7 @@ xfs_bmap_count_tree(  /*   * Count leaf blocks given a range of extent records.   */ -STATIC int +STATIC void  xfs_bmap_count_leaves(  	xfs_ifork_t		*ifp,  	xfs_extnum_t		idx, @@ -6491,14 +6481,13 @@ xfs_bmap_count_leaves(  		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);  		*count += xfs_bmbt_get_blockcount(frp);  	} -	return 0;  }  /*   * Count leaf blocks given a range of extent records originally   * in btree format.   */ -STATIC int +STATIC void  xfs_bmap_disk_count_leaves(  	xfs_extnum_t		idx,  	xfs_bmbt_block_t	*block, @@ -6512,5 +6501,4 @@ xfs_bmap_disk_count_leaves(  		frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);  		*count += xfs_bmbt_disk_get_blockcount(frp);  	} -	return 0;  } diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index aeb87ca69fc..cc593a84c34 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -46,38 +46,11 @@ kmem_zone_t	*xfs_btree_cur_zone;  /*   * Btree magic numbers.   */ -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = -{ +const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {  	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC  };  /* - * Prototypes for internal routines. - */ - -/* - * Checking routine: return maxrecs for the block. - */ -STATIC int				/* number of records fitting in block */ -xfs_btree_maxrecs( -	xfs_btree_cur_t		*cur,	/* btree cursor */ -	xfs_btree_block_t	*block);/* generic btree block pointer */ - -/* - * Internal routines. - */ - -/* - * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. - */ -STATIC xfs_btree_block_t *			/* generic btree block pointer */ -xfs_btree_get_block( -	xfs_btree_cur_t		*cur,	/* btree cursor */ -	int			level,	/* level in btree */ -	struct xfs_buf		**bpp);	/* buffer containing the block */ - -/*   * Checking routine: return maxrecs for the block.   */  STATIC int				/* number of records fitting in block */ @@ -457,35 +430,6 @@ xfs_btree_dup_cursor(  }  /* - * Change the cursor to point to the first record at the given level. - * Other levels are unaffected. - */ -int					/* success=1, failure=0 */ -xfs_btree_firstrec( -	xfs_btree_cur_t		*cur,	/* btree cursor */ -	int			level)	/* level to change */ -{ -	xfs_btree_block_t	*block;	/* generic btree block pointer */ -	xfs_buf_t		*bp;	/* buffer containing block */ - -	/* -	 * Get the block pointer for this level. 
-	 */ -	block = xfs_btree_get_block(cur, level, &bp); -	xfs_btree_check_block(cur, block, level, bp); -	/* -	 * It's empty, there is no such record. -	 */ -	if (!block->bb_h.bb_numrecs) -		return 0; -	/* -	 * Set the ptr value to 1, that's the first record/key. -	 */ -	cur->bc_ptrs[level] = 1; -	return 1; -} - -/*   * Retrieve the block pointer from the cursor at the given level.   * This may be a bmap btree root or from a buffer.   */ @@ -626,6 +570,13 @@ xfs_btree_init_cursor(  		cur->bc_private.a.agbp = agbp;  		cur->bc_private.a.agno = agno;  		break; +	case XFS_BTNUM_INO: +		/* +		 * Inode allocation btree fields. +		 */ +		cur->bc_private.a.agbp = agbp; +		cur->bc_private.a.agno = agno; +		break;  	case XFS_BTNUM_BMAP:  		/*  		 * Bmap btree fields. @@ -638,13 +589,6 @@ xfs_btree_init_cursor(  		cur->bc_private.b.flags = 0;  		cur->bc_private.b.whichfork = whichfork;  		break; -	case XFS_BTNUM_INO: -		/* -		 * Inode allocation btree fields. -		 */ -		cur->bc_private.i.agbp = agbp; -		cur->bc_private.i.agno = agno; -		break;  	default:  		ASSERT(0);  	} @@ -671,6 +615,35 @@ xfs_btree_islastblock(  }  /* + * Change the cursor to point to the first record at the given level. + * Other levels are unaffected. + */ +int					/* success=1, failure=0 */ +xfs_btree_firstrec( +	xfs_btree_cur_t		*cur,	/* btree cursor */ +	int			level)	/* level to change */ +{ +	xfs_btree_block_t	*block;	/* generic btree block pointer */ +	xfs_buf_t		*bp;	/* buffer containing block */ + +	/* +	 * Get the block pointer for this level. +	 */ +	block = xfs_btree_get_block(cur, level, &bp); +	xfs_btree_check_block(cur, block, level, bp); +	/* +	 * It's empty, there is no such record. +	 */ +	if (!block->bb_h.bb_numrecs) +		return 0; +	/* +	 * Set the ptr value to 1, that's the first record/key. +	 */ +	cur->bc_ptrs[level] = 1; +	return 1; +} + +/*   * Change the cursor to point to the last record in the current block   * at the given level.  Other levels are unaffected.   
*/ @@ -890,12 +863,12 @@ xfs_btree_readahead_core(  	case XFS_BTNUM_INO:  		i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);  		if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { -			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, +			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,  				be32_to_cpu(i->bb_leftsib), 1);  			rval++;  		}  		if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { -			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, +			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,  				be32_to_cpu(i->bb_rightsib), 1);  			rval++;  		} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7440b78f9ce..1f528a2a375 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -158,8 +158,8 @@ typedef struct xfs_btree_cur  	__uint8_t	bc_blocklog;	/* log2(blocksize) of btree blocks */  	xfs_btnum_t	bc_btnum;	/* identifies which btree type */  	union { -		struct {			/* needed for BNO, CNT */ -			struct xfs_buf	*agbp;	/* agf buffer pointer */ +		struct {			/* needed for BNO, CNT, INO */ +			struct xfs_buf	*agbp;	/* agf/agi buffer pointer */  			xfs_agnumber_t	agno;	/* ag number */  		} a;  		struct {			/* needed for BMAP */ @@ -172,10 +172,6 @@ typedef struct xfs_btree_cur  			char		flags;		/* flags */  #define	XFS_BTCUR_BPRV_WASDEL	1			/* was delayed */  		} b; -		struct {			/* needed for INO */ -			struct xfs_buf	*agbp;	/* agi buffer pointer */ -			xfs_agnumber_t	agno;	/* ag number */ -		} i;  	}		bc_private;	/* per-btree type data */  } xfs_btree_cur_t; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d86ca2c03a7..608c30c3f76 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -737,7 +737,7 @@ xfs_buf_item_init(  	bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));  	bip->bli_format.blf_map_size = map_size;  #ifdef XFS_BLI_TRACE -	bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); +	bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_TRANS_DEBUG @@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks(  			   anyway. 
*/  			XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);  			XFS_BUF_DONE(bp); -			XFS_BUF_V_IODONESEMA(bp); +			XFS_BUF_FINISH_IOWAIT(bp);  		}  		return;  	} diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 2211e885ef2..760f4c5b516 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -128,10 +128,8 @@ xfs_swap_extents(  	xfs_swapext_t	*sxp)  {  	xfs_mount_t	*mp; -	xfs_inode_t	*ips[2];  	xfs_trans_t	*tp;  	xfs_bstat_t	*sbp = &sxp->sx_stat; -	bhv_vnode_t	*vp, *tvp;  	xfs_ifork_t	*tempifp, *ifp, *tifp;  	int		ilf_fields, tilf_fields;  	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; @@ -150,19 +148,8 @@ xfs_swap_extents(  	}  	sbp = &sxp->sx_stat; -	vp = XFS_ITOV(ip); -	tvp = XFS_ITOV(tip); -	/* Lock in i_ino order */ -	if (ip->i_ino < tip->i_ino) { -		ips[0] = ip; -		ips[1] = tip; -	} else { -		ips[0] = tip; -		ips[1] = ip; -	} - -	xfs_lock_inodes(ips, 2, lock_flags); +	xfs_lock_two_inodes(ip, tip, lock_flags);  	locked = 1;  	/* Verify that both files have the same format */ @@ -184,7 +171,7 @@ xfs_swap_extents(  		goto error0;  	} -	if (VN_CACHED(tvp) != 0) { +	if (VN_CACHED(VFS_I(tip)) != 0) {  		xfs_inval_cached_trace(tip, 0, -1, 0, -1);  		error = xfs_flushinval_pages(tip, 0, -1,  				FI_REMAPF_LOCKED); @@ -193,7 +180,7 @@ xfs_swap_extents(  	}  	/* Verify O_DIRECT for ftmp */ -	if (VN_CACHED(tvp) != 0) { +	if (VN_CACHED(VFS_I(tip)) != 0) {  		error = XFS_ERROR(EINVAL);  		goto error0;  	} @@ -237,7 +224,7 @@ xfs_swap_extents(  	 * vop_read (or write in the case of autogrow) they block on the iolock  	 * until we have switched the extents.  	 */ -	if (VN_MAPPED(vp)) { +	if (VN_MAPPED(VFS_I(ip))) {  		error = XFS_ERROR(EBUSY);  		goto error0;  	} @@ -265,7 +252,7 @@ xfs_swap_extents(  		locked = 0;  		goto error0;  	} -	xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); +	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);  	/*  	 * Count the number of extended attribute blocks @@ -350,15 +337,11 @@ xfs_swap_extents(  		break;  	} -	/* -	 * Increment vnode ref counts since xfs_trans_commit & -	 * xfs_trans_cancel will both unlock the inodes and -	 * decrement the associated ref counts. 
-	 */ -	VN_HOLD(vp); -	VN_HOLD(tvp); +	IHOLD(ip);  	xfs_trans_ijoin(tp, ip, lock_flags); + +	IHOLD(tip);  	xfs_trans_ijoin(tp, tip, lock_flags);  	xfs_trans_log_inode(tp, ip,  ilf_fields); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index f66756cfb5e..f227ecd1a29 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -58,9 +58,6 @@ xfs_error_trap(int e)  	}  	return e;  } -#endif - -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))  int	xfs_etest[XFS_NUM_INJECT_ERROR];  int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; @@ -154,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)  	return 0;  } -#endif /* DEBUG || INDUCE_IO_ERROR */ +#endif /* DEBUG */  static void  xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index d8559d132ef..11543f10b0c 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -125,22 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,  #define XFS_RANDOM_DIOWRITE_IOERR			(XFS_RANDOM_DEFAULT/10)  #define	XFS_RANDOM_BMAPIFORMAT				XFS_RANDOM_DEFAULT -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) +#ifdef DEBUG  extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);  #define	XFS_NUM_INJECT_ERROR				10 - -#ifdef __ANSI_CPP__ -#define XFS_TEST_ERROR(expr, mp, tag, rf)		\ -	((expr) || \ -	 xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \ -			 (rf))) -#else  #define XFS_TEST_ERROR(expr, mp, tag, rf)		\  	((expr) || \  	 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \  			(rf))) -#endif /* __ANSI_CPP__ */  extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);  extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); @@ -148,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);  #define XFS_TEST_ERROR(expr, mp, tag, rf)	(expr)  #define xfs_errortag_add(tag, mp)		(ENOSYS)  #define xfs_errortag_clearall(mp, loud)		(ENOSYS) -#endif /* (DEBUG || INDUCE_IO_ERROR) */ +#endif /* DEBUG */  /*   * XFS panic tags -- allow a call to xfs_cmn_err() be turned into diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index c38fd14fca2..f3bb75da384 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -400,7 +400,7 @@ xfs_filestream_init(void)  	if (!item_zone)  		return -ENOMEM;  #ifdef XFS_FILESTREAMS_TRACE -	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); +	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);  #endif  	return 0;  } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index e5310c90e50..83502f3edef 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -181,7 +181,7 @@ xfs_inobt_delrec(  		 * then we can get rid of this level.  		 */  		if (numrecs == 1 && level > 0) { -			agbp = cur->bc_private.i.agbp; +			agbp = cur->bc_private.a.agbp;  			agi = XFS_BUF_TO_AGI(agbp);  			/*  			 * pp is still set to the first pointer in the block. @@ -194,7 +194,7 @@ xfs_inobt_delrec(  			 * Free the block.  			 
*/  			if ((error = xfs_free_extent(cur->bc_tp, -				XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) +				XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))  				return error;  			xfs_trans_binval(cur->bc_tp, bp);  			xfs_ialloc_log_agi(cur->bc_tp, agbp, @@ -379,7 +379,7 @@ xfs_inobt_delrec(  		rrecs = be16_to_cpu(right->bb_numrecs);  		rbp = bp;  		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, -				cur->bc_private.i.agno, lbno, 0, &lbp, +				cur->bc_private.a.agno, lbno, 0, &lbp,  				XFS_INO_BTREE_REF)))  			return error;  		left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -401,7 +401,7 @@ xfs_inobt_delrec(  		lrecs = be16_to_cpu(left->bb_numrecs);  		lbp = bp;  		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, -				cur->bc_private.i.agno, rbno, 0, &rbp, +				cur->bc_private.a.agno, rbno, 0, &rbp,  				XFS_INO_BTREE_REF)))  			return error;  		right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -484,7 +484,7 @@ xfs_inobt_delrec(  		xfs_buf_t		*rrbp;  		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, -				cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, +				cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,  				&rrbp, XFS_INO_BTREE_REF)))  			return error;  		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); @@ -497,7 +497,7 @@ xfs_inobt_delrec(  	 * Free the deleting block.  	 */  	if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, -				     cur->bc_private.i.agno, rbno), 1))) +				     cur->bc_private.a.agno, rbno), 1)))  		return error;  	xfs_trans_binval(cur->bc_tp, rbp);  	/* @@ -854,7 +854,7 @@ xfs_inobt_lookup(  	{  		xfs_agi_t	*agi;	/* a.g. inode header */ -		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); +		agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);  		agno = be32_to_cpu(agi->agi_seqno);  		agbno = be32_to_cpu(agi->agi_root);  	} @@ -1089,7 +1089,7 @@ xfs_inobt_lshift(  	 * Set up the left neighbor as "left".  	 */  	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, -			cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), +			cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),  			0, &lbp, XFS_INO_BTREE_REF)))  		return error;  	left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -1207,10 +1207,10 @@ xfs_inobt_newroot(  	/*  	 * Get a block & a buffer.  	 */ -	agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); +	agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);  	args.tp = cur->bc_tp;  	args.mp = cur->bc_mp; -	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, +	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,  		be32_to_cpu(agi->agi_root));  	args.mod = args.minleft = args.alignment = args.total = args.wasdel =  		args.isfl = args.userdata = args.minalignslop = 0; @@ -1233,7 +1233,7 @@ xfs_inobt_newroot(  	 */  	agi->agi_root = cpu_to_be32(args.agbno);  	be32_add_cpu(&agi->agi_level, 1); -	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, +	xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,  		XFS_AGI_ROOT | XFS_AGI_LEVEL);  	/*  	 * At the previous root level there are now two blocks: the old @@ -1376,7 +1376,7 @@ xfs_inobt_rshift(  	 * Set up the right neighbor as "right".  	 */  	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, -			cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), +			cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),  			0, &rbp, XFS_INO_BTREE_REF)))  		return error;  	right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -1492,7 +1492,7 @@ xfs_inobt_split(  	 * Allocate the new block.  	 * If we can't do it, we're toast.  Give up.  	 
*/ -	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); +	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);  	args.mod = args.minleft = args.alignment = args.total = args.wasdel =  		args.isfl = args.userdata = args.minalignslop = 0;  	args.minlen = args.maxlen = args.prod = 1; @@ -1725,7 +1725,7 @@ xfs_inobt_decrement(  		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));  		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, -				cur->bc_private.i.agno, agbno, 0, &bp, +				cur->bc_private.a.agno, agbno, 0, &bp,  				XFS_INO_BTREE_REF)))  			return error;  		lev--; @@ -1897,7 +1897,7 @@ xfs_inobt_increment(  		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));  		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, -				cur->bc_private.i.agno, agbno, 0, &bp, +				cur->bc_private.a.agno, agbno, 0, &bp,  				XFS_INO_BTREE_REF)))  			return error;  		lev--; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b07604b94d9..e229e9e001c 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -216,7 +216,14 @@ finish_inode:  	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);  	init_waitqueue_head(&ip->i_ipin_wait);  	atomic_set(&ip->i_pincount, 0); -	initnsema(&ip->i_flock, 1, "xfsfino"); + +	/* +	 * Because we want to use a counting completion, complete +	 * the flush completion once to allow a single access to +	 * the flush completion without blocking. +	 */ +	init_completion(&ip->i_flush); +	complete(&ip->i_flush);  	if (lock_flags)  		xfs_ilock(ip, lock_flags); @@ -288,10 +295,17 @@ finish_inode:  	*ipp = ip;  	/* +	 * Link the XFS inode with the Linux inode. +	 */ +	ip->i_vnode = inode; +	inode->i_private = ip; +  	/*  	 * If we have a real type for an on-disk inode, we can set ops(&unlock)  	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.  	 */ -	xfs_initialize_vnode(mp, inode, ip); +	if (ip->i_d.di_mode != 0) +		xfs_setup_inode(ip);  	return 0;  } @@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t	*ip,   * Special iput for brand-new inodes that are still locked   */  void -xfs_iput_new(xfs_inode_t	*ip, -	     uint		lock_flags) +xfs_iput_new( +	xfs_inode_t	*ip, +	uint		lock_flags)  { -	struct inode	*inode = ip->i_vnode; +	struct inode	*inode = VFS_I(ip);  	xfs_itrace_entry(ip); @@ -775,26 +790,3 @@ xfs_isilocked(  }  #endif -/* - * The following three routines simply manage the i_flock - * semaphore embedded in the inode.  This semaphore synchronizes - * processes attempting to flush the in-core inode back to disk. - */ -void -xfs_iflock(xfs_inode_t *ip) -{ -	psema(&(ip->i_flock), PINOD|PLTWAIT); -} - -int -xfs_iflock_nowait(xfs_inode_t *ip) -{ -	return (cpsema(&(ip->i_flock))); -} - -void -xfs_ifunlock(xfs_inode_t *ip) -{ -	ASSERT(issemalocked(&(ip->i_flock))); -	vsema(&(ip->i_flock)); -} diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bedc6616317..358511b85ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -580,8 +580,8 @@ xfs_iformat_extents(  		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));  		for (i = 0; i < nex; i++, dp++) {  			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); -			ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); -			ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); +			ep->l0 = get_unaligned_be64(&dp->l0); +			ep->l1 = get_unaligned_be64(&dp->l1);  		}  		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);  		if (whichfork != XFS_DATA_FORK || @@ -835,22 +835,22 @@ xfs_iread(  	 * Do this before xfs_iformat in case it adds entries.
 	 */  #ifdef	XFS_INODE_TRACE -	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); +	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_BMAP_TRACE -	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); +	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_BMBT_TRACE -	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); +	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_RW_TRACE -	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); +	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_ILOCK_TRACE -	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); +	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);  #endif  #ifdef XFS_DIR2_TRACE -	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); +	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);  #endif  	/* @@ -1046,9 +1046,9 @@ xfs_ialloc(  {  	xfs_ino_t	ino;  	xfs_inode_t	*ip; -	bhv_vnode_t	*vp;  	uint		flags;  	int		error; +	timespec_t	tv;  	/*  	 * Call the space management code to pick @@ -1077,7 +1077,6 @@ xfs_ialloc(  	}  	ASSERT(ip != NULL); -	vp = XFS_ITOV(ip);  	ip->i_d.di_mode = (__uint16_t)mode;  	ip->i_d.di_onlink = 0;  	ip->i_d.di_nlink = nlink; @@ -1130,7 +1129,13 @@ xfs_ialloc(  	ip->i_size = 0;  	ip->i_d.di_nextents = 0;  	ASSERT(ip->i_d.di_nblocks == 0); -	xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); + +	nanotime(&tv); +	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; +	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; +	ip->i_d.di_atime = ip->i_d.di_mtime; +	ip->i_d.di_ctime = ip->i_d.di_mtime; +  	/*  	 * di_gen will have been taken care of in xfs_iread.  	 */ @@ -1220,7 +1225,7 @@ xfs_ialloc(  	xfs_trans_log_inode(tp, ip, flags);  	/* now that we have an i_mode we can setup inode ops and unlock */ -	xfs_initialize_vnode(tp->t_mountp, vp, ip); +	xfs_setup_inode(ip);  	*ipp = ip;  	return 0; @@ -1399,7 +1404,6 @@ xfs_itruncate_start(  	xfs_fsize_t	last_byte;  	xfs_off_t	toss_start;  	xfs_mount_t	*mp; -	bhv_vnode_t	*vp;  	int		error = 0;  	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); @@ -1408,7 +1412,6 @@ xfs_itruncate_start(  	       (flags == XFS_ITRUNC_MAYBE));  	mp = ip->i_mount; -	vp = XFS_ITOV(ip);  	/* wait for the completion of any pending DIOs */  	if (new_size < ip->i_size) @@ -1457,7 +1460,7 @@ xfs_itruncate_start(  #ifdef DEBUG  	if (new_size == 0) { -		ASSERT(VN_CACHED(vp) == 0); +		ASSERT(VN_CACHED(VFS_I(ip)) == 0);  	}  #endif  	return error; @@ -2630,7 +2633,6 @@ xfs_idestroy(  		xfs_idestroy_fork(ip, XFS_ATTR_FORK);  	mrfree(&ip->i_lock);  	mrfree(&ip->i_iolock); -	freesema(&ip->i_flock);  #ifdef XFS_INODE_TRACE  	ktrace_free(ip->i_trace); @@ -3048,10 +3050,10 @@ cluster_corrupt_out:  /*   * xfs_iflush() will write a modified inode's changes out to the   * inode's on disk home.  The caller must have the inode lock held - * in at least shared mode and the inode flush semaphore must be - * held as well.  The inode lock will still be held upon return from + * in at least shared mode and the inode flush completion must be + * active as well.  The inode lock will still be held upon return from   * the call and the caller is free to unlock it. - * The inode flush lock will be unlocked when the inode reaches the disk. + * The inode flush will be completed when the inode reaches the disk.   * The flags indicate how the inode's buffer should be written out.   
*/  int @@ -3070,7 +3072,7 @@ xfs_iflush(  	XFS_STATS_INC(xs_iflush_count);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); -	ASSERT(issemalocked(&(ip->i_flock))); +	ASSERT(!completion_done(&ip->i_flush));  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||  	       ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3233,7 +3235,7 @@ xfs_iflush_int(  #endif  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); -	ASSERT(issemalocked(&(ip->i_flock))); +	ASSERT(!completion_done(&ip->i_flush));  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||  	       ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3465,7 +3467,6 @@ xfs_iflush_all(  	xfs_mount_t	*mp)  {  	xfs_inode_t	*ip; -	bhv_vnode_t	*vp;   again:  	XFS_MOUNT_ILOCK(mp); @@ -3480,14 +3481,13 @@ xfs_iflush_all(  			continue;  		} -		vp = XFS_ITOV_NULL(ip); -		if (!vp) { +		if (!VFS_I(ip)) {  			XFS_MOUNT_IUNLOCK(mp);  			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);  			goto again;  		} -		ASSERT(vn_count(vp) == 0); +		ASSERT(vn_count(VFS_I(ip)) == 0);  		ip = ip->i_mnext;  	} while (ip != mp->m_inodes); @@ -3707,7 +3707,7 @@ xfs_iext_add_indirect_multi(  	 * (all extents past */  	if (nex2) {  		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); -		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); +		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);  		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);  		erp->er_extcount -= nex2;  		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); @@ -4007,8 +4007,7 @@ xfs_iext_realloc_direct(  			ifp->if_u1.if_extents =  				kmem_realloc(ifp->if_u1.if_extents,  						rnew_size, -						ifp->if_real_bytes, -						KM_SLEEP); +						ifp->if_real_bytes, KM_NOFS);  		}  		if (rnew_size > ifp->if_real_bytes) {  			memset(&ifp->if_u1.if_extents[ifp->if_bytes / @@ -4067,7 +4066,7 @@ xfs_iext_inline_to_direct(  	xfs_ifork_t	*ifp,		/* inode fork pointer */  	int		new_size)	/* number of extents in file */  { -	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); +	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);  	memset(ifp->if_u1.if_extents, 0, new_size);  	if (ifp->if_bytes) {  		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, @@ -4099,7 +4098,7 @@ xfs_iext_realloc_indirect(  	} else {  		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)  			kmem_realloc(ifp->if_u1.if_ext_irec, -				new_size, size, KM_SLEEP); +				new_size, size, KM_NOFS);  	}  } @@ -4341,11 +4340,10 @@ xfs_iext_irec_init(  	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);  	ASSERT(nextents <= XFS_LINEAR_EXTS); -	erp = (xfs_ext_irec_t *) -		kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); +	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);  	if (nextents == 0) { -		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); +		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);  	} else if (!ifp->if_real_bytes) {  		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);  	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { @@ -4393,7 +4391,7 @@ xfs_iext_irec_new(  	/* Initialize new extent record */  	erp = ifp->if_u1.if_ext_irec; -	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); +	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);  	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;  	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);  	erp[erp_idx].er_extcount = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 17a04b6321e..1420c49674d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -87,8 +87,7 @@ typedef struct xfs_ifork {   * Flags for 
xfs_ichgtime().   */  #define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */ -#define	XFS_ICHGTIME_ACC	0x2	/* data fork access timestamp */ -#define	XFS_ICHGTIME_CHG	0x4	/* inode field change timestamp */ +#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */  /*   * Per-fork incore inode flags. @@ -204,7 +203,7 @@ typedef struct xfs_inode {  	struct xfs_inode	*i_mprev;	/* ptr to prev inode */  	struct xfs_mount	*i_mount;	/* fs mount struct ptr */  	struct list_head	i_reclaim;	/* reclaim list */ -	bhv_vnode_t		*i_vnode;	/* vnode backpointer */ +	struct inode		*i_vnode;	/* vnode backpointer */  	struct xfs_dquot	*i_udquot;	/* user dquot */  	struct xfs_dquot	*i_gdquot;	/* group dquot */ @@ -223,7 +222,7 @@ typedef struct xfs_inode {  	struct xfs_inode_log_item *i_itemp;	/* logging information */  	mrlock_t		i_lock;		/* inode lock */  	mrlock_t		i_iolock;	/* inode IO lock */ -	sema_t			i_flock;	/* inode flush lock */ +	struct completion	i_flush;	/* inode flush completion q */  	atomic_t		i_pincount;	/* inode pin count */  	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */  	spinlock_t		i_flags_lock;	/* inode i_flags lock */ @@ -263,6 +262,18 @@ typedef struct xfs_inode {  #define XFS_ISIZE(ip)	(((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \  				(ip)->i_size : (ip)->i_d.di_size; +/* Convert from vfs inode to xfs inode */ +static inline struct xfs_inode *XFS_I(struct inode *inode) +{ +	return (struct xfs_inode *)inode->i_private; +} + +/* convert from xfs inode to vfs inode */ +static inline struct inode *VFS_I(struct xfs_inode *ip) +{ +	return (struct inode *)ip->i_vnode; +} +  /*   * i_flags helper functions   */ @@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)  #define	XFS_ITRUNC_DEFINITE	0x1  #define	XFS_ITRUNC_MAYBE	0x2 -#define	XFS_ITOV(ip)		((ip)->i_vnode) -#define	XFS_ITOV_NULL(ip)	((ip)->i_vnode) -  /*   * For multiple groups support: if S_ISGID bit is set in the parent   * directory, group of new file is set to that of the parent, and @@ -473,11 +481,8 @@ int		xfs_ilock_nowait(xfs_inode_t *, uint);  void		xfs_iunlock(xfs_inode_t *, uint);  void		xfs_ilock_demote(xfs_inode_t *, uint);  int		xfs_isilocked(xfs_inode_t *, uint); -void		xfs_iflock(xfs_inode_t *); -int		xfs_iflock_nowait(xfs_inode_t *);  uint		xfs_ilock_map_shared(xfs_inode_t *);  void		xfs_iunlock_map_shared(xfs_inode_t *, uint); -void		xfs_ifunlock(xfs_inode_t *);  void		xfs_ireclaim(xfs_inode_t *);  int		xfs_finish_reclaim(xfs_inode_t *, int, int);  int		xfs_finish_reclaim_all(struct xfs_mount *, int); @@ -522,6 +527,7 @@ void		xfs_iflush_all(struct xfs_mount *);  void		xfs_ichgtime(xfs_inode_t *, int);  xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);  void		xfs_lock_inodes(xfs_inode_t **, int, uint); +void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);  void		xfs_synchronize_atime(xfs_inode_t *);  void		xfs_mark_inode_dirty_sync(xfs_inode_t *); @@ -570,6 +576,26 @@ extern struct kmem_zone	*xfs_ifork_zone;  extern struct kmem_zone	*xfs_inode_zone;  extern struct kmem_zone	*xfs_ili_zone; +/* + * Manage the i_flush queue embedded in the inode.  This completion + * queue synchronizes processes attempting to flush the in-core + * inode back to disk. 
+ */ +static inline void xfs_iflock(xfs_inode_t *ip) +{ +	wait_for_completion(&ip->i_flush); +} + +static inline int xfs_iflock_nowait(xfs_inode_t *ip) +{ +	return try_wait_for_completion(&ip->i_flush); +} + +static inline void xfs_ifunlock(xfs_inode_t *ip) +{ +	complete(&ip->i_flush); +} +  #endif	/* __KERNEL__ */  #endif	/* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eee08a32c2..97c7452e262 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -779,11 +779,10 @@ xfs_inode_item_pushbuf(  	ASSERT(iip->ili_push_owner == current_pid());  	/* -	 * If flushlock isn't locked anymore, chances are that the -	 * inode flush completed and the inode was taken off the AIL. -	 * So, just get out. +	 * If a flush is not in progress anymore, chances are that the +	 * inode was taken off the AIL. So, just get out.  	 */ -	if (!issemalocked(&(ip->i_flock)) || +	if (completion_done(&ip->i_flush) ||  	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {  		iip->ili_pushbuf_flag = 0;  		xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -805,7 +804,7 @@ xfs_inode_item_pushbuf(  			 * If not, we can flush it async.  			 */  			dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && -				  issemalocked(&(ip->i_flock))); +				  !completion_done(&ip->i_flush));  			iip->ili_pushbuf_flag = 0;  			xfs_iunlock(ip, XFS_ILOCK_SHARED);  			xfs_buftrace("INODE ITEM PUSH", bp); @@ -858,7 +857,7 @@ xfs_inode_item_push(  	ip = iip->ili_inode;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); -	ASSERT(issemalocked(&(ip->i_flock))); +	ASSERT(!completion_done(&ip->i_flush));  	/*  	 * Since we were able to lock the inode's flush lock and  	 * we found it on the AIL, the inode must be dirty.  This diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9a3ef9dcaeb..cf6754a3c5b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -59,7 +59,6 @@ xfs_bulkstat_one_iget(  {  	xfs_icdinode_t	*dic;	/* dinode core info pointer */  	xfs_inode_t	*ip;		/* incore inode pointer */ -	bhv_vnode_t	*vp;  	int		error;  	error = xfs_iget(mp, NULL, ino, @@ -72,7 +71,6 @@ xfs_bulkstat_one_iget(  	ASSERT(ip != NULL);  	ASSERT(ip->i_blkno != (xfs_daddr_t)0); -	vp = XFS_ITOV(ip);  	dic = &ip->i_d;  	/* xfs_iget returns the following without needing @@ -85,7 +83,7 @@ xfs_bulkstat_one_iget(  	buf->bs_uid = dic->di_uid;  	buf->bs_gid = dic->di_gid;  	buf->bs_size = dic->di_size; -	vn_atime_to_bstime(vp, &buf->bs_atime); +	vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);  	buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;  	buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;  	buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 91b00a5686c..ccba14eb9db 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -160,7 +160,7 @@ void  xlog_trace_iclog(xlog_in_core_t *iclog, uint state)  {  	if (!iclog->ic_trace) -		iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); +		iclog->ic_trace = ktrace_alloc(256, KM_NOFS);  	ktrace_enter(iclog->ic_trace,  		     (void *)((unsigned long)state),  		     (void *)((unsigned long)current_pid()), @@ -336,15 +336,12 @@ xfs_log_done(xfs_mount_t	*mp,  	} else {  		xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");  		xlog_regrant_reserve_log_space(log, ticket); -	} - -	/* If this ticket was a permanent reservation and we aren't -	 * trying to release it, reset the inited flags; so next time -	 * we write, a start record will be written out. 
-	 */ -	if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) && -	    (flags & XFS_LOG_REL_PERM_RESERV) == 0) +		ticket->t_flags |= XLOG_TIC_INITED; +	}  	return lsn;  }	/* xfs_log_done */ @@ -357,11 +354,11 @@ xfs_log_done(xfs_mount_t	*mp,   * Asynchronous forces are implemented by setting the WANT_SYNC   * bit in the appropriate in-core log and then returning.   * - * Synchronous forces are implemented with a semaphore.  All callers - * to force a given lsn to disk will wait on a semaphore attached to the + * Synchronous forces are implemented with a signal variable. All callers + * to force a given lsn to disk will wait on the sv attached to the   * specific in-core log.  When given in-core log finally completes its   * write to disk, that thread will wake up all threads waiting on the - * semaphore. + * sv.   */  int  _xfs_log_force( @@ -588,12 +585,12 @@ error:   * mp		- ubiquitous xfs mount point structure   */  int -xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) +xfs_log_mount_finish(xfs_mount_t *mp)  {  	int	error;  	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) -		error = xlog_recover_finish(mp->m_log, mfsi_flags); +		error = xlog_recover_finish(mp->m_log);  	else {  		error = 0;  		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); @@ -707,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)  		if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||  		      iclog->ic_state == XLOG_STATE_DIRTY)) {  			if (!XLOG_FORCED_SHUTDOWN(log)) { -				sv_wait(&iclog->ic_forcesema, PMEM, +				sv_wait(&iclog->ic_force_wait, PMEM,  					&log->l_icloglock, s);  			} else {  				spin_unlock(&log->l_icloglock); @@ -748,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)  			|| iclog->ic_state == XLOG_STATE_DIRTY  			|| iclog->ic_state == XLOG_STATE_IOERROR) ) { -				sv_wait(&iclog->ic_forcesema, PMEM, +				sv_wait(&iclog->ic_force_wait, PMEM,  					&log->l_icloglock, s);  		} else {  			spin_unlock(&log->l_icloglock); @@ -838,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t	*mp,  				break;  			tail_lsn = 0;  			free_bytes -= tic->t_unit_res; -			sv_signal(&tic->t_sema); +			sv_signal(&tic->t_wait);  			tic = tic->t_next;  		} while (tic != log->l_write_headq);  	} @@ -859,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t	*mp,  				break;  			tail_lsn = 0;  			free_bytes -= need_bytes; -			sv_signal(&tic->t_sema); +			sv_signal(&tic->t_wait);  			tic = tic->t_next;  		} while (tic != log->l_reserve_headq);  	} @@ -1285,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t	*mp,  		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));  		ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); -		sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); -		sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); +		sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); +		sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");  		iclogp = &iclog->ic_next;  	} @@ -1565,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log)  	iclog = log->l_iclog;  	for (i=0; i<log->l_iclog_bufs; i++) { -		sv_destroy(&iclog->ic_forcesema); -		sv_destroy(&iclog->ic_writesema); +		sv_destroy(&iclog->ic_force_wait); +		sv_destroy(&iclog->ic_write_wait);  		xfs_buf_free(iclog->ic_bp);  #ifdef XFS_LOG_TRACE  		if (iclog->ic_trace != NULL) { @@ -1976,7 +1973,7 @@ xlog_write(xfs_mount_t *	mp,  /* Clean iclogs starting from the head. 
This ordering must be   * maintained, so an iclog doesn't become ACTIVE beyond one that   * is SYNCING.  This is also required to maintain the notion that we use - * a counting semaphore to hold off would be writers to the log when every + * an ordered wait queue to hold off would-be writers to the log when every   * iclog is trying to sync to disk.   *   * State Change: DIRTY -> ACTIVE @@ -2240,7 +2237,7 @@ xlog_state_do_callback(  			xlog_state_clean_log(log);  			/* wake up threads waiting in xfs_log_force() */ -			sv_broadcast(&iclog->ic_forcesema); +			sv_broadcast(&iclog->ic_force_wait);  			iclog = iclog->ic_next;  		} while (first_iclog != iclog); @@ -2302,8 +2299,7 @@ xlog_state_do_callback(   * the second completion goes through.   *   * Callbacks could take time, so they are done outside the scope of the - * global state machine log lock.  Assume that the calls to cvsema won't - * take a long time.  At least we know it won't sleep. + * global state machine log lock.   */  STATIC void  xlog_state_done_syncing( @@ -2339,7 +2335,7 @@ xlog_state_done_syncing(  	 * iclog buffer, we wake them all, one will get to do the  	 * I/O, the others get to wait for the result.  	 */ -	sv_broadcast(&iclog->ic_writesema); +	sv_broadcast(&iclog->ic_write_wait);  	spin_unlock(&log->l_icloglock);  	xlog_state_do_callback(log, aborted, iclog);	/* also cleans log */  }	/* xlog_state_done_syncing */ @@ -2347,11 +2343,9 @@ xlog_state_done_syncing(  /*   * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must - * sleep.  The flush semaphore is set to the number of in-core buffers and - * decremented around disk syncing.  Therefore, if all buffers are syncing, - * this semaphore will cause new writes to sleep until a sync completes. - * Otherwise, this code just does p() followed by v().  This approximates - * a sleep/wakeup except we can't race. + * sleep.  We wait on the flush queue on the head iclog as that should be + * the first iclog to complete flushing. Hence if all iclogs are syncing, + * we will wait here and all new writes will sleep until a sync completes.   *   * The in-core logs are used in a circular fashion. They are not used   * out-of-order even when an iclog past the head is free. @@ -2508,7 +2502,7 @@ xlog_grant_log_space(xlog_t	   *log,  			goto error_return;  		XFS_STATS_INC(xs_sleep_logspace); -		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); +		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);  		/*  		 * If we got an error, and the filesystem is shutting down,  		 * we'll catch it down below. So just continue...
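/*
 * The sv_wait()/sv_signal() pairs above all follow one pattern: a log
 * ticket sleeps with the grant lock atomically dropped, and the
 * tail-mover wakes it once space frees up.  A minimal sketch of such a
 * "signal variable", assuming only the standard wait-queue primitives;
 * the names below are illustrative, not the real fs/xfs/linux-2.6/sv.h.
 */
#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

typedef struct sv_sketch {
	wait_queue_head_t	waiters;
} sv_sketch_t;

static inline void sv_sketch_wait(sv_sketch_t *sv, spinlock_t *lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&sv->waiters, &wait);
	__set_current_state(TASK_UNINTERRUPTIBLE);
	spin_unlock(lock);		/* drop the caller's lock before sleeping */
	schedule();			/* sleep until a wake-up arrives */
	remove_wait_queue(&sv->waiters, &wait);
}

static inline void sv_sketch_signal(sv_sketch_t *sv)
{
	wake_up(&sv->waiters);		/* wakes one exclusive waiter */
}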
@@ -2534,7 +2528,7 @@ redo:  		xlog_trace_loggrant(log, tic,  				    "xlog_grant_log_space: sleep 2");  		XFS_STATS_INC(xs_sleep_logspace); -		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); +		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);  		if (XLOG_FORCED_SHUTDOWN(log)) {  			spin_lock(&log->l_grant_lock); @@ -2633,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,  			if (free_bytes < ntic->t_unit_res)  				break;  			free_bytes -= ntic->t_unit_res; -			sv_signal(&ntic->t_sema); +			sv_signal(&ntic->t_wait);  			ntic = ntic->t_next;  		} while (ntic != log->l_write_headq); @@ -2644,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,  			xlog_trace_loggrant(log, tic,  				    "xlog_regrant_write_log_space: sleep 1");  			XFS_STATS_INC(xs_sleep_logspace); -			sv_wait(&tic->t_sema, PINOD|PLTWAIT, +			sv_wait(&tic->t_wait, PINOD|PLTWAIT,  				&log->l_grant_lock, s);  			/* If we're shutting down, this tic is already @@ -2673,7 +2667,7 @@ redo:  		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)  			xlog_ins_ticketq(&log->l_write_headq, tic);  		XFS_STATS_INC(xs_sleep_logspace); -		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); +		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);  		/* If we're shutting down, this tic is already off the queue */  		if (XLOG_FORCED_SHUTDOWN(log)) { @@ -2916,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t		*log,   *	2. the current iclog is drity, and the previous iclog is in the   *		active or dirty state.   * - * We may sleep (call psema) if: + * We may sleep if:   *   *	1. the current iclog is not in the active nor dirty state.   *	2. the current iclog dirty, and the previous iclog is not in the @@ -3013,7 +3007,7 @@ maybe_sleep:  			return XFS_ERROR(EIO);  		}  		XFS_STATS_INC(xs_log_force_sleep); -		sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); +		sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);  		/*  		 * No need to grab the log lock here since we're  		 * only deciding whether or not to return EIO @@ -3096,7 +3090,7 @@ try_again:  						 XLOG_STATE_SYNCING))) {  			ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));  			XFS_STATS_INC(xs_log_force_sleep); -			sv_wait(&iclog->ic_prev->ic_writesema, PSWP, +			sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,  				&log->l_icloglock, s);  			*log_flushed = 1;  			already_slept = 1; @@ -3116,7 +3110,7 @@ try_again:  	    !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {  		/* -		 * Don't wait on the forcesema if we know that we've +		 * Don't wait on completion if we know that we've  		 * gotten a log write error.  		 
*/  		if (iclog->ic_state & XLOG_STATE_IOERROR) {  			spin_unlock(&log->l_icloglock);  			return XFS_ERROR(EIO);  		}  		XFS_STATS_INC(xs_log_force_sleep); -		sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); +		sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);  		/*  		 * No need to grab the log lock here since we're  		 * only deciding whether or not to return EIO @@ -3180,7 +3174,7 @@ STATIC void  xlog_ticket_put(xlog_t		*log,  		xlog_ticket_t	*ticket)  { -	sv_destroy(&ticket->t_sema); +	sv_destroy(&ticket->t_wait);  	kmem_zone_free(xfs_log_ticket_zone, ticket);  }	/* xlog_ticket_put */ @@ -3270,7 +3264,7 @@ xlog_ticket_get(xlog_t		*log,  	tic->t_trans_type	= 0;  	if (xflags & XFS_LOG_PERM_RESERV)  		tic->t_flags |= XLOG_TIC_PERM_RESERV; -	sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); +	sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");  	xlog_tic_reset_res(tic); @@ -3557,14 +3551,14 @@ xfs_log_force_umount(  	 */  	if ((tic = log->l_reserve_headq)) {  		do { -			sv_signal(&tic->t_sema); +			sv_signal(&tic->t_wait);  			tic = tic->t_next;  		} while (tic != log->l_reserve_headq);  	}  	if ((tic = log->l_write_headq)) {  		do { -			sv_signal(&tic->t_sema); +			sv_signal(&tic->t_wait);  			tic = tic->t_next;  		} while (tic != log->l_write_headq);  	} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d1d678ecb63..d47b91f1082 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -149,7 +149,7 @@ int	  xfs_log_mount(struct xfs_mount	*mp,  			struct xfs_buftarg	*log_target,  			xfs_daddr_t		start_block,  			int		 	num_bblocks); -int	  xfs_log_mount_finish(struct xfs_mount *mp, int); +int	  xfs_log_mount_finish(struct xfs_mount *mp);  void	  xfs_log_move_tail(struct xfs_mount	*mp,  			    xfs_lsn_t		tail_lsn);  int	  xfs_log_notify(struct xfs_mount	*mp, diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 6245913196b..c8a5b22ee3e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -241,7 +241,7 @@ typedef struct xlog_res {  } xlog_res_t;  typedef struct xlog_ticket { -	sv_t		   t_sema;	 /* sleep on this semaphore      : 20 */ +	sv_t		   t_wait;	 /* ticket wait queue            : 20 */  	struct xlog_ticket *t_next;	 /*			         :4|8 */  	struct xlog_ticket *t_prev;	 /*				 :4|8 */  	xlog_tid_t	   t_tid;	 /* transaction identifier	 : 4  */ @@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {   *	xlog_rec_header_t into the reserved space.   * - ic_data follows, so a write to disk can start at the beginning of   *	the iclog. - * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. + * - ic_force_wait is used to implement synchronous forcing of the iclog to disk.   * - ic_next is the pointer to the next iclog in the ring.   * - ic_bp is a pointer to the buffer used to write this incore log to disk.   * - ic_log is a pointer back to the global log structure. @@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {   * and move everything else out to subsequent cachelines.   */  typedef struct xlog_iclog_fields { -	sv_t			ic_forcesema; -	sv_t			ic_writesema; +	sv_t			ic_force_wait; +	sv_t			ic_write_wait;  	struct xlog_in_core	*ic_next;  	struct xlog_in_core	*ic_prev;  	struct xfs_buf		*ic_bp; @@ -377,8 +377,8 @@ typedef struct xlog_in_core {  /*   * Defines to save our code from this glop.   
*/ -#define	ic_forcesema	hic_fields.ic_forcesema -#define ic_writesema	hic_fields.ic_writesema +#define	ic_force_wait	hic_fields.ic_force_wait +#define ic_write_wait	hic_fields.ic_write_wait  #define	ic_next		hic_fields.ic_next  #define	ic_prev		hic_fields.ic_prev  #define	ic_bp		hic_fields.ic_bp @@ -468,7 +468,7 @@ extern int	 xlog_find_tail(xlog_t	*log,  				xfs_daddr_t *head_blk,  				xfs_daddr_t *tail_blk);  extern int	 xlog_recover(xlog_t *log); -extern int	 xlog_recover_finish(xlog_t *log, int mfsi_flags); +extern int	 xlog_recover_finish(xlog_t *log);  extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);  extern void	 xlog_recover_process_iunlinks(xlog_t *log); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9eb722ec744..82d46ce69d5 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3940,8 +3940,7 @@ xlog_recover(   */  int  xlog_recover_finish( -	xlog_t		*log, -	int		mfsi_flags) +	xlog_t		*log)  {  	/*  	 * Now we're ready to do the transactions needed for the @@ -3969,9 +3968,7 @@ xlog_recover_finish(  		xfs_log_force(log->l_mp, (xfs_lsn_t)0,  			      (XFS_LOG_FORCE | XFS_LOG_SYNC)); -		if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { -			xlog_recover_process_iunlinks(log); -		} +		xlog_recover_process_iunlinks(log);  		xlog_recover_check_summary(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6c5d1325e7f..a4503f5e949 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -128,7 +128,7 @@ static const struct {   * initialized.   */  STATIC void -xfs_mount_free( +xfs_free_perag(  	xfs_mount_t	*mp)  {  	if (mp->m_perag) { @@ -139,20 +139,6 @@ xfs_mount_free(  				kmem_free(mp->m_perag[agno].pagb_list);  		kmem_free(mp->m_perag);  	} - -	spinlock_destroy(&mp->m_ail_lock); -	spinlock_destroy(&mp->m_sb_lock); -	mutex_destroy(&mp->m_ilock); -	mutex_destroy(&mp->m_growlock); -	if (mp->m_quotainfo) -		XFS_QM_DONE(mp); - -	if (mp->m_fsname != NULL) -		kmem_free(mp->m_fsname); -	if (mp->m_rtname != NULL) -		kmem_free(mp->m_rtname); -	if (mp->m_logname != NULL) -		kmem_free(mp->m_logname);  }  /* @@ -704,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)   * Update alignment values based on mount options and sb values   */  STATIC int -xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) +xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags)  {  	xfs_sb_t	*sbp = &(mp->m_sb); -	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { +	if (mp->m_dalign) {  		/*  		 * If stripe unit and stripe width are not multiples  		 * of the fs blocksize turn off alignment. @@ -864,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp)   * Check that the data (and log if separate) are an ok size.   
*/  STATIC int -xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) +xfs_check_sizes(xfs_mount_t *mp)  {  	xfs_buf_t	*bp;  	xfs_daddr_t	d; @@ -887,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)  		return error;  	} -	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && -	    mp->m_logdev_targp != mp->m_ddev_targp) { +	if (mp->m_logdev_targp != mp->m_ddev_targp) {  		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);  		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {  			cmn_err(CE_WARN, "XFS: size check 3 failed"); @@ -923,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)   */  int  xfs_mountfs( -	xfs_mount_t	*mp, -	int		mfsi_flags) +	xfs_mount_t	*mp)  {  	xfs_sb_t	*sbp = &(mp->m_sb);  	xfs_inode_t	*rip;  	__uint64_t	resblks;  	__int64_t	update_flags = 0LL;  	uint		quotamount, quotaflags; -	int		agno;  	int		uuid_mounted = 0;  	int		error = 0; @@ -985,7 +968,7 @@ xfs_mountfs(  	 * allocator alignment is within an ag, therefore ag has  	 * to be aligned at stripe boundary.  	 */ -	error = xfs_update_alignment(mp, mfsi_flags, &update_flags); +	error = xfs_update_alignment(mp, &update_flags);  	if (error)  		goto error1; @@ -1004,8 +987,7 @@ xfs_mountfs(  	 * since a single partition filesystem is identical to a single  	 * partition volume/filesystem.  	 */ -	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && -	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { +	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {  		if (xfs_uuid_mount(mp)) {  			error = XFS_ERROR(EINVAL);  			goto error1; @@ -1033,7 +1015,7 @@ xfs_mountfs(  	/*  	 * Check that the data (and log if separate) are an ok size.  	 */ -	error = xfs_check_sizes(mp, mfsi_flags); +	error = xfs_check_sizes(mp);  	if (error)  		goto error1; @@ -1047,13 +1029,6 @@ xfs_mountfs(  	}  	/* -	 * For client case we are done now -	 */ -	if (mfsi_flags & XFS_MFSI_CLIENT) { -		return 0; -	} - -	/*  	 *  Copies the low order bits of the timestamp and the randomly  	 *  set "sequence" number out of a UUID.  	 */ @@ -1077,8 +1052,10 @@ xfs_mountfs(  	 * Allocate and initialize the per-ag data.  	 */  	init_rwsem(&mp->m_peraglock); -	mp->m_perag = -		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); +	mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), +				  KM_MAYFAIL); +	if (!mp->m_perag) +		goto error1;  	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); @@ -1190,7 +1167,7 @@ xfs_mountfs(  	 * delayed until after the root and real-time bitmap inodes  	 * were consistently read in.  	 */ -	error = xfs_log_mount_finish(mp, mfsi_flags); +	error = xfs_log_mount_finish(mp);  	if (error) {  		cmn_err(CE_WARN, "XFS: log mount finish failed");  		goto error4; @@ -1199,7 +1176,7 @@ xfs_mountfs(  	/*  	 * Complete the quota initialisation, post-log-replay component.  	 */ -	error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); +	error = XFS_QM_MOUNT(mp, quotamount, quotaflags);  	if (error)  		goto error4; @@ -1233,12 +1210,7 @@ xfs_mountfs(   error3:  	xfs_log_unmount_dealloc(mp);   error2: -	for (agno = 0; agno < sbp->sb_agcount; agno++) -		if (mp->m_perag[agno].pagb_list) -			kmem_free(mp->m_perag[agno].pagb_list); -	kmem_free(mp->m_perag); -	mp->m_perag = NULL; -	/* FALLTHROUGH */ +	xfs_free_perag(mp);   error1:  	if (uuid_mounted)  		uuid_table_remove(&mp->m_sb.sb_uuid); @@ -1246,16 +1218,17 @@ xfs_mountfs(  }  /* - * xfs_unmountfs - *   * This flushes out the inodes,dquots and the superblock, unmounts the   * log and makes sure that incore structures are freed.   
*/ -int -xfs_unmountfs(xfs_mount_t *mp) +void +xfs_unmountfs( +	struct xfs_mount	*mp)  { -	__uint64_t	resblks; -	int		error = 0; +	__uint64_t		resblks; +	int			error; + +	IRELE(mp->m_rootip);  	/*  	 * We can potentially deadlock here if we have an inode cluster @@ -1312,8 +1285,6 @@ xfs_unmountfs(xfs_mount_t *mp)  	xfs_unmountfs_wait(mp); 		/* wait for async bufs */  	xfs_log_unmount(mp);			/* Done! No more fs ops. */ -	xfs_freesb(mp); -  	/*  	 * All inodes from this mount point should be freed.  	 */ @@ -1322,11 +1293,12 @@ xfs_unmountfs(xfs_mount_t *mp)  	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)  		uuid_table_remove(&mp->m_sb.sb_uuid); -#if defined(DEBUG) || defined(INDUCE_IO_ERROR) +#if defined(DEBUG)  	xfs_errortag_clearall(mp, 0);  #endif -	xfs_mount_free(mp); -	return 0; +	xfs_free_perag(mp); +	if (mp->m_quotainfo) +		XFS_QM_DONE(mp);  }  STATIC void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5269bd6e3df..f3c1024b124 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -114,7 +114,7 @@ struct xfs_dqtrxops;  struct xfs_quotainfo;  typedef int	(*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); -typedef int	(*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); +typedef int	(*xfs_qmmount_t)(struct xfs_mount *, uint, uint);  typedef int	(*xfs_qmunmount_t)(struct xfs_mount *);  typedef void	(*xfs_qmdone_t)(struct xfs_mount *);  typedef void	(*xfs_dqrele_t)(struct xfs_dquot *); @@ -158,8 +158,8 @@ typedef struct xfs_qmops {  #define XFS_QM_INIT(mp, mnt, fl) \  	(*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) -#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ -	(*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) +#define XFS_QM_MOUNT(mp, mnt, fl) \ +	(*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)  #define XFS_QM_UNMOUNT(mp) \  	(*(mp)->m_qm_ops->xfs_qmunmount)(mp)  #define XFS_QM_DONE(mp) \ @@ -442,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,  /*   * Flags for xfs_mountfs   */ -#define XFS_MFSI_SECOND		0x01	/* Secondary mount -- skip stuff */ -#define XFS_MFSI_CLIENT		0x02	/* Is a client -- skip lots of stuff */ -/*	XFS_MFSI_RRINODES	*/ -#define XFS_MFSI_NOUNLINK	0x08	/* Skip unlinked inode processing in */ -					/* log recovery */ -#define XFS_MFSI_NO_QUOTACHECK	0x10	/* Skip quotacheck processing */ -/*	XFS_MFSI_CONVERT_SUNIT	*/  #define XFS_MFSI_QUIET		0x40	/* Be silent if mount errors found */  #define XFS_DADDR_TO_AGNO(mp,d)         xfs_daddr_to_agno(mp,d) @@ -517,10 +510,10 @@ typedef struct xfs_mod_sb {  extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);  extern int	xfs_log_sbcount(xfs_mount_t *, uint); -extern int	xfs_mountfs(xfs_mount_t *mp, int); +extern int	xfs_mountfs(xfs_mount_t *mp);  extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp); -extern int	xfs_unmountfs(xfs_mount_t *); +extern void	xfs_unmountfs(xfs_mount_t *);  extern int	xfs_unmountfs_writesb(xfs_mount_t *);  extern int	xfs_unmount_flush(xfs_mount_t *, int);  extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index bf87a591350..e2f68de1615 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,   */  /* - * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. - */ -STATIC int -xfs_lowbit32( -	__uint32_t	v) -{ -	if (v) -		return ffs(v) - 1; -	return -1; -} - -/*   * Allocate space to the bitmap or summary file, and zero it, for growfs.   
 */
 STATIC int				/* error */
@@ -450,6 +438,7 @@ xfs_rtallocate_extent_near(
 	}
 	bbno = XFS_BITTOBLOCK(mp, bno);
 	i = 0;
+	ASSERT(minlen != 0);
 	log2len = xfs_highbit32(minlen);
 	/*
 	 * Loop over all bitmap blocks (bbno + i is current block).
@@ -618,6 +607,8 @@ xfs_rtallocate_extent_size(
 	xfs_suminfo_t	sum;		/* summary information for extents */
 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+	ASSERT(maxlen != 0);
+
 	/*
 	 * Loop over all the levels starting with maxlen.
 	 * At each level, look at all the bitmap blocks, to see if there
@@ -675,6 +666,9 @@ xfs_rtallocate_extent_size(
 		*rtblock = NULLRTBLOCK;
 		return 0;
 	}
+	ASSERT(minlen != 0);
+	ASSERT(maxlen != 0);
+
 	/*
 	 * Loop over sizes, from maxlen down to minlen.
 	 * This time, when we do the allocations, allow smaller ones
@@ -1961,6 +1955,7 @@ xfs_growfs_rt(
 				  nsbp->sb_blocksize * nsbp->sb_rextsize);
 		nsbp->sb_rextents = nsbp->sb_rblocks;
 		do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
+		ASSERT(nsbp->sb_rextents != 0);
 		nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
 		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
 		nrsumsize =
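
The new assertions guard the bit helpers: xfs_highbit32(), like the xfs_lowbit32() wrapper deleted from this file earlier in the diff, returns -1 when its argument has no bits set, which would turn log2len and sb_rextslog into bogus indices. Below is a minimal, self-contained userspace sketch of those semantics; it assumes the ffs()-style one-based numbering from <strings.h> and models the helpers rather than quoting the kernel implementations.

#include <assert.h>
#include <strings.h>

/* Model of the deleted xfs_lowbit32(): lowest set bit, -1 if none.
 * ffs() numbers bits from 1 and returns 0 for no bits, hence the -1. */
static int lowbit32(unsigned int v)
{
	return v ? ffs(v) - 1 : -1;
}

/* Model of xfs_highbit32() semantics: highest set bit, -1 if none. */
static int highbit32(unsigned int v)
{
	int bit = -1;

	while (v) {		/* shift until the value is exhausted */
		v >>= 1;
		bit++;
	}
	return bit;
}

int main(void)
{
	unsigned int minlen = 8;

	assert(minlen != 0);		/* mirrors the new ASSERT()s */
	assert(highbit32(minlen) == 3);	/* log2len of an 8-block minlen */
	assert(lowbit32(0x50) == 4);	/* bit 4 is the lowest set */
	assert(highbit32(0) == -1);	/* the case the asserts rule out */
	return 0;
}
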
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index b0f31c09a76..3a82576dde9 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -314,7 +314,7 @@ xfs_bioerror_relse(
 		 * ASYNC buffers.
 		 */
 		XFS_BUF_ERROR(bp, EIO);
-		XFS_BUF_V_IODONESEMA(bp);
+		XFS_BUF_FINISH_IOWAIT(bp);
 	} else {
 		xfs_buf_relse(bp);
 	}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index e4ebddd3c50..4e1c22a23be 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -43,6 +43,7 @@
 #include "xfs_quota.h"
 #include "xfs_trans_priv.h"
 #include "xfs_trans_space.h"
+#include "xfs_inode_item.h"
 STATIC void	xfs_trans_apply_sb_deltas(xfs_trans_t *);
@@ -253,7 +254,7 @@ _xfs_trans_alloc(
 	tp->t_mountp = mp;
 	tp->t_items_free = XFS_LIC_NUM_SLOTS;
 	tp->t_busy_free = XFS_LBC_NUM_SLOTS;
-	XFS_LIC_INIT(&(tp->t_items));
+	xfs_lic_init(&(tp->t_items));
 	XFS_LBC_INIT(&(tp->t_busy));
 	return tp;
 }
@@ -282,7 +283,7 @@ xfs_trans_dup(
 	ntp->t_mountp = tp->t_mountp;
 	ntp->t_items_free = XFS_LIC_NUM_SLOTS;
 	ntp->t_busy_free = XFS_LBC_NUM_SLOTS;
-	XFS_LIC_INIT(&(ntp->t_items));
+	xfs_lic_init(&(ntp->t_items));
 	XFS_LBC_INIT(&(ntp->t_busy));
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1169,7 +1170,7 @@ xfs_trans_cancel(
 		while (licp != NULL) {
 			lidp = licp->lic_descs;
 			for (i = 0; i < licp->lic_unused; i++, lidp++) {
-				if (XFS_LIC_ISFREE(licp, i)) {
+				if (xfs_lic_isfree(licp, i)) {
 					continue;
 				}
@@ -1216,6 +1217,68 @@ xfs_trans_free(
 	kmem_zone_free(xfs_trans_zone, tp);
 }
+/*
+ * Roll from one trans in the sequence of PERMANENT transactions to
+ * the next: permanent transactions are only flushed out when
+ * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want to let
+ * chunks of it go out to the log as soon as possible. So we commit
+ * the chunk we've been working on and get a new transaction to continue.
+ */
+int
+xfs_trans_roll(
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*dp)
+{
+	struct xfs_trans	*trans;
+	unsigned int		logres, count;
+	int			error;
+
+	/*
+	 * Ensure that the inode is always logged.
+	 */
+	trans = *tpp;
+	xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
+
+	/*
+	 * Copy the critical parameters from one trans to the next.
+	 */
+	logres = trans->t_log_res;
+	count = trans->t_log_count;
+	*tpp = xfs_trans_dup(trans);
+
+	/*
+	 * Commit the current transaction.
+	 * If the commit fails, it will only unlock the items that are
+	 * not marked ihold; a failure here also means that a filesystem
+	 * shutdown is in progress, and the caller is responsible for
+	 * cancelling the duplicate transaction that gets returned.
+	 */
+	error = xfs_trans_commit(trans, 0);
+	if (error)
+		return error;
+
+	trans = *tpp;
+
+	/*
+	 * Reserve space in the log for the next transaction.
+	 * This also pushes items in the "AIL", the list of logged items,
+	 * out to disk if they are taking up space at the tail of the log
+	 * that we want to use.  This requires that either nothing be locked
+	 * across this call, or that anything that is locked be logged in
+	 * the prior and the next transactions.
+	 */
+	error = xfs_trans_reserve(trans, 0, logres, 0,
+				  XFS_TRANS_PERM_LOG_RES, count);
+	/*
+	 * Ensure that the inode is in the new transaction and locked.
+	 */
+	if (error)
+		return error;
+
+	xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ihold(trans, dp);
+	return 0;
+}
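
xfs_trans_roll() exists for long-running operations that hold a permanent log reservation and must not pin the log tail. The following is a hedged sketch of the intended call pattern, not code from this series: the wrapper name, the nchunks bound, and the xfs_do_one_chunk() helper are all illustrative assumptions.

STATIC int
xfs_roll_example(			/* assumed name, for illustration */
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	int			nchunks)
{
	int			error;

	while (nchunks-- > 0) {
		/* do one bounded piece of work with the inode joined */
		error = xfs_do_one_chunk(*tpp, ip);	/* assumed helper */
		if (error)
			return error;	/* caller cancels *tpp */

		/*
		 * Commit the chunk and carry on in a fresh transaction;
		 * the inode comes back joined, held and still locked.
		 */
		error = xfs_trans_roll(tpp, ip);
		if (error)
			return error;
	}
	return 0;
}
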
 /*
  * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
@@ -1253,7 +1316,7 @@ xfs_trans_committed(
 	 * Special case the chunk embedded in the transaction.
 	 */
 	licp = &(tp->t_items);
-	if (!(XFS_LIC_ARE_ALL_FREE(licp))) {
+	if (!(xfs_lic_are_all_free(licp))) {
 		xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
 	}
@@ -1262,7 +1325,7 @@
 	 */
 	licp = licp->lic_next;
 	while (licp != NULL) {
-		ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+		ASSERT(!xfs_lic_are_all_free(licp));
 		xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
 		next_licp = licp->lic_next;
 		kmem_free(licp);
@@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed(
 	lidp = licp->lic_descs;
 	for (i = 0; i < licp->lic_unused; i++, lidp++) {
-		if (XFS_LIC_ISFREE(licp, i)) {
+		if (xfs_lic_isfree(licp, i)) {
 			continue;
 		}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 0804207c739..74c80bd2b0e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk {
  * lic_unused to the right value (0 matches all free).  The
  * lic_descs.lid_index values are set up as each desc is allocated.
  */
-#define	XFS_LIC_INIT(cp)	xfs_lic_init(cp)
 static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
 {
 	cp->lic_free = XFS_LIC_FREEMASK;
 }
-#define	XFS_LIC_INIT_SLOT(cp,slot)	xfs_lic_init_slot(cp, slot)
 static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
 {
 	cp->lic_descs[slot].lid_index = (unsigned char)(slot);
 }
-#define	XFS_LIC_VACANCY(cp)		xfs_lic_vacancy(cp)
 static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
 {
 	return cp->lic_free & XFS_LIC_FREEMASK;
 }
-#define	XFS_LIC_ALL_FREE(cp)		xfs_lic_all_free(cp)
 static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
 {
 	cp->lic_free = XFS_LIC_FREEMASK;
 }
-#define	XFS_LIC_ARE_ALL_FREE(cp)	xfs_lic_are_all_free(cp)
 static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
 {
 	return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
 }
-#define	XFS_LIC_ISFREE(cp,slot)	xfs_lic_isfree(cp,slot)
 static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
 {
 	return (cp->lic_free & (1 << slot));
 }
-#define	XFS_LIC_CLAIM(cp,slot)		xfs_lic_claim(cp,slot)
 static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
 {
 	cp->lic_free &= ~(1 << slot);
 }
-#define	XFS_LIC_RELSE(cp,slot)		xfs_lic_relse(cp,slot)
 static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
 {
 	cp->lic_free |= 1 << slot;
 }
-#define	XFS_LIC_SLOT(cp,slot)		xfs_lic_slot(cp,slot)
 static inline xfs_log_item_desc_t *
 xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
 {
 	return &(cp->lic_descs[slot]);
 }
-#define	XFS_LIC_DESC_TO_SLOT(dp)	xfs_lic_desc_to_slot(dp)
 static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
 {
 	return (uint)dp->lid_index;
@@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
  * All of this yields the address of the chunk, which is
  * cast to a chunk pointer.
  */
-#define	XFS_LIC_DESC_TO_CHUNK(dp)	xfs_lic_desc_to_chunk(dp)
 static inline xfs_log_item_chunk_t *
 xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 {
@@ -986,6 +975,7 @@ int		_xfs_trans_commit(xfs_trans_t *,
 				  int *);
 #define xfs_trans_commit(tp, flags)	_xfs_trans_commit(tp, flags, NULL)
 void		xfs_trans_cancel(xfs_trans_t *, int);
+int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
 int		xfs_trans_ail_init(struct xfs_mount *);
 void		xfs_trans_ail_destroy(struct xfs_mount *);
 void		xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
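
The xfs_lic_* inlines above (previously hidden behind the XFS_LIC_* macros) manage a per-chunk free-slot bitmap in which a set bit marks a free descriptor slot. A self-contained userspace sketch of that claim/release discipline follows; the 15-slot mask is an assumption standing in for XFS_LIC_FREEMASK, and the shortened names are illustrative.

#include <assert.h>

#define FREEMASK	0x7fff	/* assumed 15-slot all-free mask */

struct chunk {
	unsigned int	free;	/* set bit == slot is free */
};

static void chunk_init(struct chunk *cp)
{
	cp->free = FREEMASK;		/* every slot starts out free */
}

static int chunk_isfree(struct chunk *cp, int slot)
{
	return cp->free & (1 << slot);
}

static void chunk_claim(struct chunk *cp, int slot)
{
	cp->free &= ~(1 << slot);	/* clear the bit to occupy the slot */
}

static void chunk_relse(struct chunk *cp, int slot)
{
	cp->free |= 1 << slot;		/* set the bit to release the slot */
}

int main(void)
{
	struct chunk c;

	chunk_init(&c);
	assert(chunk_isfree(&c, 3));	/* everything starts free */
	chunk_claim(&c, 3);
	assert(!chunk_isfree(&c, 3));
	chunk_relse(&c, 3);
	assert(c.free == FREEMASK);	/* all free again */
	return 0;
}
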
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index cb0c5839154..4e855b5ced6 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match(
 	bp = NULL;
 	len = BBTOB(len);
 	licp = &tp->t_items;
-	if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+	if (!xfs_lic_are_all_free(licp)) {
 		for (i = 0; i < licp->lic_unused; i++) {
 			/*
 			 * Skip unoccupied slots.
 			 */
-			if (XFS_LIC_ISFREE(licp, i)) {
+			if (xfs_lic_isfree(licp, i)) {
 				continue;
 			}
-			lidp = XFS_LIC_SLOT(licp, i);
+			lidp = xfs_lic_slot(licp, i);
 			blip = (xfs_buf_log_item_t *)lidp->lid_item;
 			if (blip->bli_item.li_type != XFS_LI_BUF) {
 				continue;
@@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all(
 	bp = NULL;
 	len = BBTOB(len);
 	for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
-		if (XFS_LIC_ARE_ALL_FREE(licp)) {
+		if (xfs_lic_are_all_free(licp)) {
 			ASSERT(licp == &tp->t_items);
 			ASSERT(licp->lic_next == NULL);
 			return NULL;
@@ -1083,11 +1083,11 @@
 			/*
 			 * Skip unoccupied slots.
 			 */
-			if (XFS_LIC_ISFREE(licp, i)) {
+			if (xfs_lic_isfree(licp, i)) {
 				continue;
 			}
-			lidp = XFS_LIC_SLOT(licp, i);
+			lidp = xfs_lic_slot(licp, i);
 			blip = (xfs_buf_log_item_t *)lidp->lid_item;
 			if (blip->bli_item.li_type != XFS_LI_BUF) {
 				continue;
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index db5c8359552..3c666e8317f 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
 		 * Initialize the chunk, and then
 		 * claim the first slot in the newly allocated chunk.
 		 */
-		XFS_LIC_INIT(licp);
-		XFS_LIC_CLAIM(licp, 0);
+		xfs_lic_init(licp);
+		xfs_lic_claim(licp, 0);
 		licp->lic_unused = 1;
-		XFS_LIC_INIT_SLOT(licp, 0);
-		lidp = XFS_LIC_SLOT(licp, 0);
+		xfs_lic_init_slot(licp, 0);
+		lidp = xfs_lic_slot(licp, 0);
 		/*
 		 * Link in the new chunk and update the free count.
@@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
 	 */
 	licp = &tp->t_items;
 	while (licp != NULL) {
-		if (XFS_LIC_VACANCY(licp)) {
+		if (xfs_lic_vacancy(licp)) {
 			if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
 				i = licp->lic_unused;
-				ASSERT(XFS_LIC_ISFREE(licp, i));
+				ASSERT(xfs_lic_isfree(licp, i));
 				break;
 			}
 			for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
-				if (XFS_LIC_ISFREE(licp, i))
+				if (xfs_lic_isfree(licp, i))
 					break;
 			}
 			ASSERT(i <= XFS_LIC_MAX_SLOT);
@@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
 	 * If we find a free descriptor, claim it,
 	 * initialize it, and return it.
 	 */
-	XFS_LIC_CLAIM(licp, i);
+	xfs_lic_claim(licp, i);
 	if (licp->lic_unused <= i) {
 		licp->lic_unused = i + 1;
-		XFS_LIC_INIT_SLOT(licp, i);
+		xfs_lic_init_slot(licp, i);
 	}
-	lidp = XFS_LIC_SLOT(licp, i);
+	lidp = xfs_lic_slot(licp, i);
 	tp->t_items_free--;
 	lidp->lid_item = lip;
 	lidp->lid_flags = 0;
@@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t	*tp, xfs_log_item_desc_t *lidp)
 	xfs_log_item_chunk_t	*licp;
 	xfs_log_item_chunk_t	**licpp;
-	slot = XFS_LIC_DESC_TO_SLOT(lidp);
-	licp = XFS_LIC_DESC_TO_CHUNK(lidp);
-	XFS_LIC_RELSE(licp, slot);
+	slot = xfs_lic_desc_to_slot(lidp);
+	licp = xfs_lic_desc_to_chunk(lidp);
+	xfs_lic_relse(licp, slot);
 	lidp->lid_item->li_desc = NULL;
 	tp->t_items_free++;
@@ -154,7 +154,7 @@ xfs_trans_free_item(xfs_trans_t	*tp, xfs_log_item_desc_t *lidp)
 	 * Also decrement the transaction structure's count of free items
 	 * by the number in a chunk since we are freeing an empty chunk.
 	 */
-	if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) {
+	if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
 		licpp = &(tp->t_items.lic_next);
 		while (*licpp != licp) {
 			ASSERT(*licpp != NULL);
@@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp)
 	/*
 	 * If it's not in the first chunk, skip to the second.
 	 */
-	if (XFS_LIC_ARE_ALL_FREE(licp)) {
+	if (xfs_lic_are_all_free(licp)) {
 		licp = licp->lic_next;
 	}
 	/*
 	 * Return the first non-free descriptor in the chunk.
 	 
*/ -	ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); +	ASSERT(!xfs_lic_are_all_free(licp));  	for (i = 0; i < licp->lic_unused; i++) { -		if (XFS_LIC_ISFREE(licp, i)) { +		if (xfs_lic_isfree(licp, i)) {  			continue;  		} -		return XFS_LIC_SLOT(licp, i); +		return xfs_lic_slot(licp, i);  	}  	cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");  	return NULL; @@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)  	xfs_log_item_chunk_t	*licp;  	int			i; -	licp = XFS_LIC_DESC_TO_CHUNK(lidp); +	licp = xfs_lic_desc_to_chunk(lidp);  	/*  	 * First search the rest of the chunk. The for loop keeps us  	 * from referencing things beyond the end of the chunk.  	 */ -	for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { -		if (XFS_LIC_ISFREE(licp, i)) { +	for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) { +		if (xfs_lic_isfree(licp, i)) {  			continue;  		} -		return XFS_LIC_SLOT(licp, i); +		return xfs_lic_slot(licp, i);  	}  	/* @@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)  	}  	licp = licp->lic_next; -	ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); +	ASSERT(!xfs_lic_are_all_free(licp));  	for (i = 0; i < licp->lic_unused; i++) { -		if (XFS_LIC_ISFREE(licp, i)) { +		if (xfs_lic_isfree(licp, i)) {  			continue;  		} -		return XFS_LIC_SLOT(licp, i); +		return xfs_lic_slot(licp, i);  	}  	ASSERT(0);  	/* NOTREACHED */ @@ -300,9 +300,9 @@ xfs_trans_free_items(  	/*  	 * Special case the embedded chunk so we don't free it below.  	 */ -	if (!XFS_LIC_ARE_ALL_FREE(licp)) { +	if (!xfs_lic_are_all_free(licp)) {  		(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); -		XFS_LIC_ALL_FREE(licp); +		xfs_lic_all_free(licp);  		licp->lic_unused = 0;  	}  	licp = licp->lic_next; @@ -311,7 +311,7 @@ xfs_trans_free_items(  	 * Unlock each item in each chunk and free the chunks.  	 */  	while (licp != NULL) { -		ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); +		ASSERT(!xfs_lic_are_all_free(licp));  		(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);  		next_licp = licp->lic_next;  		kmem_free(licp); @@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)  	/*  	 * Special case the embedded chunk so we don't free.  	 */ -	if (!XFS_LIC_ARE_ALL_FREE(licp)) { +	if (!xfs_lic_are_all_free(licp)) {  		freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);  	}  	licpp = &(tp->t_items.lic_next); @@ -358,10 +358,10 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)  	 * and free empty chunks.  	 
*/  	while (licp != NULL) { -		ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); +		ASSERT(!xfs_lic_are_all_free(licp));  		freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);  		next_licp = licp->lic_next; -		if (XFS_LIC_ARE_ALL_FREE(licp)) { +		if (xfs_lic_are_all_free(licp)) {  			*licpp = next_licp;  			kmem_free(licp);  			freed -= XFS_LIC_NUM_SLOTS; @@ -402,7 +402,7 @@ xfs_trans_unlock_chunk(  	freed = 0;  	lidp = licp->lic_descs;  	for (i = 0; i < licp->lic_unused; i++, lidp++) { -		if (XFS_LIC_ISFREE(licp, i)) { +		if (xfs_lic_isfree(licp, i)) {  			continue;  		}  		lip = lidp->lid_item; @@ -421,7 +421,7 @@ xfs_trans_unlock_chunk(  		 */  		if (!(freeing_chunk) &&  		    (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { -			XFS_LIC_RELSE(licp, i); +			xfs_lic_relse(licp, i);  			freed++;  		}  	} diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 98e5f110ba5..35d4d414bcc 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -237,7 +237,7 @@ xfs_droplink(  	ASSERT (ip->i_d.di_nlink > 0);  	ip->i_d.di_nlink--; -	drop_nlink(ip->i_vnode); +	drop_nlink(VFS_I(ip));  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  	error = 0; @@ -301,7 +301,7 @@ xfs_bumplink(  	ASSERT(ip->i_d.di_nlink > 0);  	ip->i_d.di_nlink++; -	inc_nlink(ip->i_vnode); +	inc_nlink(VFS_I(ip));  	if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&  	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {  		/* diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f316cb85d8e..ef321225d26 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -18,9 +18,6 @@  #ifndef __XFS_UTILS_H__  #define __XFS_UTILS_H__ -#define IRELE(ip)	VN_RELE(XFS_ITOV(ip)) -#define IHOLD(ip)	VN_HOLD(XFS_ITOV(ip)) -  extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);  extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,  				xfs_dev_t, cred_t *, prid_t, int, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 4a9a43315a8..439dd3939dd 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -128,7 +128,6 @@ xfs_unmount_flush(  	xfs_inode_t	*rip = mp->m_rootip;  	xfs_inode_t	*rbmip;  	xfs_inode_t	*rsumip = NULL; -	bhv_vnode_t	*rvp = XFS_ITOV(rip);  	int		error;  	xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); @@ -146,7 +145,7 @@ xfs_unmount_flush(  		if (error == EFSCORRUPTED)  			goto fscorrupt_out; -		ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); +		ASSERT(vn_count(VFS_I(rbmip)) == 1);  		rsumip = mp->m_rsumip;  		xfs_ilock(rsumip, XFS_ILOCK_EXCL); @@ -157,7 +156,7 @@ xfs_unmount_flush(  		if (error == EFSCORRUPTED)  			goto fscorrupt_out; -		ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); +		ASSERT(vn_count(VFS_I(rsumip)) == 1);  	}  	/* @@ -167,7 +166,7 @@ xfs_unmount_flush(  	if (error == EFSCORRUPTED)  		goto fscorrupt_out2; -	if (vn_count(rvp) != 1 && !relocation) { +	if (vn_count(VFS_I(rip)) != 1 && !relocation) {  		xfs_iunlock(rip, XFS_ILOCK_EXCL);  		return XFS_ERROR(EBUSY);  	} @@ -284,7 +283,7 @@ xfs_sync_inodes(  	int             *bypassed)  {  	xfs_inode_t	*ip = NULL; -	bhv_vnode_t	*vp = NULL; +	struct inode	*vp = NULL;  	int		error;  	int		last_error;  	uint64_t	fflag; @@ -404,7 +403,7 @@ xfs_sync_inodes(  			continue;  		} -		vp = XFS_ITOV_NULL(ip); +		vp = VFS_I(ip);  		/*  		 * If the vnode is gone then this is being torn down, @@ -479,7 +478,7 @@ xfs_sync_inodes(  			IPOINTER_INSERT(ip, mp);  			xfs_ilock(ip, lock_flags); -			ASSERT(vp == XFS_ITOV(ip)); +			ASSERT(vp == VFS_I(ip));  			ASSERT(ip->i_mount == mp);  			vnode_refed = B_TRUE; diff --git a/fs/xfs/xfs_vnodeops.c 
b/fs/xfs/xfs_vnodeops.c index 76a1166af82..588bb4aa215 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -83,7 +83,7 @@ xfs_setattr(  	cred_t			*credp)  {  	xfs_mount_t		*mp = ip->i_mount; -	struct inode		*inode = XFS_ITOV(ip); +	struct inode		*inode = VFS_I(ip);  	int			mask = iattr->ia_valid;  	xfs_trans_t		*tp;  	int			code; @@ -513,7 +513,6 @@ xfs_setattr(  			ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;  			ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;  			ip->i_update_core = 1; -			timeflags &= ~XFS_ICHGTIME_ACC;  		}  		if (mask & ATTR_MTIME) {  			inode->i_mtime = iattr->ia_mtime; @@ -714,7 +713,7 @@ xfs_fsync(  		return XFS_ERROR(EIO);  	/* capture size updates in I/O completion before writing the inode. */ -	error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); +	error = filemap_fdatawait(VFS_I(ip)->i_mapping);  	if (error)  		return XFS_ERROR(error); @@ -1160,7 +1159,6 @@ int  xfs_release(  	xfs_inode_t	*ip)  { -	bhv_vnode_t	*vp = XFS_ITOV(ip);  	xfs_mount_t	*mp = ip->i_mount;  	int		error; @@ -1195,13 +1193,13 @@ xfs_release(  		 * be exposed to that problem.  		 */  		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); -		if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) +		if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)  			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);  	}  	if (ip->i_d.di_nlink != 0) {  		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && -		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || +		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||  		       ip->i_delayed_blks > 0)) &&  		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&  		    (!(ip->i_d.di_flags & @@ -1227,7 +1225,6 @@ int  xfs_inactive(  	xfs_inode_t	*ip)  { -	bhv_vnode_t	*vp = XFS_ITOV(ip);  	xfs_bmap_free_t	free_list;  	xfs_fsblock_t	first_block;  	int		committed; @@ -1242,7 +1239,7 @@ xfs_inactive(  	 * If the inode is already free, then there can be nothing  	 * to clean up here.  	 */ -	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { +	if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {  		ASSERT(ip->i_df.if_real_bytes == 0);  		ASSERT(ip->i_df.if_broot_bytes == 0);  		return VN_INACTIVE_CACHE; @@ -1272,7 +1269,7 @@ xfs_inactive(  	if (ip->i_d.di_nlink != 0) {  		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && -                     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || +                     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||                         ip->i_delayed_blks > 0)) &&  		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&  		     (!(ip->i_d.di_flags & @@ -1708,111 +1705,6 @@ std_return:  }  #ifdef DEBUG -/* - * Some counters to see if (and how often) we are hitting some deadlock - * prevention code paths. - */ - -int xfs_rm_locks; -int xfs_rm_lock_delays; -int xfs_rm_attempts; -#endif - -/* - * The following routine will lock the inodes associated with the - * directory and the named entry in the directory. The locks are - * acquired in increasing inode number. - * - * If the entry is "..", then only the directory is locked. The - * vnode ref count will still include that from the .. entry in - * this case. - * - * There is a deadlock we need to worry about. If the locked directory is - * in the AIL, it might be blocking up the log. The next inode we lock - * could be already locked by another thread waiting for log space (e.g - * a permanent log reservation with a long running transaction (see - * xfs_itruncate_finish)). To solve this, we must check if the directory - * is in the ail and use lock_nowait. 
If we can't lock, we need to - * drop the inode lock on the directory and try again. xfs_iunlock will - * potentially push the tail if we were holding up the log. - */ -STATIC int -xfs_lock_dir_and_entry( -	xfs_inode_t	*dp, -	xfs_inode_t	*ip)	/* inode of entry 'name' */ -{ -	int		attempts; -	xfs_ino_t	e_inum; -	xfs_inode_t	*ips[2]; -	xfs_log_item_t	*lp; - -#ifdef DEBUG -	xfs_rm_locks++; -#endif -	attempts = 0; - -again: -	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - -	e_inum = ip->i_ino; - -	xfs_itrace_ref(ip); - -	/* -	 * We want to lock in increasing inum. Since we've already -	 * acquired the lock on the directory, we may need to release -	 * if if the inum of the entry turns out to be less. -	 */ -	if (e_inum > dp->i_ino) { -		/* -		 * We are already in the right order, so just -		 * lock on the inode of the entry. -		 * We need to use nowait if dp is in the AIL. -		 */ - -		lp = (xfs_log_item_t *)dp->i_itemp; -		if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { -			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { -				attempts++; -#ifdef DEBUG -				xfs_rm_attempts++; -#endif - -				/* -				 * Unlock dp and try again. -				 * xfs_iunlock will try to push the tail -				 * if the inode is in the AIL. -				 */ - -				xfs_iunlock(dp, XFS_ILOCK_EXCL); - -				if ((attempts % 5) == 0) { -					delay(1); /* Don't just spin the CPU */ -#ifdef DEBUG -					xfs_rm_lock_delays++; -#endif -				} -				goto again; -			} -		} else { -			xfs_ilock(ip, XFS_ILOCK_EXCL); -		} -	} else if (e_inum < dp->i_ino) { -		xfs_iunlock(dp, XFS_ILOCK_EXCL); - -		ips[0] = ip; -		ips[1] = dp; -		xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); -	} -	/* else	 e_inum == dp->i_ino */ -	/*     This can happen if we're asked to lock /x/.. -	 *     the entry is "..", which is also the parent directory. -	 */ - -	return 0; -} - -#ifdef DEBUG  int xfs_locked_n;  int xfs_small_retries;  int xfs_middle_retries; @@ -1946,6 +1838,45 @@ again:  #endif  } +void +xfs_lock_two_inodes( +	xfs_inode_t		*ip0, +	xfs_inode_t		*ip1, +	uint			lock_mode) +{ +	xfs_inode_t		*temp; +	int			attempts = 0; +	xfs_log_item_t		*lp; + +	ASSERT(ip0->i_ino != ip1->i_ino); + +	if (ip0->i_ino > ip1->i_ino) { +		temp = ip0; +		ip0 = ip1; +		ip1 = temp; +	} + + again: +	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + +	/* +	 * If the first lock we have locked is in the AIL, we must TRY to get +	 * the second lock. If we can't get it, we must release the first one +	 * and try again. +	 */ +	lp = (xfs_log_item_t *)ip0->i_itemp; +	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { +		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { +			xfs_iunlock(ip0, lock_mode); +			if ((++attempts % 5) == 0) +				delay(1); /* Don't just spin the CPU */ +			goto again; +		} +	} else { +		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); +	} +} +  int  xfs_remove(  	xfs_inode_t             *dp, @@ -2018,9 +1949,7 @@ xfs_remove(  		goto out_trans_cancel;  	} -	error = xfs_lock_dir_and_entry(dp, ip); -	if (error) -		goto out_trans_cancel; +	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);  	/*  	 * At this point, we've gotten both the directory and the entry @@ -2047,9 +1976,6 @@ xfs_remove(  		}  	} -	/* -	 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 
-	 */
 	XFS_BMAP_INIT(&free_list, &first_block);
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
 					&first_block, &free_list, resblks);
@@ -2155,7 +2081,6 @@ xfs_link(
 {
 	xfs_mount_t		*mp = tdp->i_mount;
 	xfs_trans_t		*tp;
-	xfs_inode_t		*ips[2];
 	int			error;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
@@ -2203,15 +2128,7 @@ xfs_link(
 		goto error_return;
 	}
-	if (sip->i_ino < tdp->i_ino) {
-		ips[0] = sip;
-		ips[1] = tdp;
-	} else {
-		ips[0] = tdp;
-		ips[1] = sip;
-	}
-
-	xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 	/*
 	 * Increment vnode ref counts since xfs_trans_commit &
@@ -2873,14 +2790,13 @@ int
 xfs_reclaim(
 	xfs_inode_t	*ip)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
 	xfs_itrace_entry(ip);
-	ASSERT(!VN_MAPPED(vp));
+	ASSERT(!VN_MAPPED(VFS_I(ip)));
 	/* bad inode, get out here ASAP */
-	if (VN_BAD(vp)) {
+	if (VN_BAD(VFS_I(ip))) {
 		xfs_ireclaim(ip);
 		return 0;
 	}
@@ -2917,7 +2833,7 @@ xfs_reclaim(
 		XFS_MOUNT_ILOCK(mp);
 		spin_lock(&ip->i_flags_lock);
 		__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-		vn_to_inode(vp)->i_private = NULL;
+		VFS_I(ip)->i_private = NULL;
 		ip->i_vnode = NULL;
 		spin_unlock(&ip->i_flags_lock);
 		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
@@ -2933,7 +2849,7 @@ xfs_finish_reclaim(
 	int		sync_mode)
 {
 	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-	bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
+	struct inode	*vp = VFS_I(ip);
 	if (vp && VN_BAD(vp))
 		goto reclaim;
@@ -3321,7 +3237,6 @@ xfs_free_file_space(
 	xfs_off_t		len,
 	int			attr_flags)
 {
-	bhv_vnode_t		*vp;
 	int			committed;
 	int			done;
 	xfs_off_t		end_dmi_offset;
@@ -3341,7 +3256,6 @@ xfs_free_file_space(
 	xfs_trans_t		*tp;
 	int			need_iolock = 1;
-	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 	xfs_itrace_entry(ip);
@@ -3378,7 +3292,7 @@
 	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
 	ioffset = offset & ~(rounding - 1);
-	if (VN_CACHED(vp) != 0) {
+	if (VN_CACHED(VFS_I(ip)) != 0) {
 		xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
 		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
 		if (error)
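
The xfs_lock_two_inodes() helper introduced above replaces both this open-coded ips[2] ordering and the deleted xfs_lock_dir_and_entry(): always lock in increasing inode number, and only trylock the second inode, with backoff and retry, when the first may be pinning the log through the AIL. A runnable userspace model of that discipline follows; it trylocks unconditionally, whereas the kernel helper only needs to when the first inode's log item sits in the AIL, and every name here is an illustration.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

struct obj {
	unsigned long	ino;	/* stands in for xfs_inode_t.i_ino */
	pthread_mutex_t	lock;
};

/* Lock a and b in increasing ino order; trylock the second and back off
 * on contention, mirroring the retry loop in xfs_lock_two_inodes(). */
static void lock_two(struct obj *a, struct obj *b)
{
	struct obj *tmp;

	if (a->ino > b->ino) {		/* enforce the global lock order */
		tmp = a; a = b; b = tmp;
	}
again:
	pthread_mutex_lock(&a->lock);
	if (pthread_mutex_trylock(&b->lock)) {
		pthread_mutex_unlock(&a->lock);	/* release and retry */
		sched_yield();			/* don't just spin the CPU */
		goto again;
	}
}

int main(void)
{
	struct obj d = { 128, PTHREAD_MUTEX_INITIALIZER };
	struct obj e = {  42, PTHREAD_MUTEX_INITIALIZER };

	lock_two(&d, &e);	/* acquires e (ino 42) first, then d */
	printf("both locked\n");
	pthread_mutex_unlock(&d.lock);
	pthread_mutex_unlock(&e.lock);
	return 0;
}
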
diff --git a/include/linux/completion.h b/include/linux/completion.h
index d2961b66d53..57faa60de9b 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -55,4 +55,49 @@ extern void complete_all(struct completion *);
 #define INIT_COMPLETION(x)	((x).done = 0)
+
+/**
+ *	try_wait_for_completion - try to decrement a completion without blocking
+ *	@x:	completion structure
+ *
+ *	Returns: 0 if a decrement cannot be done without blocking
+ *		 1 if a decrement succeeded.
+ *
+ *	If a completion is being used as a counting completion,
+ *	attempt to decrement the counter without blocking. This
+ *	enables us to avoid waiting if the resource the completion
+ *	is protecting is not available.
+ */
+static inline bool try_wait_for_completion(struct completion *x)
+{
+	int ret = 1;
+
+	spin_lock_irq(&x->wait.lock);
+	if (!x->done)
+		ret = 0;
+	else
+		x->done--;
+	spin_unlock_irq(&x->wait.lock);
+	return ret;
+}
+
+/**
+ *	completion_done - Test to see if a completion has any waiters
+ *	@x:	completion structure
+ *
+ *	Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *		 1 if there are no waiters.
+ *
+ */
+static inline bool completion_done(struct completion *x)
+{
+	int ret = 1;
+
+	spin_lock_irq(&x->wait.lock);
+	if (!x->done)
+		ret = 0;
+	spin_unlock_irq(&x->wait.lock);
+	return ret;
+}
+
 #endif
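
Taken together, the two new helpers give callers a non-blocking fast path on a counting completion, which is what the b_iowait conversion earlier in this series relies on. A hedged kernel-style sketch of that pairing follows; the resource_pool type and both function names are assumptions for illustration, not code from this patch.

#include <linux/completion.h>
#include <linux/kernel.h>

struct resource_pool {			/* assumed type, for illustration */
	struct completion	avail;	/* done count == units available */
};

/* Take one unit, sleeping only when no count is immediately available. */
static void pool_take(struct resource_pool *p)
{
	if (try_wait_for_completion(&p->avail))
		return;			/* consumed a count without blocking */
	wait_for_completion(&p->avail);
}

/* Return one unit. completion_done() == 0 means the count is empty, so a
 * wait_for_completion() may already be blocked waiting for this unit. */
static void pool_put(struct resource_pool *p)
{
	if (!completion_done(&p->avail))
		pr_debug("returning a unit with waiters pending\n");
	complete(&p->avail);		/* bump the count / wake one waiter */
}
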