| | | |
|---|---|---|
| author | Christoph Hellwig <hch@lst.de> | 2013-04-21 14:53:46 -0500 |
| committer | Ben Myers <bpm@sgi.com> | 2013-04-21 14:53:46 -0500 |
| commit | ee1a47ab0e77600fcbdf1c87d461bd8f3f63150d (patch) | |
| tree | 6340d9f4b8b53c0d18045da1372599645375efce /fs/xfs | |
| parent | a2050646f655a90400cbb66c3866d2e0137eee0c (diff) | |
xfs: add support for large btree blocks
Add support for larger btree blocks that contain a CRC32C checksum,
a filesystem uuid and block number for detecting filesystem
consistency and out-of-place writes.
[dchinner@redhat.com] Also include an owner field to allow reverse
mappings to be implemented for improved repairability and an LSN
field so that log recovery can easily determine the last
modification that made it to disk for each buffer.
[dchinner@redhat.com] Add buffer log format flags to indicate the
type of buffer to recovery so that we don't have to do blind magic
number tests to determine what the buffer is.
[dchinner@redhat.com] Modified to fit into the verifier structure.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
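
For orientation before the per-file hunks: the fs/xfs/xfs_btree.h hunk below grows both btree header forms by the metadata fields described above. A condensed, stand-alone C sketch of the resulting on-disk layout (field names and sizes taken from that hunk; the `__be*`/`__le*` endianness annotations and the union wrapping are dropped for brevity) is:

```c
/*
 * Condensed sketch of the CRC-enabled btree block headers added by this
 * patch; see the struct xfs_btree_block hunk in fs/xfs/xfs_btree.h for the
 * real definitions.  All fields are big-endian on disk except bb_crc.
 */
#include <stdint.h>

typedef struct { uint8_t b[16]; } uuid_t;	/* 128-bit filesystem UUID */

struct xfs_btree_sblock_crc {		/* short form: per-AG alloc/inode btrees */
	uint32_t bb_magic;		/* e.g. XFS_ABTB_CRC_MAGIC ('AB3B') */
	uint16_t bb_level;
	uint16_t bb_numrecs;
	uint32_t bb_leftsib;
	uint32_t bb_rightsib;
	/* fields below only exist on filesystems with the CRC feature bit */
	uint64_t bb_blkno;		/* daddr, catches out-of-place writes */
	uint64_t bb_lsn;		/* last LSN to hit disk, for log recovery */
	uuid_t   bb_uuid;		/* ties the block to this filesystem */
	uint32_t bb_owner;		/* owning AG number */
	uint32_t bb_crc;		/* CRC32C over the whole block (little-endian) */
};					/* 16 + 40 = XFS_BTREE_SBLOCK_CRC_LEN */

struct xfs_btree_lblock_crc {		/* long form: inode-fork bmap btrees */
	uint32_t bb_magic;		/* e.g. XFS_BMAP_CRC_MAGIC ('BMA3') */
	uint16_t bb_level;
	uint16_t bb_numrecs;
	uint64_t bb_leftsib;
	uint64_t bb_rightsib;
	uint64_t bb_blkno;
	uint64_t bb_lsn;
	uuid_t   bb_uuid;
	uint64_t bb_owner;		/* owning inode number */
	uint32_t bb_crc;
	uint32_t bb_pad;		/* padding for 8-byte alignment */
};					/* 24 + 48 = XFS_BTREE_LBLOCK_CRC_LEN */
```

The short form is used by the per-AG allocation and inode btrees and the long form by the inode-fork bmap btrees, which is why `xfs_btree_init_block_cur()` in the patch takes the owner from `pag_agno` for short pointers and from `ip->i_ino` for long pointers.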
Diffstat (limited to 'fs/xfs')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fs/xfs/xfs_alloc_btree.c | 103 |
| -rw-r--r-- | fs/xfs/xfs_alloc_btree.h | 12 |
| -rw-r--r-- | fs/xfs/xfs_attr_leaf.c | 2 |
| -rw-r--r-- | fs/xfs/xfs_bmap.c | 47 |
| -rw-r--r-- | fs/xfs/xfs_bmap_btree.c | 108 |
| -rw-r--r-- | fs/xfs/xfs_bmap_btree.h | 19 |
| -rw-r--r-- | fs/xfs/xfs_btree.c | 256 |
| -rw-r--r-- | fs/xfs/xfs_btree.h | 64 |
| -rw-r--r-- | fs/xfs/xfs_buf_item.h | 24 |
| -rw-r--r-- | fs/xfs/xfs_dinode.h | 4 |
| -rw-r--r-- | fs/xfs/xfs_fsops.c | 23 |
| -rw-r--r-- | fs/xfs/xfs_ialloc_btree.c | 85 |
| -rw-r--r-- | fs/xfs/xfs_ialloc_btree.h | 9 |
| -rw-r--r-- | fs/xfs/xfs_inode.c | 33 |
| -rw-r--r-- | fs/xfs/xfs_log_recover.c | 28 |
| -rw-r--r-- | fs/xfs/xfs_trans.h | 2 |
| -rw-r--r-- | fs/xfs/xfs_trans_buf.c | 29 |
17 files changed, 642 insertions, 206 deletions
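
One pattern worth spotting while reading the hunks: every btree buffer now gets a pair of verifiers that sandwich the CRC work around the structural checks. A minimal sketch of that pattern for the short-form case, using the helper names this patch introduces (`report_corruption()` is a hypothetical stand-in for the `trace_xfs_btree_corrupt` + `XFS_CORRUPTION_ERROR` + `xfs_buf_ioerror` sequence, and `btree_block_verify()` stands in for a per-btree check such as `xfs_allocbt_verify()`):

```c
#include <stdbool.h>

struct xfs_buf;				/* opaque, stands in for the kernel buffer */

/* helpers introduced by this patch (see the xfs_btree.c hunk) */
bool xfs_btree_sblock_verify_crc(struct xfs_buf *bp);
void xfs_btree_sblock_calc_crc(struct xfs_buf *bp);

/* per-btree structural checks, e.g. xfs_allocbt_verify() below */
bool btree_block_verify(struct xfs_buf *bp);

/* hypothetical stand-in for the corruption reporting sequence */
void report_corruption(struct xfs_buf *bp);

/* read side: check the CRC first (a no-op on non-CRC filesystems), then structure */
void btree_read_verify(struct xfs_buf *bp)
{
	if (!(xfs_btree_sblock_verify_crc(bp) && btree_block_verify(bp)))
		report_corruption(bp);
}

/* write side: structural checks, then stamp the LSN and recompute the CRC32C */
void btree_write_verify(struct xfs_buf *bp)
{
	if (!btree_block_verify(bp))
		report_corruption(bp);
	xfs_btree_sblock_calc_crc(bp);
}
```

The long-form (bmbt) verifiers in the patch have the same shape but use the `lblock` helpers, and the bmbt write verifier additionally returns early, skipping the CRC recomputation, when verification fails.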
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b1ddef6b268..30c4c1434fa 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -33,6 +33,7 @@  #include "xfs_extent_busy.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_cksum.h"  STATIC struct xfs_btree_cur * @@ -272,7 +273,7 @@ xfs_allocbt_key_diff(  	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;  } -static void +static bool  xfs_allocbt_verify(  	struct xfs_buf		*bp)  { @@ -280,66 +281,103 @@ xfs_allocbt_verify(  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);  	struct xfs_perag	*pag = bp->b_pag;  	unsigned int		level; -	int			sblock_ok; /* block passes checks */  	/*  	 * magic number and level verification  	 * -	 * During growfs operations, we can't verify the exact level as the -	 * perag is not fully initialised and hence not attached to the buffer. -	 * In this case, check against the maximum tree depth. +	 * During growfs operations, we can't verify the exact level or owner as +	 * the perag is not fully initialised and hence not attached to the +	 * buffer.  In this case, check against the maximum tree depth. +	 * +	 * Similarly, during log recovery we will have a perag structure +	 * attached, but the agf information will not yet have been initialised +	 * from the on disk AGF. Again, we can only check against maximum limits +	 * in this case.  	 */  	level = be16_to_cpu(block->bb_level);  	switch (block->bb_magic) { +	case cpu_to_be32(XFS_ABTB_CRC_MAGIC): +		if (!xfs_sb_version_hascrc(&mp->m_sb)) +			return false; +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) +			return false; +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) +			return false; +		if (pag && +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) +			return false; +		/* fall through */  	case cpu_to_be32(XFS_ABTB_MAGIC): -		if (pag) -			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; -		else -			sblock_ok = level < mp->m_ag_maxlevels; +		if (pag && pag->pagf_init) { +			if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) +				return false; +		} else if (level >= mp->m_ag_maxlevels) +			return false;  		break; +	case cpu_to_be32(XFS_ABTC_CRC_MAGIC): +		if (!xfs_sb_version_hascrc(&mp->m_sb)) +			return false; +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) +			return false; +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) +			return false; +		if (pag && +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) +			return false; +		/* fall through */  	case cpu_to_be32(XFS_ABTC_MAGIC): -		if (pag) -			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; -		else -			sblock_ok = level < mp->m_ag_maxlevels; +		if (pag && pag->pagf_init) { +			if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) +				return false; +		} else if (level >= mp->m_ag_maxlevels) +			return false;  		break;  	default: -		sblock_ok = 0; -		break; +		return false;  	}  	/* numrecs verification */ -	sblock_ok = sblock_ok && -		be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; +	if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0]) +		return false;  	/* sibling pointer verification */ -	sblock_ok = sblock_ok && -		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || -		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && -		block->bb_u.s.bb_leftsib && -		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || -		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && -		block->bb_u.s.bb_rightsib; +	if 
(!block->bb_u.s.bb_leftsib || +	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && +	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) +		return false; +	if (!block->bb_u.s.bb_rightsib || +	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && +	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) +		return false; -	if (!sblock_ok) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} +	return true;  }  static void  xfs_allocbt_read_verify(  	struct xfs_buf	*bp)  { -	xfs_allocbt_verify(bp); +	if (!(xfs_btree_sblock_verify_crc(bp) && +	      xfs_allocbt_verify(bp))) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +	}  }  static void  xfs_allocbt_write_verify(  	struct xfs_buf	*bp)  { -	xfs_allocbt_verify(bp); +	if (!xfs_allocbt_verify(bp)) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +	} +	xfs_btree_sblock_calc_crc(bp); +  }  const struct xfs_buf_ops xfs_allocbt_buf_ops = { @@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(  	cur->bc_private.a.agbp = agbp;  	cur->bc_private.a.agno = agno; +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; +  	return cur;  } diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 7e89a2b429d..e3a3f742419 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -31,8 +31,10 @@ struct xfs_mount;   * by blockcount and blockno.  All blocks look the same to make the code   * simpler; if we have time later, we'll make the optimizations.   */ -#define	XFS_ABTB_MAGIC	0x41425442	/* 'ABTB' for bno tree */ -#define	XFS_ABTC_MAGIC	0x41425443	/* 'ABTC' for cnt tree */ +#define	XFS_ABTB_MAGIC		0x41425442	/* 'ABTB' for bno tree */ +#define	XFS_ABTB_CRC_MAGIC	0x41423342	/* 'AB3B' */ +#define	XFS_ABTC_MAGIC		0x41425443	/* 'ABTC' for cnt tree */ +#define	XFS_ABTC_CRC_MAGIC	0x41423343	/* 'AB3C' */  /*   * Data record/key structure @@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;  /*   * Btree block header size depends on a superblock flag. - * - * (not quite yet, but soon)   */ -#define XFS_ALLOC_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN +#define XFS_ALLOC_BLOCK_LEN(mp) \ +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ +		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)  /*   * Record, key, and pointer address macros for btree blocks. 
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index f96a734ed1e..aa4765f15cb 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -232,7 +232,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)  				return 0;  			return dp->i_d.di_forkoff;  		} -		dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); +		dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);  		break;  	} diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 20efb397a7f..0531cd3927a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -439,11 +439,15 @@ xfs_bmap_sanity_check(  {  	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp); -	if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) || -	    be16_to_cpu(block->bb_level) != level || +	if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && +	    block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) +		return 0; + +	if (be16_to_cpu(block->bb_level) != level ||  	    be16_to_cpu(block->bb_numrecs) == 0 ||  	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])  		return 0; +  	return 1;  } @@ -1031,6 +1035,7 @@ xfs_bmap_extents_to_btree(  	xfs_extnum_t		nextents;	/* number of file extents */  	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */ +	mp = ip->i_mount;  	ifp = XFS_IFORK_PTR(ip, whichfork);  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); @@ -1044,16 +1049,18 @@ xfs_bmap_extents_to_btree(  	 * Fill in the root.  	 */  	block = ifp->if_broot; -	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); -	block->bb_level = cpu_to_be16(1); -	block->bb_numrecs = cpu_to_be16(1); -	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); -	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, +				 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, +				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); +	else +		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, +				 XFS_BMAP_MAGIC, 1, 1, ip->i_ino, +				 XFS_BTREE_LONG_PTRS);  	/*  	 * Need a cursor.  Can't allocate until bb_level is filled in.  	 
*/ -	mp = ip->i_mount;  	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);  	cur->bc_private.b.firstblock = *firstblock;  	cur->bc_private.b.flist = flist; @@ -1102,10 +1109,15 @@ xfs_bmap_extents_to_btree(  	 */  	abp->b_ops = &xfs_bmbt_buf_ops;  	ablock = XFS_BUF_TO_BLOCK(abp); -	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); -	ablock->bb_level = 0; -	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); -	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		xfs_btree_init_block_int(mp, ablock, abp->b_bn, +				XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, +				XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); +	else +		xfs_btree_init_block_int(mp, ablock, abp->b_bn, +				XFS_BMAP_MAGIC, 0, 0, ip->i_ino, +				XFS_BTREE_LONG_PTRS); +  	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);  	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);  	for (cnt = i = 0; i < nextents; i++) { @@ -1155,7 +1167,8 @@ xfs_bmap_local_to_extents(  	xfs_extlen_t	total,		/* total blocks needed by transaction */  	int		*logflagsp,	/* inode logging flags */  	int		whichfork, -	void		(*init_fn)(struct xfs_buf *bp, +	void		(*init_fn)(struct xfs_trans *tp, +				   struct xfs_buf *bp,  				   struct xfs_inode *ip,  				   struct xfs_ifork *ifp))  { @@ -1207,7 +1220,7 @@ xfs_bmap_local_to_extents(  		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);  		/* initialise the block and copy the data */ -		init_fn(bp, ip, ifp); +		init_fn(tp, bp, ip, ifp);  		/* account for the change in fork size and log everything */  		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); @@ -1314,16 +1327,19 @@ xfs_bmap_add_attrfork_extents(   */  STATIC void  xfs_bmap_local_to_extents_init_fn( +	struct xfs_trans	*tp,  	struct xfs_buf		*bp,  	struct xfs_inode	*ip,  	struct xfs_ifork	*ifp)  {  	bp->b_ops = &xfs_bmbt_buf_ops;  	memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); +	xfs_trans_buf_set_type(tp, bp, XFS_BLF_BTREE_BUF);  }  STATIC void  xfs_symlink_local_to_remote( +	struct xfs_trans	*tp,  	struct xfs_buf		*bp,  	struct xfs_inode	*ip,  	struct xfs_ifork	*ifp) @@ -1342,8 +1358,7 @@ xfs_symlink_local_to_remote(   *   * XXX (dgc): investigate whether directory conversion can use the generic   * formatting callout. It should be possible - it's just a very complex - * formatter. it would also require passing the transaction through to the init - * function. + * formatter.   */  STATIC int					/* error */  xfs_bmap_add_attrfork_local( diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 061b45cbe61..3a86c3fa6de 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -37,6 +37,7 @@  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_trace.h" +#include "xfs_cksum.h"  /*   * Determine the extent state. 
@@ -59,24 +60,31 @@ xfs_extent_state(   */  void  xfs_bmdr_to_bmbt( -	struct xfs_mount	*mp, +	struct xfs_inode	*ip,  	xfs_bmdr_block_t	*dblock,  	int			dblocklen,  	struct xfs_btree_block	*rblock,  	int			rblocklen)  { +	struct xfs_mount	*mp = ip->i_mount;  	int			dmxr;  	xfs_bmbt_key_t		*fkp;  	__be64			*fpp;  	xfs_bmbt_key_t		*tkp;  	__be64			*tpp; -	rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, +				 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, +				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); +	else +		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, +				 XFS_BMAP_MAGIC, 0, 0, ip->i_ino, +				 XFS_BTREE_LONG_PTRS); +  	rblock->bb_level = dblock->bb_level;  	ASSERT(be16_to_cpu(rblock->bb_level) > 0);  	rblock->bb_numrecs = dblock->bb_numrecs; -	rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); -	rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);  	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);  	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);  	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); @@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(  	xfs_bmbt_key_t		*tkp;  	__be64			*tpp; -	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC)); +		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)); +		ASSERT(rblock->bb_u.l.bb_blkno == +		       cpu_to_be64(XFS_BUF_DADDR_NULL)); +	} else +		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));  	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));  	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));  	ASSERT(rblock->bb_level != 0); @@ -708,59 +722,89 @@ xfs_bmbt_key_diff(  				      cur->bc_rec.b.br_startoff;  } -static void +static int  xfs_bmbt_verify(  	struct xfs_buf		*bp)  {  	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);  	unsigned int		level; -	int			lblock_ok; /* block passes checks */ -	/* magic number and level verification. +	switch (block->bb_magic) { +	case cpu_to_be32(XFS_BMAP_CRC_MAGIC): +		if (!xfs_sb_version_hascrc(&mp->m_sb)) +			return false; +		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid)) +			return false; +		if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) +			return false; +		/* +		 * XXX: need a better way of verifying the owner here. Right now +		 * just make sure there has been one set. +		 */ +		if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) +			return false; +		/* fall through */ +	case cpu_to_be32(XFS_BMAP_MAGIC): +		break; +	default: +		return false; +	} + +	/* +	 * numrecs and level verification.  	 * -	 * We don't know waht fork we belong to, so just verify that the level +	 * We don't know what fork we belong to, so just verify that the level  	 * is less than the maximum of the two. Later checks will be more  	 * precise.  	 
*/  	level = be16_to_cpu(block->bb_level); -	lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) && -		    level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]); - -	/* numrecs verification */ -	lblock_ok = lblock_ok && -		be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0]; +	if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) +		return false; +	if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) +		return false;  	/* sibling pointer verification */ -	lblock_ok = lblock_ok && -		block->bb_u.l.bb_leftsib && -		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || -		 XFS_FSB_SANITY_CHECK(mp, -			be64_to_cpu(block->bb_u.l.bb_leftsib))) && -		block->bb_u.l.bb_rightsib && -		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || -		 XFS_FSB_SANITY_CHECK(mp, -			be64_to_cpu(block->bb_u.l.bb_rightsib))); +	if (!block->bb_u.l.bb_leftsib || +	    (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && +	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) +		return false; +	if (!block->bb_u.l.bb_rightsib || +	    (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && +	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) +		return false; + +	return true; -	if (!lblock_ok) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -	}  }  static void  xfs_bmbt_read_verify(  	struct xfs_buf	*bp)  { -	xfs_bmbt_verify(bp); +	if (!(xfs_btree_lblock_verify_crc(bp) && +	      xfs_bmbt_verify(bp))) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +	} +  }  static void  xfs_bmbt_write_verify(  	struct xfs_buf	*bp)  { -	xfs_bmbt_verify(bp); +	if (!xfs_bmbt_verify(bp)) { +		xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +		return; +	} +	xfs_btree_lblock_calc_crc(bp);  }  const struct xfs_buf_ops xfs_bmbt_buf_ops = { @@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(  	cur->bc_ops = &xfs_bmbt_ops;  	cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;  	cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);  	cur->bc_private.b.ip = ip; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 88469ca0869..70c43d9f72c 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -18,7 +18,8 @@  #ifndef __XFS_BMAP_BTREE_H__  #define __XFS_BMAP_BTREE_H__ -#define XFS_BMAP_MAGIC	0x424d4150	/* 'BMAP' */ +#define XFS_BMAP_MAGIC		0x424d4150	/* 'BMAP' */ +#define XFS_BMAP_CRC_MAGIC	0x424d4133	/* 'BMA3' */  struct xfs_btree_cur;  struct xfs_btree_block; @@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;  /*   * Btree block header size depends on a superblock flag. - * - * (not quite yet, but soon)   */ -#define XFS_BMBT_BLOCK_LEN(mp)	XFS_BTREE_LBLOCK_LEN +#define XFS_BMBT_BLOCK_LEN(mp) \ +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? 
\ +		XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)  #define XFS_BMBT_REC_ADDR(mp, block, index) \  	((xfs_bmbt_rec_t *) \ @@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;  #define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \  	XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) -#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ -	(int)(XFS_BTREE_LBLOCK_LEN + \ +#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \ +	(int)(XFS_BMBT_BLOCK_LEN(mp) + \  	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) -#define XFS_BMAP_BROOT_SPACE(bb) \ -	(XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) +#define XFS_BMAP_BROOT_SPACE(mp, bb) \ +	(XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))  #define XFS_BMDR_SPACE_CALC(nrecs) \  	(int)(sizeof(xfs_bmdr_block_t) + \  	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) @@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;  /*   * Prototypes for xfs_bmap.c to call.   */ -extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, +extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,  			struct xfs_btree_block *, int);  extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);  extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index db010408d70..ec77036f13b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -30,9 +30,11 @@  #include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_inode_item.h" +#include "xfs_buf_item.h"  #include "xfs_btree.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_cksum.h"  /*   * Cursor allocation zone. @@ -42,9 +44,13 @@ kmem_zone_t	*xfs_btree_cur_zone;  /*   * Btree magic numbers.   */ -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { -	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC +static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { +	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, +	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, +	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }  }; +#define xfs_btree_magic(cur) \ +	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]  STATIC int				/* error (0 or EFSCORRUPTED) */ @@ -54,30 +60,38 @@ xfs_btree_check_lblock(  	int			level,	/* level of the btree block */  	struct xfs_buf		*bp)	/* buffer for block, if any */  { -	int			lblock_ok; /* block passes checks */ +	int			lblock_ok = 1; /* block passes checks */  	struct xfs_mount	*mp;	/* file system mount point */  	mp = cur->bc_mp; -	lblock_ok = -		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		lblock_ok = lblock_ok && +			uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && +			block->bb_u.l.bb_blkno == cpu_to_be64( +				bp ? 
bp->b_bn : XFS_BUF_DADDR_NULL); +	} + +	lblock_ok = lblock_ok && +		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&  		be16_to_cpu(block->bb_level) == level &&  		be16_to_cpu(block->bb_numrecs) <=  			cur->bc_ops->get_maxrecs(cur, level) &&  		block->bb_u.l.bb_leftsib &&  		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||  		 XFS_FSB_SANITY_CHECK(mp, -		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) && +			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&  		block->bb_u.l.bb_rightsib &&  		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||  		 XFS_FSB_SANITY_CHECK(mp, -		 	be64_to_cpu(block->bb_u.l.bb_rightsib))); +			be64_to_cpu(block->bb_u.l.bb_rightsib))); +  	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,  			XFS_ERRTAG_BTREE_CHECK_LBLOCK,  			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {  		if (bp)  			trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, -				 mp); +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);  		return XFS_ERROR(EFSCORRUPTED);  	}  	return 0; @@ -90,16 +104,26 @@ xfs_btree_check_sblock(  	int			level,	/* level of the btree block */  	struct xfs_buf		*bp)	/* buffer containing block */  { +	struct xfs_mount	*mp;	/* file system mount point */  	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */  	struct xfs_agf		*agf;	/* ag. freespace structure */  	xfs_agblock_t		agflen;	/* native ag. freespace length */ -	int			sblock_ok; /* block passes checks */ +	int			sblock_ok = 1; /* block passes checks */ +	mp = cur->bc_mp;  	agbp = cur->bc_private.a.agbp;  	agf = XFS_BUF_TO_AGF(agbp);  	agflen = be32_to_cpu(agf->agf_length); -	sblock_ok = -		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		sblock_ok = sblock_ok && +			uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && +			block->bb_u.s.bb_blkno == cpu_to_be64( +				bp ? bp->b_bn : XFS_BUF_DADDR_NULL); +	} + +	sblock_ok = sblock_ok && +		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&  		be16_to_cpu(block->bb_level) == level &&  		be16_to_cpu(block->bb_numrecs) <=  			cur->bc_ops->get_maxrecs(cur, level) && @@ -109,13 +133,13 @@ xfs_btree_check_sblock(  		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||  		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&  		block->bb_u.s.bb_rightsib; -	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, + +	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,  			XFS_ERRTAG_BTREE_CHECK_SBLOCK,  			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {  		if (bp)  			trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", -			XFS_ERRLEVEL_LOW, cur->bc_mp, block); +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);  		return XFS_ERROR(EFSCORRUPTED);  	}  	return 0; @@ -194,6 +218,72 @@ xfs_btree_check_ptr(  #endif  /* + * Calculate CRC on the whole btree block and stuff it into the + * long-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. 
+ */ +void +xfs_btree_lblock_calc_crc( +	struct xfs_buf		*bp) +{ +	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp); +	struct xfs_buf_log_item	*bip = bp->b_fspriv; + +	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +		return; +	if (bip) +		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); +	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +			 XFS_BTREE_LBLOCK_CRC_OFF); +} + +bool +xfs_btree_lblock_verify_crc( +	struct xfs_buf		*bp) +{ +	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +					XFS_BTREE_LBLOCK_CRC_OFF); +	return true; +} + +/* + * Calculate CRC on the whole btree block and stuff it into the + * short-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. + */ +void +xfs_btree_sblock_calc_crc( +	struct xfs_buf		*bp) +{ +	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp); +	struct xfs_buf_log_item	*bip = bp->b_fspriv; + +	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +		return; +	if (bip) +		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); +	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +			 XFS_BTREE_SBLOCK_CRC_OFF); +} + +bool +xfs_btree_sblock_verify_crc( +	struct xfs_buf		*bp) +{ +	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +					XFS_BTREE_SBLOCK_CRC_OFF); +	return true; +} + +/*   * Delete the btree cursor.   */  void @@ -277,10 +367,8 @@ xfs_btree_dup_cursor(  				*ncur = NULL;  				return error;  			} -			new->bc_bufs[i] = bp; -			ASSERT(!xfs_buf_geterror(bp)); -		} else -			new->bc_bufs[i] = NULL; +		} +		new->bc_bufs[i] = bp;  	}  	*ncur = new;  	return 0; @@ -321,9 +409,14 @@ xfs_btree_dup_cursor(   */  static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)  { -	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 
-		XFS_BTREE_LBLOCK_LEN : -		XFS_BTREE_SBLOCK_LEN; +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { +		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) +			return XFS_BTREE_LBLOCK_CRC_LEN; +		return XFS_BTREE_LBLOCK_LEN; +	} +	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) +		return XFS_BTREE_SBLOCK_CRC_LEN; +	return XFS_BTREE_SBLOCK_LEN;  }  /* @@ -863,43 +956,85 @@ xfs_btree_set_sibling(  }  void +xfs_btree_init_block_int( +	struct xfs_mount	*mp, +	struct xfs_btree_block	*buf, +	xfs_daddr_t		blkno, +	__u32			magic, +	__u16			level, +	__u16			numrecs, +	__u64			owner, +	unsigned int		flags) +{ +	buf->bb_magic = cpu_to_be32(magic); +	buf->bb_level = cpu_to_be16(level); +	buf->bb_numrecs = cpu_to_be16(numrecs); + +	if (flags & XFS_BTREE_LONG_PTRS) { +		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); +		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); +		if (flags & XFS_BTREE_CRC_BLOCKS) { +			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); +			buf->bb_u.l.bb_owner = cpu_to_be64(owner); +			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); +			buf->bb_u.l.bb_pad = 0; +		} +	} else { +		/* owner is a 32 bit value on short blocks */ +		__u32 __owner = (__u32)owner; + +		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); +		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); +		if (flags & XFS_BTREE_CRC_BLOCKS) { +			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); +			buf->bb_u.s.bb_owner = cpu_to_be32(__owner); +			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); +		} +	} +} + +void  xfs_btree_init_block(  	struct xfs_mount *mp,  	struct xfs_buf	*bp,  	__u32		magic,  	__u16		level,  	__u16		numrecs, +	__u64		owner,  	unsigned int	flags)  { -	struct xfs_btree_block	*new = XFS_BUF_TO_BLOCK(bp); - -	new->bb_magic = cpu_to_be32(magic); -	new->bb_level = cpu_to_be16(level); -	new->bb_numrecs = cpu_to_be16(numrecs); - -	if (flags & XFS_BTREE_LONG_PTRS) { -		new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); -		new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); -	} else { -		new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); -		new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); -	} +	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, +				 magic, level, numrecs, owner, flags);  }  STATIC void  xfs_btree_init_block_cur(  	struct xfs_btree_cur	*cur, +	struct xfs_buf		*bp,  	int			level, -	int			numrecs, -	struct xfs_buf		*bp) +	int			numrecs)  { -	xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum], -			       level, numrecs, cur->bc_flags); +	__u64 owner; + +	/* +	 * we can pull the owner from the cursor right now as the different +	 * owners align directly with the pointer size of the btree. This may +	 * change in future, but is safe for current users of the generic btree +	 * code. +	 */ +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) +		owner = cur->bc_private.b.ip->i_ino; +	else +		owner = cur->bc_private.a.agno; + +	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, +				 xfs_btree_magic(cur), level, numrecs, +				 owner, cur->bc_flags);  }  /*   * Return true if ptr is the last record in the btree and - * we need to track updateѕ to this record.  The decision + * we need to track updates to this record.  The decision   * will be further refined in the update_lastrec method.   
*/  STATIC int @@ -1147,6 +1282,7 @@ xfs_btree_log_keys(  	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);  	if (bp) { +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);  		xfs_trans_log_buf(cur->bc_tp, bp,  				  xfs_btree_key_offset(cur, first),  				  xfs_btree_key_offset(cur, last + 1) - 1); @@ -1171,6 +1307,7 @@ xfs_btree_log_recs(  	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);  	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); +	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);  	xfs_trans_log_buf(cur->bc_tp, bp,  			  xfs_btree_rec_offset(cur, first),  			  xfs_btree_rec_offset(cur, last + 1) - 1); @@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(  		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);  		int			level = xfs_btree_get_level(block); +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);  		xfs_trans_log_buf(cur->bc_tp, bp,  				xfs_btree_ptr_offset(cur, first, level),  				xfs_btree_ptr_offset(cur, last + 1, level) - 1); @@ -1223,7 +1361,12 @@ xfs_btree_log_block(  		offsetof(struct xfs_btree_block, bb_numrecs),  		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),  		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), -		XFS_BTREE_SBLOCK_LEN +		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), +		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), +		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), +		offsetof(struct xfs_btree_block, bb_u.s.bb_owner), +		offsetof(struct xfs_btree_block, bb_u.s.bb_crc), +		XFS_BTREE_SBLOCK_CRC_LEN  	};  	static const short	loffsets[] = {	/* table of offsets (long) */  		offsetof(struct xfs_btree_block, bb_magic), @@ -1231,17 +1374,40 @@ xfs_btree_log_block(  		offsetof(struct xfs_btree_block, bb_numrecs),  		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),  		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), -		XFS_BTREE_LBLOCK_LEN +		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), +		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), +		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), +		offsetof(struct xfs_btree_block, bb_u.l.bb_owner), +		offsetof(struct xfs_btree_block, bb_u.l.bb_crc), +		offsetof(struct xfs_btree_block, bb_u.l.bb_pad), +		XFS_BTREE_LBLOCK_CRC_LEN  	};  	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);  	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);  	if (bp) { +		int nbits; + +		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { +			/* +			 * We don't log the CRC when updating a btree +			 * block but instead recreate it during log +			 * recovery.  As the log buffers have checksums +			 * of their own this is safe and avoids logging a crc +			 * update in a lot of places. +			 */ +			if (fields == XFS_BB_ALL_BITS) +				fields = XFS_BB_ALL_BITS_CRC; +			nbits = XFS_BB_NUM_BITS_CRC; +		} else { +			nbits = XFS_BB_NUM_BITS; +		}  		xfs_btree_offsets(fields,  				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?  					loffsets : soffsets, -				  XFS_BB_NUM_BITS, &first, &last); +				  nbits, &first, &last); +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);  		xfs_trans_log_buf(cur->bc_tp, bp, first, last);  	} else {  		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, @@ -2204,7 +2370,7 @@ xfs_btree_split(  		goto error0;  	/* Fill in the btree header for the new right block. */ -	xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp); +	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);  	/*  	 * Split the entries between the old and the new block evenly. @@ -2513,7 +2679,7 @@ xfs_btree_new_root(  		nptr = 2;  	}  	/* Fill in the new block's btree header and log it. 
*/ -	xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp); +	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);  	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);  	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&  			!xfs_btree_ptr_is_null(cur, &rptr)); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index f932897194e..6e6c915673f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -42,11 +42,15 @@ extern kmem_zone_t	*xfs_btree_cur_zone;   * Generic btree header.   *   * This is a combination of the actual format used on disk for short and long - * format btrees.  The first three fields are shared by both format, but - * the pointers are different and should be used with care. + * format btrees.  The first three fields are shared by both format, but the + * pointers are different and should be used with care.   * - * To get the size of the actual short or long form headers please use - * the size macros below.  Never use sizeof(xfs_btree_block). + * To get the size of the actual short or long form headers please use the size + * macros below.  Never use sizeof(xfs_btree_block). + * + * The blkno, crc, lsn, owner and uuid fields are only available in filesystems + * with the crc feature bit, and all accesses to them must be conditional on + * that flag.   */  struct xfs_btree_block {  	__be32		bb_magic;	/* magic number for block type */ @@ -56,10 +60,23 @@ struct xfs_btree_block {  		struct {  			__be32		bb_leftsib;  			__be32		bb_rightsib; + +			__be64		bb_blkno; +			__be64		bb_lsn; +			uuid_t		bb_uuid; +			__be32		bb_owner; +			__le32		bb_crc;  		} s;			/* short form pointers */  		struct	{  			__be64		bb_leftsib;  			__be64		bb_rightsib; + +			__be64		bb_blkno; +			__be64		bb_lsn; +			uuid_t		bb_uuid; +			__be64		bb_owner; +			__le32		bb_crc; +			__be32		bb_pad; /* padding for alignment */  		} l;			/* long form pointers */  	} bb_u;				/* rest */  }; @@ -67,6 +84,16 @@ struct xfs_btree_block {  #define XFS_BTREE_SBLOCK_LEN	16	/* size of a short form block */  #define XFS_BTREE_LBLOCK_LEN	24	/* size of a long form block */ +/* sizes of CRC enabled btree blocks */ +#define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40) +#define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48) + + +#define XFS_BTREE_SBLOCK_CRC_OFF \ +	offsetof(struct xfs_btree_block, bb_u.s.bb_crc) +#define XFS_BTREE_LBLOCK_CRC_OFF \ +	offsetof(struct xfs_btree_block, bb_u.l.bb_crc) +  /*   * Generic key, ptr and record wrapper structures. @@ -101,13 +128,11 @@ union xfs_btree_rec {  #define	XFS_BB_NUMRECS		0x04  #define	XFS_BB_LEFTSIB		0x08  #define	XFS_BB_RIGHTSIB		0x10 +#define	XFS_BB_BLKNO		0x20  #define	XFS_BB_NUM_BITS		5  #define	XFS_BB_ALL_BITS		((1 << XFS_BB_NUM_BITS) - 1) - -/* - * Magic numbers for btree blocks. 
- */ -extern const __uint32_t	xfs_magics[]; +#define	XFS_BB_NUM_BITS_CRC	8 +#define	XFS_BB_ALL_BITS_CRC	((1 << XFS_BB_NUM_BITS_CRC) - 1)  /*   * Generic stats interface @@ -256,6 +281,7 @@ typedef struct xfs_btree_cur  #define XFS_BTREE_LONG_PTRS		(1<<0)	/* pointers are 64bits long */  #define XFS_BTREE_ROOT_IN_INODE		(1<<1)	/* root may be variable size */  #define XFS_BTREE_LASTREC_UPDATE	(1<<2)	/* track last rec externally */ +#define XFS_BTREE_CRC_BLOCKS		(1<<3)	/* uses extended btree blocks */  #define	XFS_BTREE_NOERROR	0 @@ -393,8 +419,20 @@ xfs_btree_init_block(  	__u32		magic,  	__u16		level,  	__u16		numrecs, +	__u64		owner,  	unsigned int	flags); +void +xfs_btree_init_block_int( +	struct xfs_mount	*mp, +	struct xfs_btree_block	*buf, +	xfs_daddr_t		blkno, +	__u32			magic, +	__u16			level, +	__u16			numrecs, +	__u64			owner, +	unsigned int		flags); +  /*   * Common btree core entry points.   */ @@ -408,6 +446,14 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *);  int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);  /* + * btree block CRC helpers + */ +void xfs_btree_lblock_calc_crc(struct xfs_buf *); +bool xfs_btree_lblock_verify_crc(struct xfs_buf *); +void xfs_btree_sblock_calc_crc(struct xfs_buf *); +bool xfs_btree_sblock_verify_crc(struct xfs_buf *); + +/*   * Internal btree helpers also used by xfs_bmap.c.   */  void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index ee36c88ecfd..101ef8377f1 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -24,19 +24,33 @@ extern kmem_zone_t	*xfs_buf_item_zone;   * This flag indicates that the buffer contains on disk inodes   * and requires special recovery handling.   */ -#define	XFS_BLF_INODE_BUF	0x1 +#define	XFS_BLF_INODE_BUF	(1<<0)  /*   * This flag indicates that the buffer should not be replayed   * during recovery because its blocks are being freed.   */ -#define	XFS_BLF_CANCEL		0x2 +#define	XFS_BLF_CANCEL		(1<<1) +  /*   * This flag indicates that the buffer contains on disk   * user or group dquots and may require special recovery handling.   */ -#define	XFS_BLF_UDQUOT_BUF	0x4 -#define XFS_BLF_PDQUOT_BUF	0x8 -#define	XFS_BLF_GDQUOT_BUF	0x10 +#define	XFS_BLF_UDQUOT_BUF	(1<<2) +#define XFS_BLF_PDQUOT_BUF	(1<<3) +#define	XFS_BLF_GDQUOT_BUF	(1<<4) + +/* + * all buffers now need flags to tell recovery where the magic number + * is so that it can verify and calculate the CRCs on the buffer correctly + * once the changes have been replayed into the buffer. + */ +#define XFS_BLF_BTREE_BUF	(1<<5) + +#define XFS_BLF_TYPE_MASK	\ +		(XFS_BLF_UDQUOT_BUF | \ +		 XFS_BLF_PDQUOT_BUF | \ +		 XFS_BLF_GDQUOT_BUF | \ +		 XFS_BLF_BTREE_BUF)  #define	XFS_BLF_CHUNK		128  #define	XFS_BLF_SHIFT		7 diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 88a3368ef12..6b5bd1745db 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -107,8 +107,8 @@ typedef enum xfs_dinode_fmt {  #define XFS_LITINO(mp, version) \  	((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode))) -#define	XFS_BROOT_SIZE_ADJ	\ -	(XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) +#define XFS_BROOT_SIZE_ADJ(ip) \ +	(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))  /*   * Inode data & attribute fork sizes, per inode. 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 2866b8c78b7..6fe286a8e29 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -316,7 +316,13 @@ xfs_growfs_data_private(  			goto error0;  		} -		xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0); +		if (xfs_sb_version_hascrc(&mp->m_sb)) +			xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1, +						agno, XFS_BTREE_CRC_BLOCKS); +		else +			xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, +						agno, 0); +  		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);  		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));  		arec->ar_blockcount = cpu_to_be32( @@ -339,7 +345,13 @@ xfs_growfs_data_private(  			goto error0;  		} -		xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0); +		if (xfs_sb_version_hascrc(&mp->m_sb)) +			xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1, +						agno, XFS_BTREE_CRC_BLOCKS); +		else +			xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, +						agno, 0); +  		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);  		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));  		arec->ar_blockcount = cpu_to_be32( @@ -363,7 +375,12 @@ xfs_growfs_data_private(  			goto error0;  		} -		xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0); +		if (xfs_sb_version_hascrc(&mp->m_sb)) +			xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0, +						agno, XFS_BTREE_CRC_BLOCKS); +		else +			xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, +						agno, 0);  		error = xfs_bwrite(bp);  		xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index bec344b3650..c82ac886742 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -34,6 +34,7 @@  #include "xfs_alloc.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_cksum.h"  STATIC int @@ -182,52 +183,88 @@ xfs_inobt_key_diff(  			  cur->bc_rec.i.ir_startino;  } -void +static int  xfs_inobt_verify(  	struct xfs_buf		*bp)  {  	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp); +	struct xfs_perag	*pag = bp->b_pag;  	unsigned int		level; -	int			sblock_ok; /* block passes checks */ -	/* magic number and level verification */ -	level = be16_to_cpu(block->bb_level); -	sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) && -		    level < mp->m_in_maxlevels; +	/* +	 * During growfs operations, we can't verify the exact owner as the +	 * perag is not fully initialised and hence not attached to the buffer. +	 * +	 * Similarly, during log recovery we will have a perag structure +	 * attached, but the agi information will not yet have been initialised +	 * from the on disk AGI. We don't currently use any of this information, +	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we +	 * ever do. 
+	 */ +	switch (block->bb_magic) { +	case cpu_to_be32(XFS_IBT_CRC_MAGIC): +		if (!xfs_sb_version_hascrc(&mp->m_sb)) +			return false; +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) +			return false; +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) +			return false; +		if (pag && +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) +			return false; +		/* fall through */ +	case cpu_to_be32(XFS_IBT_MAGIC): +		break; +	default: +		return 0; +	} -	/* numrecs verification */ -	sblock_ok = sblock_ok && -		be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0]; +	/* numrecs and level verification */ +	level = be16_to_cpu(block->bb_level); +	if (level >= mp->m_in_maxlevels) +		return false; +	if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0]) +		return false;  	/* sibling pointer verification */ -	sblock_ok = sblock_ok && -		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || -		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && -		block->bb_u.s.bb_leftsib && -		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || -		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && -		block->bb_u.s.bb_rightsib; +	if (!block->bb_u.s.bb_leftsib || +	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && +	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) +		return false; +	if (!block->bb_u.s.bb_rightsib || +	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && +	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) +		return false; -	if (!sblock_ok) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} +	return true;  }  static void  xfs_inobt_read_verify(  	struct xfs_buf	*bp)  { -	xfs_inobt_verify(bp); +	if (!(xfs_btree_sblock_verify_crc(bp) && +	      xfs_inobt_verify(bp))) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +	}  }  static void  xfs_inobt_write_verify(  	struct xfs_buf	*bp)  { -	xfs_inobt_verify(bp); +	if (!xfs_inobt_verify(bp)) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +				     bp->b_target->bt_mount, bp->b_addr); +		xfs_buf_ioerror(bp, EFSCORRUPTED); +	} +	xfs_btree_sblock_calc_crc(bp); +  }  const struct xfs_buf_ops xfs_inobt_buf_ops = { @@ -301,6 +338,8 @@ xfs_inobt_init_cursor(  	cur->bc_blocklog = mp->m_sb.sb_blocklog;  	cur->bc_ops = &xfs_inobt_ops; +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;  	cur->bc_private.a.agbp = agbp;  	cur->bc_private.a.agno = agno; diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 25c0239a8ea..3ac36b7642e 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -29,7 +29,8 @@ struct xfs_mount;  /*   * There is a btree for the inode map per allocation group.   */ -#define	XFS_IBT_MAGIC	0x49414254	/* 'IABT' */ +#define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */ +#define	XFS_IBT_CRC_MAGIC	0x49414233	/* 'IAB3' */  typedef	__uint64_t	xfs_inofree_t;  #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t)) @@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;  /*   * Btree block header size depends on a superblock flag. 
- * - * (not quite yet, but soon)   */ -#define XFS_INOBT_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN +#define XFS_INOBT_BLOCK_LEN(mp) \ +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ +		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)  /*   * Record, key, and pointer address macros for btree blocks. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4f201656d2d..202ce37e66c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -786,6 +786,7 @@ xfs_iformat_btree(  	xfs_dinode_t		*dip,  	int			whichfork)  { +	struct xfs_mount	*mp = ip->i_mount;  	xfs_bmdr_block_t	*dfp;  	xfs_ifork_t		*ifp;  	/* REFERENCED */ @@ -794,7 +795,7 @@ xfs_iformat_btree(  	ifp = XFS_IFORK_PTR(ip, whichfork);  	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); -	size = XFS_BMAP_BROOT_SPACE(dfp); +	size = XFS_BMAP_BROOT_SPACE(mp, dfp);  	nrecs = be16_to_cpu(dfp->bb_numrecs);  	/* @@ -805,14 +806,14 @@ xfs_iformat_btree(  	 * blocks.  	 */  	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= -			XFS_IFORK_MAXEXT(ip, whichfork) || +					XFS_IFORK_MAXEXT(ip, whichfork) ||  		     XFS_BMDR_SPACE_CALC(nrecs) > -			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || +					XFS_DFORK_SIZE(dip, mp, whichfork) ||  		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { -		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", -			(unsigned long long) ip->i_ino); +		xfs_warn(mp, "corrupt inode %Lu (btree).", +					(unsigned long long) ip->i_ino);  		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, -				 ip->i_mount, dip); +					 mp, dip);  		return XFS_ERROR(EFSCORRUPTED);  	} @@ -823,8 +824,7 @@ xfs_iformat_btree(  	 * Copy and convert from the on-disk structure  	 * to the in-memory structure.  	 */ -	xfs_bmdr_to_bmbt(ip->i_mount, dfp, -			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), +	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),  			 ifp->if_broot, size);  	ifp->if_flags &= ~XFS_IFEXTENTS;  	ifp->if_flags |= XFS_IFBROOT; @@ -2037,7 +2037,7 @@ xfs_iroot_realloc(  		 * allocate it now and get out.  		 
*/  		if (ifp->if_broot_bytes == 0) { -			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); +			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);  			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);  			ifp->if_broot_bytes = (int)new_size;  			return; @@ -2051,9 +2051,9 @@ xfs_iroot_realloc(  		 */  		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);  		new_max = cur_max + rec_diff; -		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); +		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);  		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, -				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ +				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),  				KM_SLEEP | KM_NOFS);  		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,  						     ifp->if_broot_bytes); @@ -2061,7 +2061,7 @@ xfs_iroot_realloc(  						     (int)new_size);  		ifp->if_broot_bytes = (int)new_size;  		ASSERT(ifp->if_broot_bytes <= -			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); +			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));  		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));  		return;  	} @@ -2076,7 +2076,7 @@ xfs_iroot_realloc(  	new_max = cur_max + rec_diff;  	ASSERT(new_max >= 0);  	if (new_max > 0) -		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); +		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);  	else  		new_size = 0;  	if (new_size > 0) { @@ -2084,7 +2084,8 @@ xfs_iroot_realloc(  		/*  		 * First copy over the btree block header.  		 */ -		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); +		memcpy(new_broot, ifp->if_broot, +			XFS_BMBT_BLOCK_LEN(ip->i_mount));  	} else {  		new_broot = NULL;  		ifp->if_flags &= ~XFS_IFBROOT; @@ -2114,7 +2115,7 @@ xfs_iroot_realloc(  	ifp->if_broot = new_broot;  	ifp->if_broot_bytes = (int)new_size;  	ASSERT(ifp->if_broot_bytes <= -		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); +		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));  	return;  } @@ -2427,7 +2428,7 @@ xfs_iflush_fork(  			ASSERT(ifp->if_broot != NULL);  			ASSERT(ifp->if_broot_bytes <=  			       (XFS_IFORK_SIZE(ip, whichfork) + -				XFS_BROOT_SIZE_ADJ)); +				XFS_BROOT_SIZE_ADJ(ip)));  			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,  				(xfs_bmdr_block_t *)cp,  				XFS_DFORK_SIZE(dip, mp, whichfork)); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 3ca3380c3af..3762ce2e99f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -29,6 +29,7 @@  #include "xfs_bmap_btree.h"  #include "xfs_alloc_btree.h"  #include "xfs_ialloc_btree.h" +#include "xfs_btree.h"  #include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_inode_item.h" @@ -1928,6 +1929,33 @@ xlog_recover_do_reg_buffer(  	/* Shouldn't be any more regions */  	ASSERT(i == item->ri_total); + +	switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) { +	case XFS_BLF_BTREE_BUF: +		switch (be32_to_cpu(*(__be32 *)bp->b_addr)) { +		case XFS_ABTB_CRC_MAGIC: +		case XFS_ABTC_CRC_MAGIC: +		case XFS_ABTB_MAGIC: +		case XFS_ABTC_MAGIC: +			bp->b_ops = &xfs_allocbt_buf_ops; +			break; +		case XFS_IBT_CRC_MAGIC: +		case XFS_IBT_MAGIC: +			bp->b_ops = &xfs_inobt_buf_ops; +			break; +		case XFS_BMAP_CRC_MAGIC: +		case XFS_BMAP_MAGIC: +			bp->b_ops = &xfs_bmbt_buf_ops; +			break; +		default: +			xfs_warn(mp, "Bad btree block magic!"); +			ASSERT(0); +			break; +		} +		break; +	default: +		break; +	}  }  /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index cd29f617102..1b04fe59c60 100644 --- a/fs/xfs/xfs_trans.h +++ 
b/fs/xfs/xfs_trans.h @@ -505,6 +505,8 @@ void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);  void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); +void		xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, +				       uint);  void		xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);  void		xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);  void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 3edf5dbee00..f950edd0d53 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -659,6 +659,7 @@ xfs_trans_binval(  		ASSERT(XFS_BUF_ISSTALE(bp));  		ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));  		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF)); +		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_TYPE_MASK));  		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);  		ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);  		ASSERT(tp->t_flags & XFS_TRANS_DIRTY); @@ -671,6 +672,7 @@ xfs_trans_binval(  	bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);  	bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;  	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; +	bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK;  	for (i = 0; i < bip->bli_format_count; i++) {  		memset(bip->bli_formats[i].blf_data_map, 0,  		       (bip->bli_formats[i].blf_map_size * sizeof(uint))); @@ -751,6 +753,26 @@ xfs_trans_inode_alloc_buf(  	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;  } +/* + * Set the type of the buffer for log recovery so that it can correctly identify + * and hence attach the correct buffer ops to the buffer after replay. + */ +void +xfs_trans_buf_set_type( +	struct xfs_trans	*tp, +	struct xfs_buf		*bp, +	uint			type) +{ +	struct xfs_buf_log_item	*bip = bp->b_fspriv; + +	ASSERT(bp->b_transp == tp); +	ASSERT(bip != NULL); +	ASSERT(atomic_read(&bip->bli_refcount) > 0); +	ASSERT((type & XFS_BLF_TYPE_MASK) != 0); + +	bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK; +	bip->__bli_format.blf_flags |= type; +}  /*   * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of @@ -769,14 +791,9 @@ xfs_trans_dquot_buf(  	xfs_buf_t	*bp,  	uint		type)  { -	xfs_buf_log_item_t	*bip = bp->b_fspriv; - -	ASSERT(bp->b_transp == tp); -	ASSERT(bip != NULL);  	ASSERT(type == XFS_BLF_UDQUOT_BUF ||  	       type == XFS_BLF_PDQUOT_BUF ||  	       type == XFS_BLF_GDQUOT_BUF); -	ASSERT(atomic_read(&bip->bli_refcount) > 0); -	bip->__bli_format.blf_flags |= type; +	xfs_trans_buf_set_type(tp, bp, type);  }  |