diff options
Diffstat (limited to 'include/zfs/spa.h')
| -rw-r--r-- | include/zfs/spa.h | 292 | 
1 files changed, 292 insertions, 0 deletions
/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004  Free Software Foundation, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef ZFS_SPA_HEADER
#define	ZFS_SPA_HEADER 1

/*
 * Layout of DVAs, block pointers and block checksums, plus the macros used
 * to read and write their bitfields.
 *
 * This header does not define uint64_t, P2PHASE() or dmu_ot[]; the including
 * file has to provide them before any macro that relies on them is expanded.
 */

/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * DECODE extracts the `len'-bit field starting at bit `low' of x;
 * ENCODE masks x to `len' bits and moves it up to bit `low'.
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/*
 * Store `val' into a field of the lvalue x.  XORing x with
 * (old field ^ new value) replaces the field and leaves every other bit
 * alone.  x is evaluated more than once, so it must have no side effects.
 */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

/*
 * Shifted-and-biased variants: the stored field is (value >> shift) - bias,
 * and reading it back yields (field + bias) << shift.
 */
#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))

/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

#define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)

/*
 * Size of block to hold the configuration data (a packed nvlist)
 */
#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)

/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE up to 64 times larger	*/

/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 */
typedef struct dva {
	uint64_t	dva_word[2];
} dva_t;

/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];
} zio_cksum_t;

/*
 * Each block is described by its DVAs, time of birth, checksum, etc.
 * The word-by-word, bit-by-bit layout of the blkptr is as follows:
 *
 *	64	56	48	40	32	24	16	8	0
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 0	|		vdev1		| GRID	|	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 1	|G|			 offset1				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 2	|		vdev2		| GRID	|	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 3	|G|			 offset2				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 4	|		vdev3		| GRID	|	  ASIZE		|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 5	|G|			 offset3				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 6	|BDX|lvl| type	| cksum | comp	|	  PSIZE	|	  LSIZE	|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 7	|			padding					|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 8	|			padding					|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * 9	|			physical birth txg			|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * a	|			logical birth txg			|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * b	|			fill count				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * c	|			checksum[0]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * d	|			checksum[1]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * e	|			checksum[2]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 * f	|			checksum[3]				|
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Legend:
 *
 * vdev		virtual device ID
 * offset	offset into virtual device
 * LSIZE	logical size
 * PSIZE	physical size (after compression)
 * ASIZE	allocated size (including RAID-Z parity and gang block headers)
 * GRID		RAID-Z layout information (reserved for future use)
 * cksum	checksum function
 * comp		compression function
 * G		gang block indicator
 * B		byteorder (endianness)
 * D		dedup
 * X		unused
 * lvl		level of indirection
 * type		DMU object type
 * phys birth	txg of block allocation; zero if same as logical birth txg
 * log. birth	transaction group in which the block was logically born
 * fill count	number of non-zero blocks under this bp
 * checksum[4]	256-bit checksum of the data this bp describes
 */
#define	SPA_BLKPTRSHIFT	7		/* blkptr_t is 128 bytes	*/
#define	SPA_DVAS_PER_BP	3		/* Number of DVAs in a bp	*/

typedef struct blkptr {
	dva_t		blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
	uint64_t	blk_prop;	/* size, compression, type, etc		*/
	uint64_t	blk_pad[2];	/* Extra space for the future		*/
	uint64_t	blk_phys_birth;	/* txg when block was allocated		*/
	uint64_t	blk_birth;	/* transaction group at birth		*/
	uint64_t	blk_fill;	/* fill count				*/
	zio_cksum_t	blk_cksum;	/* 256-bit checksum			*/
} blkptr_t;

/*
 * Macros to get and set fields in a bp or DVA.
 *
 * `dva' and `bp' are pointers.  The setters expand their pointer argument
 * more than once, so pass expressions without side effects.
 */

/* ASIZE: low SPA_ASIZEBITS bits of word 0, kept in 512-byte units. */
#define	DVA_GET_ASIZE(dva)	\
	BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_ASIZE(dva, x)	\
	BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GRID(dva)	BF64_GET((dva)->dva_word[0], 24, 8)
#define	DVA_SET_GRID(dva, x)	BF64_SET((dva)->dva_word[0], 24, 8, x)

#define	DVA_GET_VDEV(dva)	BF64_GET((dva)->dva_word[0], 32, 32)
#define	DVA_SET_VDEV(dva, x)	BF64_SET((dva)->dva_word[0], 32, 32, x)

#define	DVA_GET_GANG(dva)	BF64_GET((dva)->dva_word[1], 63, 1)
#define	DVA_SET_GANG(dva, x)	BF64_SET((dva)->dva_word[1], 63, 1, x)

/*
 * LSIZE and PSIZE are kept in 512-byte units with a bias of one, i.e. a
 * stored field of 0 stands for 512 bytes.
 */
#define	BP_GET_LSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_LSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

/*
 * PSIZE sits directly above LSIZE (bits 16..31 of blk_prop in the layout
 * above).  BP_GET_UCSIZE and BP_IS_RAIDZ below expand to BP_GET_PSIZE, so
 * it has to be defined in this header.
 */
#define	BP_GET_PSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_PSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_COMPRESS(bp)		BF64_GET((bp)->blk_prop, 32, 8)
#define	BP_SET_COMPRESS(bp, x)		BF64_SET((bp)->blk_prop, 32, 8, x)

#define	BP_GET_CHECKSUM(bp)		BF64_GET((bp)->blk_prop, 40, 8)
#define	BP_SET_CHECKSUM(bp, x)		BF64_SET((bp)->blk_prop, 40, 8, x)

#define	BP_GET_TYPE(bp)			BF64_GET((bp)->blk_prop, 48, 8)
#define	BP_SET_TYPE(bp, x)		BF64_SET((bp)->blk_prop, 48, 8, x)

#define	BP_GET_LEVEL(bp)		BF64_GET((bp)->blk_prop, 56, 5)
#define	BP_SET_LEVEL(bp, x)		BF64_SET((bp)->blk_prop, 56, 5, x)

#define	BP_GET_PROP_BIT_61(bp)		BF64_GET((bp)->blk_prop, 61, 1)
#define	BP_SET_PROP_BIT_61(bp, x)	BF64_SET((bp)->blk_prop, 61, 1, x)

#define	BP_GET_DEDUP(bp)		BF64_GET((bp)->blk_prop, 62, 1)
#define	BP_SET_DEDUP(bp, x)		BF64_SET((bp)->blk_prop, 62, 1, x)

/* The getter returns 0 minus the byteorder bit, not the raw bit itself. */
#define	BP_GET_BYTEORDER(bp)		(0 - BF64_GET((bp)->blk_prop, 63, 1))
#define	BP_SET_BYTEORDER(bp, x)		BF64_SET((bp)->blk_prop, 63, 1, x)

/* A stored physical birth of zero means "same as the logical birth". */
#define	BP_PHYSICAL_BIRTH(bp)	\
	((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)

/*
 * Record both birth txgs; the physical one is stored as 0 when it equals
 * the logical one.  Wrapped in do/while (0) so the macro behaves as a
 * single statement, e.g. as the body of an un-braced if/else.
 */
#define	BP_SET_BIRTH(bp, logical, physical)	\
	do {	\
		(bp)->blk_birth = (logical);	\
		(bp)->blk_phys_birth =	\
		    ((logical) == (physical) ? 0 : (physical));	\
	} while (0)

/* Sum of the allocated sizes of all three DVAs. */
#define	BP_GET_ASIZE(bp)	\
	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) +	\
	 DVA_GET_ASIZE(&(bp)->blk_dva[1]) +	\
	 DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/*
 * PSIZE for indirect blocks (level > 0) and for object types whose
 * dmu_ot[] entry has ot_metadata set, LSIZE otherwise.  No trailing
 * semicolon, so the macro is usable inside larger expressions.
 */
#define	BP_GET_UCSIZE(bp)	\
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ?	\
	 BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))

/* Number of DVAs with a non-zero allocated size. */
#define	BP_GET_NDVAS(bp)	\
	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) +	\
	 !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) +	\
	 !!DVA_GET_ASIZE(&(bp)->blk_dva[2]))

/* Number of DVAs with the gang bit set. */
#define	BP_COUNT_GANG(bp)	\
	(DVA_GET_GANG(&(bp)->blk_dva[0]) +	\
	 DVA_GET_GANG(&(bp)->blk_dva[1]) +	\
	 DVA_GET_GANG(&(bp)->blk_dva[2]))

#define	DVA_EQUAL(dva1, dva2)	\
	((dva1)->dva_word[1] == (dva2)->dva_word[1] &&	\
	 (dva1)->dva_word[0] == (dva2)->dva_word[0])

/* Two bps are equal when their physical birth and all three DVAs match. */
#define	BP_EQUAL(bp1, bp2)	\
	(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) &&	\
	 DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) &&	\
	 DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) &&	\
	 DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))

/*
 * Takes zio_cksum_t objects (not pointers).  The OR of the four word
 * differences is zero only when every word pair is equal.
 */
#define	ZIO_CHECKSUM_EQUAL(zc1, zc2)	\
	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) |	\
	       ((zc1).zc_word[1] - (zc2).zc_word[1]) |	\
	       ((zc1).zc_word[2] - (zc2).zc_word[2]) |	\
	       ((zc1).zc_word[3] - (zc2).zc_word[3])))

#define	DVA_IS_VALID(dva)	(DVA_GET_ASIZE(dva) != 0)

/* Store the four checksum words through the zio_cksum_t pointer zcp. */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
	do {	\
		(zcp)->zc_word[0] = (w0);	\
		(zcp)->zc_word[1] = (w1);	\
		(zcp)->zc_word[2] = (w2);	\
		(zcp)->zc_word[3] = (w3);	\
	} while (0)

#define	BP_IDENTITY(bp)		(&(bp)->blk_dva[0])
#define	BP_IS_GANG(bp)		DVA_GET_GANG(BP_IDENTITY(bp))
#define	BP_IS_HOLE(bp)		((bp)->blk_birth == 0)

/* BP_IS_RAIDZ(bp) assumes no block compression */
#define	BP_IS_RAIDZ(bp)	\
	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) > BP_GET_PSIZE(bp))

/* Clear every field of the block pointer. */
#define	BP_ZERO(bp)	\
	do {	\
		(bp)->blk_dva[0].dva_word[0] = 0;	\
		(bp)->blk_dva[0].dva_word[1] = 0;	\
		(bp)->blk_dva[1].dva_word[0] = 0;	\
		(bp)->blk_dva[1].dva_word[1] = 0;	\
		(bp)->blk_dva[2].dva_word[0] = 0;	\
		(bp)->blk_dva[2].dva_word[1] = 0;	\
		(bp)->blk_prop = 0;	\
		(bp)->blk_pad[0] = 0;	\
		(bp)->blk_pad[1] = 0;	\
		(bp)->blk_phys_birth = 0;	\
		(bp)->blk_birth = 0;	\
		(bp)->blk_fill = 0;	\
		ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);	\
	} while (0)

#define	BP_SPRINTF_LEN	320

#endif	/* ! ZFS_SPA_HEADER */