diff options
Diffstat (limited to 'lib/raid6')
| -rw-r--r-- | lib/raid6/Makefile | 78 | ||||
| -rw-r--r-- | lib/raid6/mktables.c | 132 | ||||
| -rw-r--r-- | lib/raid6/raid6algos.c | 154 | ||||
| -rw-r--r-- | lib/raid6/raid6altivec.uc | 130 | ||||
| -rw-r--r-- | lib/raid6/raid6int.uc | 117 | ||||
| -rw-r--r-- | lib/raid6/raid6mmx.c | 142 | ||||
| -rw-r--r-- | lib/raid6/raid6recov.c | 132 | ||||
| -rw-r--r-- | lib/raid6/raid6sse1.c | 162 | ||||
| -rw-r--r-- | lib/raid6/raid6sse2.c | 262 | ||||
| -rw-r--r-- | lib/raid6/raid6test/Makefile | 75 | ||||
| -rw-r--r-- | lib/raid6/raid6test/test.c | 124 | ||||
| -rw-r--r-- | lib/raid6/raid6x86.h | 61 | ||||
| -rw-r--r-- | lib/raid6/unroll.awk | 20 | 
13 files changed, 1589 insertions, 0 deletions
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 00000000000..19bf32da644 --- /dev/null +++ b/lib/raid6/Makefile @@ -0,0 +1,78 @@ +obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o + +raid6_pq-y	+= raid6algos.o raid6recov.o raid6tables.o \ +		   raid6int1.o raid6int2.o raid6int4.o \ +		   raid6int8.o raid6int16.o raid6int32.o \ +		   raid6altivec1.o raid6altivec2.o raid6altivec4.o \ +		   raid6altivec8.o \ +		   raid6mmx.o raid6sse1.o raid6sse2.o +hostprogs-y	+= mktables + +quiet_cmd_unroll = UNROLL  $@ +      cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ +                   < $< > $@ || ( rm -f $@ && exit 1 ) + +ifeq ($(CONFIG_ALTIVEC),y) +altivec_flags := -maltivec -mabi=altivec +endif + +targets += raid6int1.c +$(obj)/raid6int1.c:   UNROLL := 1 +$(obj)/raid6int1.c:   $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += raid6int2.c +$(obj)/raid6int2.c:   UNROLL := 2 +$(obj)/raid6int2.c:   $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += raid6int4.c +$(obj)/raid6int4.c:   UNROLL := 4 +$(obj)/raid6int4.c:   $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += raid6int8.c +$(obj)/raid6int8.c:   UNROLL := 8 +$(obj)/raid6int8.c:   $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += raid6int16.c +$(obj)/raid6int16.c:  UNROLL := 16 +$(obj)/raid6int16.c:  $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +targets += raid6int32.c +$(obj)/raid6int32.c:  UNROLL := 32 +$(obj)/raid6int32.c:  $(src)/raid6int.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_raid6altivec1.o += $(altivec_flags) +targets += raid6altivec1.c +$(obj)/raid6altivec1.c:   UNROLL := 1 +$(obj)/raid6altivec1.c:   $(src)/raid6altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_raid6altivec2.o += $(altivec_flags) +targets += raid6altivec2.c +$(obj)/raid6altivec2.c:   UNROLL := 2 
+$(obj)/raid6altivec2.c:   $(src)/raid6altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_raid6altivec4.o += $(altivec_flags) +targets += raid6altivec4.c +$(obj)/raid6altivec4.c:   UNROLL := 4 +$(obj)/raid6altivec4.c:   $(src)/raid6altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +CFLAGS_raid6altivec8.o += $(altivec_flags) +targets += raid6altivec8.c +$(obj)/raid6altivec8.c:   UNROLL := 8 +$(obj)/raid6altivec8.c:   $(src)/raid6altivec.uc $(src)/unroll.awk FORCE +	$(call if_changed,unroll) + +quiet_cmd_mktable = TABLE   $@ +      cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + +targets += raid6tables.c +$(obj)/raid6tables.c: $(obj)/mktables FORCE +	$(call if_changed,mktable) diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 00000000000..3b1500843bb --- /dev/null +++ b/lib/raid6/mktables.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + *   This file is part of the Linux kernel, and is made available under + *   the terms of the GNU General Public License version 2 or (at your + *   option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * mktables.c + * + * Make RAID-6 tables.  This is a host user space program to be run at + * compile time. + */ + +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <time.h> +/* gfmul: GF(2^8) product of a and b by shift-and-XOR.  For every set bit of b the current a is XORed into the result; a is then doubled, XORing in 0x1d whenever its 0x80 bit was set before the shift. */ +static uint8_t gfmul(uint8_t a, uint8_t b) +{ +	uint8_t v = 0; + +	while (b) { +		if (b & 1) +			v ^= a; +		a = (a << 1) ^ (a & 0x80 ? 
0x1d : 0); +		b >>= 1; +	} + +	return v; +} +/* gfpow: raise a to the power b by square-and-multiply on top of gfmul().  b is first reduced modulo 255 and, if negative, shifted up by 255. */ +static uint8_t gfpow(uint8_t a, int b) +{ +	uint8_t v = 1; + +	b %= 255; +	if (b < 0) +		b += 255; + +	while (b) { +		if (b & 1) +			v = gfmul(v, a); +		a = gfmul(a, a); +		b >>= 1; +	} + +	return v; +} +/* main: print to stdout the C source of the raid6_gfmul, raid6_gfexp, raid6_gfinv and raid6_gfexi tables, each followed by an EXPORT_SYMBOL() wrapped in #ifdef __KERNEL__.  argc and argv are not used; always returns 0. */ +int main(int argc, char *argv[]) +{ +	int i, j, k; +	uint8_t v; +	uint8_t exptbl[256], invtbl[256]; + +	printf("#include <linux/raid/pq.h>\n"); + +	/* Compute multiplication table */ +	printf("\nconst u8  __attribute__((aligned(256)))\n" +		"raid6_gfmul[256][256] =\n" +		"{\n"); +	for (i = 0; i < 256; i++) { +		printf("\t{\n"); +		for (j = 0; j < 256; j += 8) { +			printf("\t\t"); +			for (k = 0; k < 8; k++) +				printf("0x%02x,%c", gfmul(i, j + k), +				       (k == 7) ? '\n' : ' '); +		} +		printf("\t},\n"); +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfmul);\n"); +	printf("#endif\n"); + +	/* Compute power-of-2 table (exponent) */ +	v = 1; +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfexp[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) { +			exptbl[i + j] = v; +			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); +			v = gfmul(v, 2); +			if (v == 1) +				v = 0;	/* For entry 255, not a real entry */ +		} +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfexp);\n"); +	printf("#endif\n"); + +	/* Compute inverse table x^-1 == x^254 */ +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfinv[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) { +			invtbl[i + j] = v = gfpow(i + j, 254); +			printf("0x%02x,%c", v, (j == 7) ? 
'\n' : ' '); +		} +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfinv);\n"); +	printf("#endif\n"); + +	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */ +	printf("\nconst u8 __attribute__((aligned(256)))\n" +	       "raid6_gfexi[256] =\n" "{\n"); +	for (i = 0; i < 256; i += 8) { +		printf("\t"); +		for (j = 0; j < 8; j++) +			printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], +			       (j == 7) ? '\n' : ' '); +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_gfexi);\n"); +	printf("#endif\n"); + +	return 0; +} diff --git a/lib/raid6/raid6algos.c b/lib/raid6/raid6algos.c new file mode 100644 index 00000000000..1f8784bfd44 --- /dev/null +++ b/lib/raid6/raid6algos.c @@ -0,0 +1,154 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6algos.c + * + * Algorithm list and algorithm selection for RAID-6 + */ + +#include <linux/raid/pq.h> +#include <linux/gfp.h> +#ifndef __KERNEL__ +#include <sys/mman.h> +#include <stdio.h> +#else +#if !RAID6_USE_EMPTY_ZERO_PAGE +/* In .bss so it's zeroed */ +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +EXPORT_SYMBOL(raid6_empty_zero_page); +#endif +#endif +/* Descriptor of the algorithm in use; raid6_select_algo() copies the winning entry into it. */ +struct raid6_calls raid6_call; +EXPORT_SYMBOL_GPL(raid6_call); +/* NULL-terminated list of candidate implementations; which entries are compiled in depends on the architecture macros tested below. */ +const struct raid6_calls * const raid6_algos[] = { +	&raid6_intx1, +	&raid6_intx2, +	&raid6_intx4, +	&raid6_intx8, +#if defined(__ia64__) +	&raid6_intx16, +	&raid6_intx32, +#endif +#if defined(__i386__) && !defined(__arch_um__) +	&raid6_mmxx1, +	&raid6_mmxx2, +	&raid6_sse1x1, +	&raid6_sse1x2, +	&raid6_sse2x1, +	&raid6_sse2x2, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) +	&raid6_sse2x1, +	&raid6_sse2x2, +	&raid6_sse2x4, +#endif +#ifdef CONFIG_ALTIVEC +	&raid6_altivec1, +	&raid6_altivec2, +	&raid6_altivec4, +	&raid6_altivec8, +#endif +	NULL +}; + +#ifdef __KERNEL__ +#define RAID6_TIME_JIFFIES_LG2	4 +#else +/* Need more time to be stable in userspace */ +#define RAID6_TIME_JIFFIES_LG2	9 +#define time_before(x, y) ((x) < (y)) +#endif + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as a convenient data set to abuse */ +/* Every candidate whose valid hook is NULL or returns non-zero is timed for 1 << RAID6_TIME_JIFFIES_LG2 jiffies.  A higher prefer value wins; equal prefer is decided by the higher iteration count.  Returns 0 when an algorithm was chosen, -ENOMEM when the scratch pages cannot be allocated, -EINVAL when no candidate was usable. */ +int __init raid6_select_algo(void) +{ +	const struct raid6_calls * const * algo; +	const struct raid6_calls * best; +	char *syndromes; +	void *dptrs[(65536/PAGE_SIZE)+2]; +	int i, disks; +	unsigned long perf, bestperf; +	int bestprefer; +	unsigned long j0, j1; + +	disks = (65536/PAGE_SIZE)+2; +	for ( i = 0 ; i < disks-2 ; i++ ) { +		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; +	} + +	/* Normal code - use a 2-page allocation to avoid D$ conflict */ +	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + +	if ( !syndromes ) { +		printk("raid6: Yikes!  
No memory available.\n"); +		return -ENOMEM; +	} + +	dptrs[disks-2] = syndromes; +	dptrs[disks-1] = syndromes + PAGE_SIZE; + +	bestperf = 0;  bestprefer = 0;  best = NULL; + +	for ( algo = raid6_algos ; *algo ; algo++ ) { +		if ( !(*algo)->valid || (*algo)->valid() ) { +			perf = 0; +			/* Spin until jiffies changes so the timed window starts right after a tick, then count gen_syndrome() calls until the window ends. */ +			preempt_disable(); +			j0 = jiffies; +			while ( (j1 = jiffies) == j0 ) +				cpu_relax(); +			while (time_before(jiffies, +					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { +				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); +				perf++; +			} +			preempt_enable(); + +			if ( (*algo)->prefer > bestprefer || +			     ((*algo)->prefer == bestprefer && +			      perf > bestperf) ) { +				best = *algo; +				bestprefer = best->prefer; +				bestperf = perf; +			} +			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, +			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); +		} +	} + +	if (best) { +		printk("raid6: using algorithm %s (%ld MB/s)\n", +		       best->name, +		       (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); +		raid6_call = *best; +	} else +		printk("raid6: Yikes!  No algorithm found!\n"); + +	free_pages((unsigned long)syndromes, 1); + +	return best ? 0 : -EINVAL; +} +/* Module exit hook; the body is empty, there is nothing to tear down. */ +static void raid6_exit(void) +{ +	do { } while (0); +} + +subsys_initcall(raid6_select_algo); +module_exit(raid6_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); diff --git a/lib/raid6/raid6altivec.uc b/lib/raid6/raid6altivec.uc new file mode 100644 index 00000000000..2654d5c854b --- /dev/null +++ b/lib/raid6/raid6altivec.uc @@ -0,0 +1,130 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2004 H. 
Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6altivec$#.c + * + * $#-way unrolled Altivec RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + * + * <benh> hpa: in process, + * you can just "steal" the vec unit with enable_kernel_altivec() (but + * bracket this with preempt_disable/enable or in a lock) + */ + +#include <linux/raid/pq.h> + +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +# include <asm/system.h> +# include <asm/cputable.h> +#endif + +/* + * This is the C data type to use.  We use a vector of + * signed char so vec_cmpgt() will generate the right + * instruction. + */ + +typedef vector signed char unative_t; +/* NBYTES(x) builds a vector whose 16 byte lanes all hold x; NSIZE is the size of one vector in bytes. */ +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE	sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte.  It is written as vec_add(v, v). + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ +	return vec_add(v,v); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. 
+ */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ +	unative_t zv = NBYTES(0); + +	/* vec_cmpgt returns a vector bool char; thus the need for the cast */ +	return (unative_t)vec_cmpgt(zv, v); +} + + +/* This is noinline to make damned sure that gcc doesn't move any of the +   Altivec code around the enable/disable code */ +static void noinline +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	unative_t wd$$, wq$$, wp$$, w1$$, w2$$; +	unative_t x1d = NBYTES(0x1d); + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ +	/* Seed P and Q from the highest data disk, then fold in each lower disk: P ^= data; Q = (SHLBYTE(Q) ^ (MASK(Q) & 0x1d)) ^ data.  Results are stored to p and q once per outer step. */ +	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { +		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; +			wp$$ = vec_xor(wp$$, wd$$); +			w2$$ = MASK(wq$$); +			w1$$ = SHLBYTE(wq$$); +			w2$$ = vec_and(w2$$, x1d); +			w1$$ = vec_xor(w1$$, w2$$); +			wq$$ = vec_xor(w1$$, wd$$); +		} +		*(unative_t *)&p[d+NSIZE*$$] = wp$$; +		*(unative_t *)&q[d+NSIZE*$$] = wq$$; +	} +} +/* Entry point stored in the descriptor below: disables preemption, calls enable_kernel_altivec(), runs the worker above and re-enables preemption. */ +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	preempt_disable(); +	enable_kernel_altivec(); + +	raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + +	preempt_enable(); +} +/* raid6_have_altivec() is declared for every unroll factor but only defined when $# == 1. */ +int raid6_have_altivec(void); +#if $# == 1 +int raid6_have_altivec(void) +{ +	/* This assumes either all CPUs have Altivec or none does */ +# ifdef __KERNEL__ +	return cpu_has_feature(CPU_FTR_ALTIVEC); +# else +	return 1; +# endif +} +#endif + +const struct raid6_calls raid6_altivec$# = { +	raid6_altivec$#_gen_syndrome, +	raid6_have_altivec, +	"altivecx$#", +	0 +}; + +#endif /* CONFIG_ALTIVEC */ diff --git a/lib/raid6/raid6int.uc b/lib/raid6/raid6int.uc new file mode 100644 index 00000000000..d1e276a14fa --- /dev/null +++ b/lib/raid6/raid6int.uc @@ -0,0 +1,117 @@ +/* -*- linux-c -*- 
------------------------------------------------------- * + * + *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <linux/raid/pq.h> + +/* + * This is the C data type to use: unative_t is u64 when BITS_PER_LONG + * is 64 and u32 otherwise.  NBYTES(x) replicates the byte x into every + * byte of a unative_t and NSIZE is the size of a unative_t in bytes. + */ + +/* Change this from BITS_PER_LONG if there is something better... */ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE  8 +# define NSHIFT 3 +# define NSTRING "64" +typedef u64 unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE  4 +# define NSHIFT 2 +# define NSTRING "32" +typedef u32 unative_t; +#endif + + + +/* + * IA-64 wants insane amounts of unrolling.  On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte: the whole word is shifted and the + * low bit of every byte is then cleared with NBYTES(0xfe). + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ +	unative_t vv; + +	vv = (v << 1) & NBYTES(0xfe); +	return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. 
+ */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ +	unative_t vv; +	/* Keep only the high bit of each byte; (vv << 1) - (vv >> 7) then turns every 0x80 byte into 0xFF and leaves 0x00 bytes alone. */ +	vv = v & NBYTES(0x80); +	vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ +	return vv; +} + +/* Compute P and Q over bytes bytes: ptrs[0] .. ptrs[disks-3] are read as data blocks, ptrs[disks-2] receives the XOR parity and ptrs[disks-1] the RS syndrome. */ +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ +	/* Seed P and Q from the highest data disk, then fold in each lower disk: P ^= data; Q = (SHLBYTE(Q) ^ (MASK(Q) & 0x1d in every byte)) ^ data. */ +	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { +		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; +			wp$$ ^= wd$$; +			w2$$ = MASK(wq$$); +			w1$$ = SHLBYTE(wq$$); +			w2$$ &= NBYTES(0x1d); +			w1$$ ^= w2$$; +			wq$$ = w1$$ ^ wd$$; +		} +		*(unative_t *)&p[d+NSIZE*$$] = wp$$; +		*(unative_t *)&q[d+NSIZE*$$] = wq$$; +	} +} + +const struct raid6_calls raid6_intx$# = { +	raid6_int$#_gen_syndrome, +	NULL,		/* always valid */ +	"int" NSTRING "x$#", +	0 +}; + +#endif diff --git a/lib/raid6/raid6mmx.c b/lib/raid6/raid6mmx.c new file mode 100644 index 00000000000..e7f6c13132b --- /dev/null +++ b/lib/raid6/raid6mmx.c @@ -0,0 +1,142 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6mmx.c + * + * MMX implementation of RAID-6 syndrome functions + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +/* Shared with raid6sse1.c */ +const struct raid6_mmx_constants { +	u64 x1d; +} raid6_mmx_constants = { +	0x1d1d1d1d1d1d1d1dULL, +}; +/* Validity hook for the MMX descriptors: returns the result of boot_cpu_has(X86_FEATURE_MMX). */ +static int raid6_have_mmx(void) +{ +	/* Not really "boot_cpu" but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX); +} + +/* + * Plain MMX implementation: 8 bytes per outer iteration, with the + * 0x1d constant in mm0, P in mm2 and Q in mm4, all inside a + * kernel_fpu_begin()/kernel_fpu_end() section. + */ +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 8 ) { +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm6,%mm2"); +			asm volatile("pxor %mm6,%mm4"); +		} +		asm volatile("movq %%mm2,%0" : "=m" (p[d])); +		asm volatile("pxor %mm2,%mm2"); +		asm volatile("movq %%mm4,%0" : "=m" (q[d])); +		asm volatile("pxor %mm4,%mm4"); +	} + +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx1 = { +	raid6_mmx1_gen_syndrome, +	raid6_have_mmx, +	"mmxx1", +	0 +}; + +/* + * Unrolled-by-2 MMX implementation: 16 bytes per outer iteration, + * with P in mm2/mm3 and Q in mm4/mm6. + */ +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk 
*/ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ +	asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ +		asm volatile("movq %mm2,%mm4"); /* Q[0] */ +		asm volatile("movq %mm3,%mm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("pcmpgtb %mm6,%mm7"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("paddb %mm6,%mm6"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pand %mm0,%mm7"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); +			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); +			asm volatile("pxor %mm5,%mm2"); +			asm volatile("pxor %mm7,%mm3"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm7,%mm7"); +		} +		asm volatile("movq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); +		asm volatile("movq %%mm4,%0" : "=m" (q[d])); +		asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); +	} + +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx2 = { +	raid6_mmx2_gen_syndrome, +	raid6_have_mmx, +	"mmxx2", +	0 +}; + +#endif diff --git a/lib/raid6/raid6recov.c b/lib/raid6/raid6recov.c new file mode 100644 index 00000000000..2609f00e0d6 --- /dev/null +++ b/lib/raid6/raid6recov.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. 
Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6recov.c + * + * RAID-6 data recovery in dual failure mode.  In single failure mode, + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct + * the syndrome.) + */ + +#include <linux/raid/pq.h> + +/* Recover two failed data blocks.  ptrs[disks-2] is P and ptrs[disks-1] is Q; the blocks at faila and failb are overwritten with the reconstructed data.  Entries of ptrs[] are swapped temporarily and restored before the byte loop.  NOTE(review): raid6_gfexi[failb-faila] looks like it requires faila < failb - confirm against callers. */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +		       void **ptrs) +{ +	u8 *p, *q, *dp, *dq; +	u8 px, qx, db; +	const u8 *pbmul;	/* P multiplier table for B data */ +	const u8 *qmul;		/* Q multiplier table (for both) */ + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data pages +	   Use the dead data pages as temporary storage for +	   delta p and delta q */ +	dp = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-2] = dp; +	dq = (u8 *)ptrs[failb]; +	ptrs[failb] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dp; +	ptrs[failb]   = dq; +	ptrs[disks-2] = p; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; +	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + +	/* Now do it... 
*/ +	while ( bytes-- ) { +		px    = *p ^ *dp; +		qx    = qmul[*q ^ *dq]; +		*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ +		*dp++ = db ^ px; /* Reconstructed A */ +		p++; q++; +	} +} +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +/* Recover failure of one data block plus the P block.  The block at faila is rebuilt in place from Q.  P is regenerated as well: gen_syndrome() first rewrites it with a zero page standing in for the missing block, then every recovered byte is XORed into it. */ +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +{ +	u8 *p, *q, *dq; +	const u8 *qmul;		/* Q multiplier table */ + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data page +	   Use the dead data page as temporary storage for delta q */ +	dq = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dq; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + +	/* Now do it... */ +	while ( bytes-- ) { +		*p++ ^= *dq = qmul[*q ^ *dq]; +		q++; dq++; +	} +} +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +#ifndef __KERNEL__ +/* Testing only */ + +/* Recover two failed blocks.  faila and failb are swapped if needed so that faila <= failb, then the call is dispatched by failure type; the data+Q case does nothing here. */ +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) +{ +	if ( faila > failb ) { +		int tmp = faila; +		faila = failb; +		failb = tmp; +	} + +	if ( failb == disks-1 ) { +		if ( faila == disks-2 ) { +			/* P+Q failure.  Just rebuild the syndrome. */ +			raid6_call.gen_syndrome(disks, bytes, ptrs); +		} else { +			/* data+Q failure.  Reconstruct data from P, +			   then rebuild syndrome. */ +			/* NOT IMPLEMENTED - equivalent to RAID-5 */ +		} +	} else { +		if ( failb == disks-2 ) { +			/* data+P failure. */ +			raid6_datap_recov(disks, bytes, faila, ptrs); +		} else { +			/* data+data failure. 
*/ +			raid6_2data_recov(disks, bytes, faila, failb, ptrs); +		} +	} +} + +#endif diff --git a/lib/raid6/raid6sse1.c b/lib/raid6/raid6sse1.c new file mode 100644 index 00000000000..b274dd5eab8 --- /dev/null +++ b/lib/raid6/raid6sse1.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6sse1.c + * + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions + * + * This is really an MMX implementation, but it requires SSE-1 or + * AMD MMXEXT for prefetch support and a few other features.  The + * support for nontemporal memory accesses is enough to make this + * worthwhile as a separate implementation. 
+ */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +/* Defined in raid6mmx.c */ +extern const struct raid6_mmx_constants { +	u64 x1d; +} raid6_mmx_constants; + +static int raid6_have_sse1_or_mmxext(void) +{ +	/* Not really boot_cpu but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX) && +		(boot_cpu_has(X86_FEATURE_XMM) || +		 boot_cpu_has(X86_FEATURE_MMXEXT)); +} + +/* + * Plain SSE1 implementation + */ +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 8 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); +		for ( z = z0-2 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm6,%mm2"); +			asm volatile("pxor %mm6,%mm4"); +			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); +		} +		asm volatile("pcmpgtb %mm4,%mm5"); +		asm volatile("paddb %mm4,%mm4"); +		asm volatile("pand %mm0,%mm5"); +		asm volatile("pxor %mm5,%mm4"); +		asm volatile("pxor %mm5,%mm5"); +		asm volatile("pxor %mm6,%mm2"); +		asm volatile("pxor %mm6,%mm4"); + +		asm volatile("movntq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movntq %%mm4,%0" : "=m" (q[d])); +	} + +	asm volatile("sfence" : : : 
"memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x1 = { +	raid6_sse11_gen_syndrome, +	raid6_have_sse1_or_mmxext, +	"sse1x1", +	1			/* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE1 implementation + */ +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); +	asm volatile("pxor %mm5,%mm5");	/* Zero temp */ +	asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + +	/* We uniformly assume a single prefetch covers at least 16 bytes */ +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ +		asm volatile("movq %mm2,%mm4");	/* Q[0] */ +		asm volatile("movq %mm3,%mm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %mm4,%mm5"); +			asm volatile("pcmpgtb %mm6,%mm7"); +			asm volatile("paddb %mm4,%mm4"); +			asm volatile("paddb %mm6,%mm6"); +			asm volatile("pand %mm0,%mm5"); +			asm volatile("pand %mm0,%mm7"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); +			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); +			asm volatile("pxor %mm5,%mm2"); +			asm volatile("pxor %mm7,%mm3"); +			asm volatile("pxor %mm5,%mm4"); +			asm volatile("pxor %mm7,%mm6"); +			asm volatile("pxor %mm5,%mm5"); +			asm volatile("pxor %mm7,%mm7"); +		} +		asm volatile("movntq %%mm2,%0" : "=m" (p[d])); +		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); +		asm volatile("movntq %%mm4,%0" : "=m" (q[d])); +		asm volatile("movntq %%mm6,%0" : "=m" 
(q[d+8])); +	} + +	asm volatile("sfence" : :: "memory"); +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x2 = { +	raid6_sse12_gen_syndrome, +	raid6_have_sse1_or_mmxext, +	"sse1x2", +	1			/* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/raid6sse2.c b/lib/raid6/raid6sse2.c new file mode 100644 index 00000000000..6ed6c6c0389 --- /dev/null +++ b/lib/raid6/raid6sse2.c @@ -0,0 +1,262 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6sse2.c + * + * SSE-2 implementation of RAID-6 syndrome functions + * + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "raid6x86.h" + +static const struct raid6_sse_constants { +	u64 x1d[2]; +} raid6_sse_constants  __attribute__((aligned(16))) = { +	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, +}; + +static int raid6_have_sse2(void) +{ +	/* Not really boot_cpu but "all_cpus" */ +	return boot_cpu_has(X86_FEATURE_MMX) && +		boot_cpu_has(X86_FEATURE_FXSR) && +		boot_cpu_has(X86_FEATURE_XMM) && +		boot_cpu_has(X86_FEATURE_XMM2); +} + +/* + * Plain SSE2 implementation + */ +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm0" : : "m" 
(raid6_sse_constants.x1d[0])); +	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 16 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ +		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); +		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ +		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); /* next-lower disk, loaded one step ahead of its use; NOTE(review): reads dptr[z0-1], so this presumably needs at least two data disks -- confirm with callers */ +		for ( z = z0-2 ; z >= 0 ; z-- ) { +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); /* temp = 0xff in each byte where Q[0] is negative (temp was zero) */ +			asm volatile("paddb %xmm4,%xmm4"); /* Q[0] += Q[0], byte-wise */ +			asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ +			asm volatile("pxor %xmm5,%xmm4"); /* fold it into the doubled Q[0] */ +			asm volatile("pxor %xmm5,%xmm5"); /* re-zero the temp */ +			asm volatile("pxor %xmm6,%xmm2"); /* P ^= data loaded on the previous step */ +			asm volatile("pxor %xmm6,%xmm4"); /* Q ^= same data */ +			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); /* load disk z for the next step */ +		} +		asm volatile("pcmpgtb %xmm4,%xmm5"); /* one more step, outside the loop, for the block still waiting in xmm6 */ +		asm volatile("paddb %xmm4,%xmm4"); +		asm volatile("pand %xmm0,%xmm5"); +		asm volatile("pxor %xmm5,%xmm4"); +		asm volatile("pxor %xmm5,%xmm5"); +		asm volatile("pxor %xmm6,%xmm2"); +		asm volatile("pxor %xmm6,%xmm4"); + +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal store of P */ +		asm volatile("pxor %xmm2,%xmm2"); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); /* non-temporal store of Q */ +		asm volatile("pxor %xmm4,%xmm4"); +	} + +	asm volatile("sfence" : : : "memory"); /* store fence after the non-temporal writes above */ +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x1 = { +	raid6_sse21_gen_syndrome, +	raid6_have_sse2, +	"sse2x1", +	1			/* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE2 implementation: 32 bytes (two XMM registers) per disk on each outer-loop pass + */ +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); +	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ +	asm volatile("pxor %xmm7,%xmm7"); /* Zero 
temp */ + +	/* We uniformly assume a single prefetch covers at least 32 bytes */ +	for ( d = 0 ; d < bytes ; d += 32 ) { +		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); +		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */ +		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ +		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ +		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ +		for ( z = z0-1 ; z >= 0 ; z-- ) { /* remaining data disks, highest first */ +			asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); /* temp = 0xff in each byte where Q[0] is negative (temp was zero) */ +			asm volatile("pcmpgtb %xmm6,%xmm7"); +			asm volatile("paddb %xmm4,%xmm4"); /* Q[0] += Q[0], byte-wise */ +			asm volatile("paddb %xmm6,%xmm6"); +			asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ +			asm volatile("pand %xmm0,%xmm7"); +			asm volatile("pxor %xmm5,%xmm4"); /* fold it into the doubled Q[0] */ +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); /* temps now carry this disk's data */ +			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); +			asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */ +			asm volatile("pxor %xmm7,%xmm3"); +			asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */ +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm5,%xmm5"); /* re-zero the temps for the next pcmpgtb */ +			asm volatile("pxor %xmm7,%xmm7"); +		} +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* write out P and Q with non-temporal stores */ +		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); +		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); +	} + +	asm volatile("sfence" : : : "memory"); /* store fence after the non-temporal writes above */ +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x2 = { +	raid6_sse22_gen_syndrome, +	raid6_have_sse2, +	"sse2x2", +	1			/* Has cache hints */ +}; + +#endif + +#if defined(__x86_64__) && !defined(__arch_um__) /* the by-4 variant below uses xmm10-xmm15 */ + +/* + * Unrolled-by-4 SSE2 implementation: 64 bytes (four XMM registers) per disk on each outer-loop pass + */ +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ +	u8 **dptr = (u8 **)ptrs; +	u8 *p, *q; +	int d, z, z0; + +	z0 = disks - 3;		/* Highest data disk */ +	p = dptr[z0+1];		/* XOR parity */ +	q = dptr[z0+2];		/* RS syndrome */ + +	kernel_fpu_begin(); + +	asm 
volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); /* xmm0 = 0x1d in every byte, kept for the whole run */ +	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */ +	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */ +	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */ +	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */ +	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */ +	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */ +	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */ +	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */ +	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */ +	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */ +	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */ +	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */ + +	for ( d = 0 ; d < bytes ; d += 64 ) { +		for ( z = z0 ; z >= 0 ; z-- ) { /* every data disk, highest first; P and Q start each pass zeroed, so the top disk needs no special case */ +			/* The second prefetch seems to improve performance... */ +			asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); +			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); +			asm volatile("pcmpgtb %xmm4,%xmm5"); /* temps = 0xff in each byte where the matching Q is negative (temps were zero) */ +			asm volatile("pcmpgtb %xmm6,%xmm7"); +			asm volatile("pcmpgtb %xmm12,%xmm13"); +			asm volatile("pcmpgtb %xmm14,%xmm15"); +			asm volatile("paddb %xmm4,%xmm4"); /* Q += Q, byte-wise */ +			asm volatile("paddb %xmm6,%xmm6"); +			asm volatile("paddb %xmm12,%xmm12"); +			asm volatile("paddb %xmm14,%xmm14"); +			asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */ +			asm volatile("pand %xmm0,%xmm7"); +			asm volatile("pand %xmm0,%xmm13"); +			asm volatile("pand %xmm0,%xmm15"); +			asm volatile("pxor %xmm5,%xmm4"); /* fold it into the doubled Q */ +			asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm13,%xmm12"); +			asm volatile("pxor %xmm15,%xmm14"); +			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); /* temps now carry this disk's data */ +			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); +			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); +			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); +			asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */ +			asm volatile("pxor %xmm7,%xmm3"); +			asm volatile("pxor %xmm13,%xmm10"); +			asm volatile("pxor %xmm15,%xmm11"); +			asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */ +		
	asm volatile("pxor %xmm7,%xmm6"); +			asm volatile("pxor %xmm13,%xmm12"); +			asm volatile("pxor %xmm15,%xmm14"); +			asm volatile("pxor %xmm5,%xmm5"); /* re-zero the temps for the next pcmpgtb */ +			asm volatile("pxor %xmm7,%xmm7"); +			asm volatile("pxor %xmm13,%xmm13"); +			asm volatile("pxor %xmm15,%xmm15"); +		} +		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal store, then clear the accumulator for the next pass */ +		asm volatile("pxor %xmm2,%xmm2"); +		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); +		asm volatile("pxor %xmm3,%xmm3"); +		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); +		asm volatile("pxor %xmm10,%xmm10"); +		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); +		asm volatile("pxor %xmm11,%xmm11"); +		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); +		asm volatile("pxor %xmm4,%xmm4"); +		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); +		asm volatile("pxor %xmm6,%xmm6"); +		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); +		asm volatile("pxor %xmm12,%xmm12"); +		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); +		asm volatile("pxor %xmm14,%xmm14"); +	} + +	asm volatile("sfence" : : : "memory"); /* store fence after the non-temporal writes above */ +	kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x4 = { +	raid6_sse24_gen_syndrome, +	raid6_have_sse2, +	"sse2x4", +	1			/* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/raid6test/Makefile b/lib/raid6/raid6test/Makefile new file mode 100644 index 00000000000..2874cbef529 --- /dev/null +++ b/lib/raid6/raid6test/Makefile @@ -0,0 +1,75 @@ +# +# This is a simple Makefile to test some of the RAID-6 code +# from userspace. +# + +CC	 = gcc +OPTFLAGS = -O2			# Adjust as desired +CFLAGS	 = -I.. 
-I ../../../include -g $(OPTFLAGS) +LD	 = ld +AWK	 = awk -f +AR	 = ar +RANLIB	 = ranlib +# AWK includes -f so that ../unroll.awk in the rules below is read as a script file, not as program text +.c.o: +	$(CC) $(CFLAGS) -c -o $@ $< +# Build from private copies of the sources kept in the parent directory +%.c: ../%.c +	cp -f $< $@ + +%.uc: ../%.uc +	cp -f $< $@ + +all:	raid6.a raid6test + +raid6.a: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ +	 raid6int32.o \ +	 raid6mmx.o raid6sse1.o raid6sse2.o \ +	 raid6altivec1.o raid6altivec2.o raid6altivec4.o raid6altivec8.o \ +	 raid6recov.o raid6algos.o \ +	 raid6tables.o +	 rm -f $@ +	 $(AR) cq $@ $^ +	 $(RANLIB) $@ + +raid6test: test.c raid6.a +	$(CC) $(CFLAGS) -o raid6test $^ +# Each unrolled source is generated from its .uc template with N set to the unroll factor +raid6altivec1.c: raid6altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=1 < raid6altivec.uc > $@ + +raid6altivec2.c: raid6altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=2 < raid6altivec.uc > $@ + +raid6altivec4.c: raid6altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=4 < raid6altivec.uc > $@ + +raid6altivec8.c: raid6altivec.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=8 < raid6altivec.uc > $@ + +raid6int1.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=1 < raid6int.uc > $@ + +raid6int2.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=2 < raid6int.uc > $@ + +raid6int4.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=4 < raid6int.uc > $@ + +raid6int8.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=8 < raid6int.uc > $@ + +raid6int16.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=16 < raid6int.uc > $@ + +raid6int32.c: raid6int.uc ../unroll.awk +	$(AWK) ../unroll.awk -vN=32 < raid6int.uc > $@ + +raid6tables.c: mktables +	./mktables > raid6tables.c +# Remove build products plus every copied or generated source, including all copied .uc templates +clean: +	rm -f *.o *.a mktables mktables.c *.uc raid6*.c raid6test + +spotless: clean +	rm -f *~ diff --git a/lib/raid6/raid6test/test.c b/lib/raid6/raid6test/test.c new file mode 100644 index 00000000000..7a930318b17 --- /dev/null +++ b/lib/raid6/raid6test/test.c @@ -0,0 +1,124 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + *   Copyright 2002-2007 H. 
Peter Anvin - All Rights Reserved + * + *   This file is part of the Linux kernel, and is made available under + *   the terms of the GNU General Public License version 2 or (at your + *   option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6test.c + * + * Test RAID-6 recovery with various algorithms + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <linux/raid/pq.h> + +#define NDISKS		16	/* Including P and Q */ + +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +struct raid6_calls raid6_call; + +char *dataptrs[NDISKS]; +char data[NDISKS][PAGE_SIZE]; +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; + +static void makedata(void) /* fill every page (P and Q slots included) with rand() bytes and point dataptrs[] at them */ +{ +	int i, j; + +	for (i = 0; i < NDISKS; i++) { +		for (j = 0; j < PAGE_SIZE; j++) +			data[i][j] = rand(); + +		dataptrs[i] = data[i]; +	} +} + +static char disk_type(int d) /* report label: 'P' and 'Q' are the last two slots, anything else is 'D' */ +{ +	switch (d) { +	case NDISKS-2: +		return 'P'; +	case NDISKS-1: +		return 'Q'; +	default: +		return 'D'; +	} +} + +static int test_disks(int i, int j) /* recover slots i and j into scratch pages, compare with the originals; nonzero on mismatch */ +{ +	int erra, errb; + +	memset(recovi, 0xf0, PAGE_SIZE); +	memset(recovj, 0xba, PAGE_SIZE); + +	dataptrs[i] = recovi; +	dataptrs[j] = recovj; + +	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); + +	erra = memcmp(data[i], recovi, PAGE_SIZE); +	errb = memcmp(data[j], recovj, PAGE_SIZE); + +	if (i < NDISKS-2 && j == NDISKS-1) { +		/* We don't implement the DQ failure scenario, since it's +		   equivalent to a RAID-5 failure (XOR, then recompute Q) */ +		erra = errb = 0; +	} else { +		printf("algo=%-8s  faila=%3d(%c)  failb=%3d(%c)  %s\n", +		       raid6_call.name, +		       i, disk_type(i), +		       j, disk_type(j), +		       (!erra && !errb) ? "OK" : +		       !erra ? "ERRB" : +		       !errb ? 
"ERRA" : "ERRAB"); +	} + +	dataptrs[i] = data[i]; /* put the real pages back for the next pair */ +	dataptrs[j] = data[j]; + +	return erra || errb; +} + +int main(int argc, char *argv[]) /* exit status: 0 when every recovery matched, 1 otherwise */ +{ +	const struct raid6_calls *const *algo; +	int i, j; +	int err = 0; + +	makedata(); + +	for (algo = raid6_algos; *algo; algo++) { +		if (!(*algo)->valid || (*algo)->valid()) { +			raid6_call = **algo; + +			/* Nuke syndromes */ +			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); /* P and Q are the last two, adjacent rows of data[] */ + +			/* Generate assumed good syndrome */ +			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, +						(void **)&dataptrs); + +			for (i = 0; i < NDISKS-1; i++) +				for (j = i+1; j < NDISKS; j++) +					err += test_disks(i, j); +		} +		printf("\n"); +	} + +	printf("\n"); +	/* Pick the best algorithm test */ +	raid6_select_algo(); + +	if (err) +		printf("\n*** ERRORS FOUND ***\n"); + +	return err != 0; /* not the raw count: an exit status keeps only 8 bits, so 256 mismatches would read as success */ +} diff --git a/lib/raid6/raid6x86.h b/lib/raid6/raid6x86.h new file mode 100644 index 00000000000..4c22c156855 --- /dev/null +++ b/lib/raid6/raid6x86.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------- * + * + *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + *   This program is free software; you can redistribute it and/or modify + *   it under the terms of the GNU General Public License as published by + *   the Free Software Foundation, Inc., 53 Temple Place Ste 330, + *   Boston MA 02111-1307, USA; either version 2 of the License, or + *   (at your option) any later version; incorporated herein by reference. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * raid6x86.h + * + * Definitions common to x86 and x86-64 RAID-6 code only + */ + +#ifndef LINUX_RAID_RAID6X86_H +#define LINUX_RAID_RAID6X86_H + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#ifdef __KERNEL__ /* Real code */ + +#include <asm/i387.h> + +#else /* Dummy code for user space testing */ + +static inline void kernel_fpu_begin(void) +{ +} + +static inline void kernel_fpu_end(void) +{ +} + +#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions +					   * (fast save and restore) */ +#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */ + +/* Should work well enough on modern CPUs for testing: flag is word*32+bit; word 0 is read from EDX of CPUID leaf 1, any other word from EDX of leaf 0x80000001 */ +static inline int boot_cpu_has(int flag) +{ +	u32 eax = (flag >> 5) ? 0x80000001 : 1; +	u32 edx; + +	asm volatile("cpuid" +		     : "+a" (eax), "=d" (edx) +		     : : "ecx", "ebx"); + +	return (edx >> (flag & 31)) & 1; +} + +#endif /* ndef __KERNEL__ */ + +#endif +#endif diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 00000000000..c6aa03631df --- /dev/null +++ b/lib/raid6/unroll.awk @@ -0,0 +1,20 @@ + +# This filter requires one command line option of form -vN=n +# where n must be a decimal number. +# +# Repeat each input line containing $$ n times, replacing $$ with 0...n-1. +# Replace each $# with n, and each $* with a single $. + +BEGIN { +	n = N + 0 +} +{ +	if (/\$\$/) { rep = n } else { rep = 1 } # only lines containing $$ are repeated +	for (i = 0; i < rep; ++i) { +		tmp = $0 +		gsub(/\$\$/, i, tmp) +		gsub(/\$#/, n, tmp) # a bare "#" is literal in a regex; "\#" is not a defined escape and newer gawk warns about it +		gsub(/\$\*/, "$", tmp) +		print tmp +	} +}  |