diff options
Diffstat (limited to 'lib/raid6')
| -rw-r--r-- | lib/raid6/Makefile | 2 | ||||
| -rw-r--r-- | lib/raid6/algos.c | 127 | ||||
| -rw-r--r-- | lib/raid6/mktables.c | 25 | ||||
| -rw-r--r-- | lib/raid6/recov.c | 15 | ||||
| -rw-r--r-- | lib/raid6/recov_ssse3.c | 335 | ||||
| -rw-r--r-- | lib/raid6/test/Makefile | 2 | ||||
| -rw-r--r-- | lib/raid6/test/test.c | 32 | ||||
| -rw-r--r-- | lib/raid6/x86.h | 15 | 
8 files changed, 491 insertions, 62 deletions
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8a38102770f..de06dfe165b 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,6 +1,6 @@  obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o -raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \ +raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \  		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \  		   altivec8.o mmx.o sse1.o sse2.o  hostprogs-y	+= mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 8b02f60ffc8..589f5f50ad2 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -17,11 +17,11 @@   */  #include <linux/raid/pq.h> -#include <linux/module.h>  #ifndef __KERNEL__  #include <sys/mman.h>  #include <stdio.h>  #else +#include <linux/module.h>  #include <linux/gfp.h>  #if !RAID6_USE_EMPTY_ZERO_PAGE  /* In .bss so it's zeroed */ @@ -34,10 +34,6 @@ struct raid6_calls raid6_call;  EXPORT_SYMBOL_GPL(raid6_call);  const struct raid6_calls * const raid6_algos[] = { -	&raid6_intx1, -	&raid6_intx2, -	&raid6_intx4, -	&raid6_intx8,  #if defined(__ia64__)  	&raid6_intx16,  	&raid6_intx32, @@ -61,6 +57,24 @@ const struct raid6_calls * const raid6_algos[] = {  	&raid6_altivec4,  	&raid6_altivec8,  #endif +	&raid6_intx1, +	&raid6_intx2, +	&raid6_intx4, +	&raid6_intx8, +	NULL +}; + +void (*raid6_2data_recov)(int, size_t, int, int, void **); +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +void (*raid6_datap_recov)(int, size_t, int, void **); +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +const struct raid6_recov_calls *const raid6_recov_algos[] = { +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) +	&raid6_recov_ssse3, +#endif +	&raid6_recov_intx1,  	NULL  }; @@ -72,59 +86,55 @@ const struct raid6_calls * const raid6_algos[] = {  #define time_before(x, y) ((x) < (y))  #endif -/* Try to pick the best algorithm */ -/* This code uses the gfmul table as convenient data set to abuse */ - -int __init raid6_select_algo(void) +static inline const struct raid6_recov_calls *raid6_choose_recov(void)  { -	const struct raid6_calls * const * algo; -	const struct raid6_calls * best; -	char *syndromes; -	void *dptrs[(65536/PAGE_SIZE)+2]; -	int i, disks; -	unsigned long perf, bestperf; -	int bestprefer; -	unsigned long j0, j1; +	const struct raid6_recov_calls *const *algo; +	const struct raid6_recov_calls *best; -	disks = (65536/PAGE_SIZE)+2; -	for ( i = 0 ; i < disks-2 ; i++ ) { -		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; -	} +	for (best = NULL, algo = raid6_recov_algos; *algo; algo++) +		if (!best || (*algo)->priority > best->priority) +			if (!(*algo)->valid || (*algo)->valid()) +				best = *algo; -	/* Normal code - use a 2-page allocation to avoid D$ conflict */ -	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); +	if (best) { +		raid6_2data_recov = best->data2; +		raid6_datap_recov = best->datap; -	if ( !syndromes ) { -		printk("raid6: Yikes!  No memory available.\n"); -		return -ENOMEM; -	} +		printk("raid6: using %s recovery algorithm\n", best->name); +	} else +		printk("raid6: Yikes! No recovery algorithm found!\n"); -	dptrs[disks-2] = syndromes; -	dptrs[disks-1] = syndromes + PAGE_SIZE; +	return best; +} + +static inline const struct raid6_calls *raid6_choose_gen( +	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) +{ +	unsigned long perf, bestperf, j0, j1; +	const struct raid6_calls *const *algo; +	const struct raid6_calls *best; -	bestperf = 0;  bestprefer = 0;  best = NULL; +	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { +		if (!best || (*algo)->prefer >= best->prefer) { +			if ((*algo)->valid && !(*algo)->valid()) +				continue; -	for ( algo = raid6_algos ; *algo ; algo++ ) { -		if ( !(*algo)->valid || (*algo)->valid() ) {  			perf = 0;  			preempt_disable();  			j0 = jiffies; -			while ( (j1 = jiffies) == j0 ) +			while ((j1 = jiffies) == j0)  				cpu_relax();  			while (time_before(jiffies,  					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { -				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); +				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);  				perf++;  			}  			preempt_enable(); -			if ( (*algo)->prefer > bestprefer || -			     ((*algo)->prefer == bestprefer && -			      perf > bestperf) ) { -				best = *algo; -				bestprefer = best->prefer; +			if (perf > bestperf) {  				bestperf = perf; +				best = *algo;  			}  			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,  			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); @@ -139,9 +149,46 @@ int __init raid6_select_algo(void)  	} else  		printk("raid6: Yikes!  No algorithm found!\n"); +	return best; +} + + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ +	const int disks = (65536/PAGE_SIZE)+2; + +	const struct raid6_calls *gen_best; +	const struct raid6_recov_calls *rec_best; +	char *syndromes; +	void *dptrs[(65536/PAGE_SIZE)+2]; +	int i; + +	for (i = 0; i < disks-2; i++) +		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + +	/* Normal code - use a 2-page allocation to avoid D$ conflict */ +	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + +	if (!syndromes) { +		printk("raid6: Yikes!  No memory available.\n"); +		return -ENOMEM; +	} + +	dptrs[disks-2] = syndromes; +	dptrs[disks-1] = syndromes + PAGE_SIZE; + +	/* select raid gen_syndrome function */ +	gen_best = raid6_choose_gen(&dptrs, disks); + +	/* select raid recover functions */ +	rec_best = raid6_choose_recov(); +  	free_pages((unsigned long)syndromes, 1); -	return best ? 0 : -EINVAL; +	return gen_best && rec_best ? 0 : -EINVAL;  }  static void raid6_exit(void) diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 8a3780902ce..39787db588b 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -81,6 +81,31 @@ int main(int argc, char *argv[])  	printf("EXPORT_SYMBOL(raid6_gfmul);\n");  	printf("#endif\n"); +	/* Compute vector multiplication table */ +	printf("\nconst u8  __attribute__((aligned(256)))\n" +		"raid6_vgfmul[256][32] =\n" +		"{\n"); +	for (i = 0; i < 256; i++) { +		printf("\t{\n"); +		for (j = 0; j < 16; j += 8) { +			printf("\t\t"); +			for (k = 0; k < 8; k++) +				printf("0x%02x,%c", gfmul(i, j + k), +				       (k == 7) ? '\n' : ' '); +		} +		for (j = 0; j < 16; j += 8) { +			printf("\t\t"); +			for (k = 0; k < 8; k++) +				printf("0x%02x,%c", gfmul(i, (j + k) << 4), +				       (k == 7) ? '\n' : ' '); +		} +		printf("\t},\n"); +	} +	printf("};\n"); +	printf("#ifdef __KERNEL__\n"); +	printf("EXPORT_SYMBOL(raid6_vgfmul);\n"); +	printf("#endif\n"); +  	/* Compute power-of-2 table (exponent) */  	v = 1;  	printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index fe275d7b6b3..1805a5cc5da 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,7 +22,7 @@  #include <linux/raid/pq.h>  /* Recover two failed data blocks. */ -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,  		       void **ptrs)  {  	u8 *p, *q, *dp, *dq; @@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,  		p++; q++;  	}  } -EXPORT_SYMBOL_GPL(raid6_2data_recov);  /* Recover failure of one data block plus the P block */ -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs)  {  	u8 *p, *q, *dq;  	const u8 *qmul;		/* Q multiplier table */ @@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)  		q++; dq++;  	}  } -EXPORT_SYMBOL_GPL(raid6_datap_recov); + + +const struct raid6_recov_calls raid6_recov_intx1 = { +	.data2 = raid6_2data_recov_intx1, +	.datap = raid6_datap_recov_intx1, +	.valid = NULL, +	.name = "intx1", +	.priority = 0, +};  #ifndef __KERNEL__  /* Testing only */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c new file mode 100644 index 00000000000..37ae6193055 --- /dev/null +++ b/lib/raid6/recov_ssse3.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +static int raid6_has_ssse3(void) +{ +	return boot_cpu_has(X86_FEATURE_XMM) && +		boot_cpu_has(X86_FEATURE_XMM2) && +		boot_cpu_has(X86_FEATURE_SSSE3); +} + +void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, +		       void **ptrs) +{ +	u8 *p, *q, *dp, *dq; +	const u8 *pbmul;	/* P multiplier table for B data */ +	const u8 *qmul;		/* Q multiplier table (for both) */ +	static const u8 __aligned(16) x0f[16] = { +		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data pages +	   Use the dead data pages as temporary storage for +	   delta p and delta q */ +	dp = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-2] = dp; +	dq = (u8 *)ptrs[failb]; +	ptrs[failb] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dp; +	ptrs[failb]   = dq; +	ptrs[disks-2] = p; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; +	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ +		raid6_gfexp[failb]]]; + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0])); + +#ifdef CONFIG_X86_64 +	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0])); +	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0])); +	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16])); +#endif + +	/* Now do it... */ +	while (bytes) { +#ifdef CONFIG_X86_64 +		/* xmm6, xmm14, xmm15 */ + +		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0])); +		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16])); +		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0])); +		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16])); +		asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0])); +		asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16])); +		asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0])); +		asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16])); + +		/* xmm0/8 = px */ + +		asm volatile("movdqa %xmm6,%xmm4"); +		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); +		asm volatile("movdqa %xmm6,%xmm12"); +		asm volatile("movdqa %xmm5,%xmm13"); +		asm volatile("movdqa %xmm1,%xmm3"); +		asm volatile("movdqa %xmm9,%xmm11"); +		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */ +		asm volatile("movdqa %xmm8,%xmm10"); +		asm volatile("psraw  $4,%xmm1"); +		asm volatile("psraw  $4,%xmm9"); +		asm volatile("pand   %xmm7,%xmm3"); +		asm volatile("pand   %xmm7,%xmm11"); +		asm volatile("pand   %xmm7,%xmm1"); +		asm volatile("pand   %xmm7,%xmm9"); +		asm volatile("pshufb %xmm3,%xmm4"); +		asm volatile("pshufb %xmm11,%xmm12"); +		asm volatile("pshufb %xmm1,%xmm5"); +		asm volatile("pshufb %xmm9,%xmm13"); +		asm volatile("pxor   %xmm4,%xmm5"); +		asm volatile("pxor   %xmm12,%xmm13"); + +		/* xmm5/13 = qx */ + +		asm volatile("movdqa %xmm14,%xmm4"); +		asm volatile("movdqa %xmm15,%xmm1"); +		asm volatile("movdqa %xmm14,%xmm12"); +		asm volatile("movdqa %xmm15,%xmm9"); +		asm volatile("movdqa %xmm2,%xmm3"); +		asm volatile("movdqa %xmm10,%xmm11"); +		asm volatile("psraw  $4,%xmm2"); +		asm volatile("psraw  $4,%xmm10"); +		asm volatile("pand   %xmm7,%xmm3"); +		asm volatile("pand   %xmm7,%xmm11"); +		asm volatile("pand   %xmm7,%xmm2"); +		asm volatile("pand   %xmm7,%xmm10"); +		asm volatile("pshufb %xmm3,%xmm4"); +		asm volatile("pshufb %xmm11,%xmm12"); +		asm volatile("pshufb %xmm2,%xmm1"); +		asm volatile("pshufb %xmm10,%xmm9"); +		asm volatile("pxor   %xmm4,%xmm1"); +		asm volatile("pxor   %xmm12,%xmm9"); + +		/* xmm1/9 = pbmul[px] */ +		asm volatile("pxor   %xmm5,%xmm1"); +		asm volatile("pxor   %xmm13,%xmm9"); +		/* xmm1/9 = db = DQ */ +		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0])); +		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16])); + +		asm volatile("pxor   %xmm1,%xmm0"); +		asm volatile("pxor   %xmm9,%xmm8"); +		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0])); +		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16])); + +		bytes -= 32; +		p += 32; +		q += 32; +		dp += 32; +		dq += 32; +#else +		asm volatile("movdqa %0,%%xmm1" : : "m" (*q)); +		asm volatile("movdqa %0,%%xmm0" : : "m" (*p)); +		asm volatile("pxor   %0,%%xmm1" : : "m" (*dq)); +		asm volatile("pxor   %0,%%xmm0" : : "m" (*dp)); + +		/* 1 = dq ^ q +		 * 0 = dp ^ p +		 */ +		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0])); +		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + +		asm volatile("movdqa %xmm1,%xmm3"); +		asm volatile("psraw  $4,%xmm1"); +		asm volatile("pand   %xmm7,%xmm3"); +		asm volatile("pand   %xmm7,%xmm1"); +		asm volatile("pshufb %xmm3,%xmm4"); +		asm volatile("pshufb %xmm1,%xmm5"); +		asm volatile("pxor   %xmm4,%xmm5"); + +		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */ + +		/* xmm5 = qx */ + +		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0])); +		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16])); +		asm volatile("movdqa %xmm2,%xmm3"); +		asm volatile("psraw  $4,%xmm2"); +		asm volatile("pand   %xmm7,%xmm3"); +		asm volatile("pand   %xmm7,%xmm2"); +		asm volatile("pshufb %xmm3,%xmm4"); +		asm volatile("pshufb %xmm2,%xmm1"); +		asm volatile("pxor   %xmm4,%xmm1"); + +		/* xmm1 = pbmul[px] */ +		asm volatile("pxor   %xmm5,%xmm1"); +		/* xmm1 = db = DQ */ +		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq)); + +		asm volatile("pxor   %xmm1,%xmm0"); +		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp)); + +		bytes -= 16; +		p += 16; +		q += 16; +		dp += 16; +		dq += 16; +#endif +	} + +	kernel_fpu_end(); +} + + +void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +{ +	u8 *p, *q, *dq; +	const u8 *qmul;		/* Q multiplier table */ +	static const u8 __aligned(16) x0f[16] = { +		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + +	p = (u8 *)ptrs[disks-2]; +	q = (u8 *)ptrs[disks-1]; + +	/* Compute syndrome with zero for the missing data page +	   Use the dead data page as temporary storage for delta q */ +	dq = (u8 *)ptrs[faila]; +	ptrs[faila] = (void *)raid6_empty_zero_page; +	ptrs[disks-1] = dq; + +	raid6_call.gen_syndrome(disks, bytes, ptrs); + +	/* Restore pointer table */ +	ptrs[faila]   = dq; +	ptrs[disks-1] = q; + +	/* Now, pick the proper data tables */ +	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + +	kernel_fpu_begin(); + +	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0])); + +	while (bytes) { +#ifdef CONFIG_X86_64 +		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); +		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16])); +		asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); +		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + +		/* xmm3 = q[0] ^ dq[0] */ + +		asm volatile("pxor %0, %%xmm4" : : "m" (q[16])); +		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + +		/* xmm4 = q[16] ^ dq[16] */ + +		asm volatile("movdqa %xmm3, %xmm6"); +		asm volatile("movdqa %xmm4, %xmm8"); + +		/* xmm4 = xmm8 = q[16] ^ dq[16] */ + +		asm volatile("psraw $4, %xmm3"); +		asm volatile("pand %xmm7, %xmm6"); +		asm volatile("pand %xmm7, %xmm3"); +		asm volatile("pshufb %xmm6, %xmm0"); +		asm volatile("pshufb %xmm3, %xmm1"); +		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0])); +		asm volatile("pxor %xmm0, %xmm1"); +		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16])); + +		/* xmm1 = qmul[q[0] ^ dq[0]] */ + +		asm volatile("psraw $4, %xmm4"); +		asm volatile("pand %xmm7, %xmm8"); +		asm volatile("pand %xmm7, %xmm4"); +		asm volatile("pshufb %xmm8, %xmm10"); +		asm volatile("pshufb %xmm4, %xmm11"); +		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); +		asm volatile("pxor %xmm10, %xmm11"); +		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16])); + +		/* xmm11 = qmul[q[16] ^ dq[16]] */ + +		asm volatile("pxor %xmm1, %xmm2"); + +		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */ + +		asm volatile("pxor %xmm11, %xmm12"); + +		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */ + +		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); +		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16])); + +		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); +		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16])); + +		bytes -= 32; +		p += 32; +		q += 32; +		dq += 32; + +#else +		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); +		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); +		asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); +		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + +		/* xmm3 = *q ^ *dq */ + +		asm volatile("movdqa %xmm3, %xmm6"); +		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); +		asm volatile("psraw $4, %xmm3"); +		asm volatile("pand %xmm7, %xmm6"); +		asm volatile("pand %xmm7, %xmm3"); +		asm volatile("pshufb %xmm6, %xmm0"); +		asm volatile("pshufb %xmm3, %xmm1"); +		asm volatile("pxor %xmm0, %xmm1"); + +		/* xmm1 = qmul[*q ^ *dq */ + +		asm volatile("pxor %xmm1, %xmm2"); + +		/* xmm2 = *p ^ qmul[*q ^ *dq] */ + +		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); +		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + +		bytes -= 16; +		p += 16; +		q += 16; +		dq += 16; +#endif +	} + +	kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_ssse3 = { +	.data2 = raid6_2data_recov_ssse3, +	.datap = raid6_datap_recov_ssse3, +	.valid = raid6_has_ssse3, +#ifdef CONFIG_X86_64 +	.name = "ssse3x2", +#else +	.name = "ssse3x1", +#endif +	.priority = 1, +}; + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index aa651697b6d..c76151d9476 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -23,7 +23,7 @@ RANLIB	 = ranlib  all:	raid6.a raid6test  raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ -	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ +	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \  	 tables.o  	 rm -f $@  	 $(AR) cq $@ $^ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 7a930318b17..5a485b7a7d3 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -90,25 +90,35 @@ static int test_disks(int i, int j)  int main(int argc, char *argv[])  {  	const struct raid6_calls *const *algo; +	const struct raid6_recov_calls *const *ra;  	int i, j;  	int err = 0;  	makedata(); -	for (algo = raid6_algos; *algo; algo++) { -		if (!(*algo)->valid || (*algo)->valid()) { -			raid6_call = **algo; +	for (ra = raid6_recov_algos; *ra; ra++) { +		if ((*ra)->valid  && !(*ra)->valid()) +			continue; +		raid6_2data_recov = (*ra)->data2; +		raid6_datap_recov = (*ra)->datap; -			/* Nuke syndromes */ -			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); +		printf("using recovery %s\n", (*ra)->name); -			/* Generate assumed good syndrome */ -			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, -						(void **)&dataptrs); +		for (algo = raid6_algos; *algo; algo++) { +			if (!(*algo)->valid || (*algo)->valid()) { +				raid6_call = **algo; -			for (i = 0; i < NDISKS-1; i++) -				for (j = i+1; j < NDISKS; j++) -					err += test_disks(i, j); +				/* Nuke syndromes */ +				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + +				/* Generate assumed good syndrome */ +				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, +							(void **)&dataptrs); + +				for (i = 0; i < NDISKS-1; i++) +					for (j = i+1; j < NDISKS; j++) +						err += test_disks(i, j); +			}  		}  		printf("\n");  	} diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index cb2a8c91c88..d55d63232c5 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -35,24 +35,29 @@ static inline void kernel_fpu_end(void)  {  } +#define __aligned(x) __attribute__((aligned(x))) +  #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */  #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions  					   * (fast save and restore) */  #define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */  #define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */  #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */  /* Should work well enough on modern CPUs for testing */  static inline int boot_cpu_has(int flag)  { -	u32 eax = (flag >> 5) ? 0x80000001 : 1; -	u32 edx; +	u32 eax = (flag & 0x20) ? 0x80000001 : 1; +	u32 ecx, edx;  	asm volatile("cpuid" -		     : "+a" (eax), "=d" (edx) -		     : : "ecx", "ebx"); +		     : "+a" (eax), "=d" (edx), "=c" (ecx) +		     : : "ebx"); -	return (edx >> (flag & 31)) & 1; +	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;  }  #endif /* ndef __KERNEL__ */  |