diff options
| author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-08 01:04:51 +0900 | 
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-09 01:03:59 +0000 | 
| commit | ed99e2bc1dc5dc54eb5a019f4975562dbef20103 (patch) | |
| tree | c8ff52ab4a29fe842e34fd94d01e74082486391d /arch/mips/lib | |
| parent | 773ff78838ca3c07245e45c06235e0baaa5f710a (diff) | |
| download | olio-linux-3.10-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.tar.xz olio-linux-3.10-ed99e2bc1dc5dc54eb5a019f4975562dbef20103.zip  | |
[MIPS] Optimize csum_partial for 64bit kernel
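Make csum_partial 64-bit powered: when USE_DOUBLE is defined, the checksum loops load and accumulate data in 8-byte units (ld/daddu) instead of 4-byte units (lw/addu), and the 64-bit sum is folded down to 32 bits before the existing 16-bit fold.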
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib')
| -rw-r--r-- | arch/mips/lib/csum_partial.S | 76 | 
1 file changed, 54 insertions, 22 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index b04475d76f3..9db357294be 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -29,30 +29,49 @@  #define t5	$13  #define t6	$14  #define t7	$15 + +#define USE_DOUBLE  #endif +#ifdef USE_DOUBLE + +#define LOAD   ld +#define ADD    daddu +#define NBYTES 8 + +#else + +#define LOAD   lw +#define ADD    addu +#define NBYTES 4 + +#endif /* USE_DOUBLE */ + +#define UNIT(unit)  ((unit)*NBYTES) +  #define ADDC(sum,reg)						\ -	addu	sum, reg;					\ +	ADD	sum, reg;					\  	sltu	v1, sum, reg;					\ -	addu	sum, v1 +	ADD	sum, v1 -#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\ -	lw	_t0, (offset + 0x00)(src);			\ -	lw	_t1, (offset + 0x04)(src);			\ -	lw	_t2, (offset + 0x08)(src); 			\ -	lw	_t3, (offset + 0x0c)(src); 			\ -	ADDC(sum, _t0);						\ -	ADDC(sum, _t1);						\ -	ADDC(sum, _t2);						\ -	ADDC(sum, _t3);						\ -	lw	_t0, (offset + 0x10)(src);			\ -	lw	_t1, (offset + 0x14)(src);			\ -	lw	_t2, (offset + 0x18)(src);			\ -	lw	_t3, (offset + 0x1c)(src);			\ +#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\ +	LOAD	_t0, (offset + UNIT(0))(src);			\ +	LOAD	_t1, (offset + UNIT(1))(src);			\ +	LOAD	_t2, (offset + UNIT(2))(src); 			\ +	LOAD	_t3, (offset + UNIT(3))(src); 			\  	ADDC(sum, _t0);						\  	ADDC(sum, _t1);						\  	ADDC(sum, _t2);						\ -	ADDC(sum, _t3);						\ +	ADDC(sum, _t3) + +#ifdef USE_DOUBLE +#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\ +	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) +#else +#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\ +	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\ +	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3) +#endif  /*   * a0: source address @@ -117,11 +136,17 @@ qword_align:  	beqz	t8, oword_align  	 andi	t8, src, 0x10 +#ifdef USE_DOUBLE +	ld	t0, 0x00(src) +	LONG_SUBU	a1, a1, 0x8 +	ADDC(sum, t0) +#else  	lw	t0, 0x00(src)  	lw	t1, 0x04(src)  	
LONG_SUBU	a1, a1, 0x8  	ADDC(sum, t0)  	ADDC(sum, t1) +#endif  	PTR_ADDU	src, src, 0x8  	andi	t8, src, 0x10 @@ -129,14 +154,14 @@ oword_align:  	beqz	t8, begin_movement  	 LONG_SRL	t8, a1, 0x7 -	lw	t3, 0x08(src) -	lw	t4, 0x0c(src) -	lw	t0, 0x00(src) -	lw	t1, 0x04(src) -	ADDC(sum, t3) -	ADDC(sum, t4) +#ifdef USE_DOUBLE +	ld	t0, 0x00(src) +	ld	t1, 0x08(src)  	ADDC(sum, t0)  	ADDC(sum, t1) +#else +	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4) +#endif  	LONG_SUBU	a1, a1, 0x10  	PTR_ADDU	src, src, 0x10  	LONG_SRL	t8, a1, 0x7 @@ -219,6 +244,13 @@ small_csumcpy:  1:	ADDC(sum, t1)  	/* fold checksum */ +#ifdef USE_DOUBLE +	dsll32	v1, sum, 0 +	daddu	sum, v1 +	sltu	v1, sum, v1 +	dsra32	sum, sum, 0 +	addu	sum, v1 +#endif  	sll	v1, sum, 16  	addu	sum, v1  	sltu	v1, sum, v1  |