| field | value | date |
|---|---|---|
| author | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-01-13 15:00:22 +0000 |
| committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-01-13 15:00:22 +0000 |
| commit | 4de3a8e101150feaefa1139611a50ff37467f33e (patch) | |
| tree | daada742542518b02d7db7c5d32e715eaa5f166d /arch/c6x/lib/csum_64plus.S | |
| parent | 294064f58953f9964e5945424b09c51800330a83 (diff) | |
| parent | 099469502f62fbe0d7e4f0b83a2f22538367f734 (diff) | |
Merge branch 'master' into fixes
Diffstat (limited to 'arch/c6x/lib/csum_64plus.S')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | arch/c6x/lib/csum_64plus.S | 419 |

1 file changed, 419 insertions, 0 deletions
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S
new file mode 100644
index 00000000000..6d258964722
--- /dev/null
+++ b/arch/c6x/lib/csum_64plus.S
@@ -0,0 +1,419 @@
+;
+;  linux/arch/c6x/lib/csum_64plus.s
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+
+;
+;unsigned int csum_partial_copy(const char *src, char * dst,
+;				int len, int sum)
+;
+; A4:	src
+; B4:	dst
+; A6:	len
+; B6:	sum
+; return csum in A4
+;
+
+	.text
+ENTRY(csum_partial_copy)
+	MVC	.S2	ILC,B30
+
+	MV	.D1X	B6,A31		; given csum
+	ZERO	.D1	A9		; csum (a side)
+||	ZERO	.D2	B9		; csum (b side)
+||	SHRU	.S2X	A6,2,B5		; len / 4
+
+	;; Check alignment and size
+	AND	.S1	3,A4,A1
+||	AND	.S2	3,B4,B0
+	OR	.L2X	B0,A1,B0	; non aligned condition
+||	MVC	.S2	B5,ILC
+||	MVK	.D2	1,B2
+||	MV	.D1X	B5,A1		; words condition
+  [!A1]	B	.S1	L8
+   [B0] BNOP	.S1	L6,5
+
+	SPLOOP		1
+
+	;; Main loop for aligned words
+	LDW	.D1T1	*A4++,A7
+	NOP	4
+	MV	.S2X	A7,B7
+||	EXTU	.S1	A7,0,16,A16
+	STW	.D2T2	B7,*B4++
+||	MPYU	.M2	B7,B2,B8
+||	ADD	.L1	A16,A9,A9
+	NOP
+	SPKERNEL	8,0
+||	ADD	.L2	B8,B9,B9
+
+	ZERO	.D1	A1
+||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
+
+L6:
+  [!A1]	BNOP	.S1	L8,5
+
+	;; Main loop for non-aligned words
+	SPLOOP		2
+ ||	MVK	.L1	1,A2
+
+	LDNW	.D1T1	*A4++,A7
+	NOP		3
+
+	NOP
+	MV	.S2X	A7,B7
+ ||	EXTU	.S1	A7,0,16,A16
+ ||	MPYU	.M1	A7,A2,A8
+
+	ADD	.L1	A16,A9,A9
+	SPKERNEL	6,0
+ ||	STNW	.D2T2	B7,*B4++
+ ||	ADD	.L1	A8,A9,A9
+
+L8:	AND	.S2X	2,A6,B5
+	CMPGT	.L2	B5,0,B0
+  [!B0]	BNOP	.S1	L82,4
+
+	;; Manage half-word
+	ZERO	.L1	A7
+||	ZERO	.D1	A8
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+	LDBU	.D1T1	*A4++,A7
+	LDBU	.D1T1	*A4++,A8
+	NOP		3
+	SHL	.S1	A7,8,A0
+	ADD	.S1	A8,A9,A9
+	STB	.D2T1	A7,*B4++
+||	ADD	.S1	A0,A9,A9
+	STB	.D2T1	A8,*B4++
+
+#else
+
+	LDBU	.D1T1	*A4++,A7
+	LDBU	.D1T1	*A4++,A8
+	NOP		3
+	ADD	.S1	A7,A9,A9
+	SHL	.S1	A8,8,A0
+
+	STB	.D2T1	A7,*B4++
+||	ADD	.S1	A0,A9,A9
+	STB	.D2T1	A8,*B4++
+
+#endif
+
+	;; Manage eventually the last byte
+L82:	AND	.S2X	1,A6,B0
+  [!B0]	BNOP	.S1	L9,5
+
+||	ZERO	.L1	A7
+
+L83:	LDBU	.D1T1	*A4++,A7
+	NOP		4
+
+	MV	.L2X	A7,B7
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+	STB	.D2T2	B7,*B4++
+||	SHL	.S1	A7,8,A7
+	ADD	.S1	A7,A9,A9
+
+#else
+
+	STB	.D2T2	B7,*B4++
+||	ADD	.S1	A7,A9,A9
+
+#endif
+
+	;; Fold the csum
+L9:	SHRU	.S2X	A9,16,B0
+  [!B0]	BNOP	.S1	L10,5
+
+L91:	SHRU	.S2X	A9,16,B4
+||	EXTU	.S1	A9,16,16,A3
+	ADD	.D1X	A3,B4,A9
+
+	SHRU	.S1	A9,16,A0
+   [A0]	BNOP	.S1	L91,5
+
+L10:	ADD	.D1	A31,A9,A9
+	MV	.D1	A9,A4
+
+	BNOP	.S2	B3,4
+	MVC	.S2	B30,ILC
+ENDPROC(csum_partial_copy)
+
+;
+;unsigned short
+;ip_fast_csum(unsigned char *iph, unsigned int ihl)
+;{
+;	unsigned int checksum = 0;
+;	unsigned short *tosum = (unsigned short *) iph;
+;	int len;
+;
+;	len = ihl*4;
+;
+;	if (len <= 0)
+;		return 0;
+;
+;	while(len) {
+;		len -= 2;
+;		checksum += *tosum++;
+;	}
+;	if (len & 1)
+;		checksum += *(unsigned char*) tosum;
+;
+;	while(checksum >> 16)
+;		checksum = (checksum & 0xffff) + (checksum >> 16);
+;
+;	return ~checksum;
+;}
+;
+; A4:	iph
+; B4:	ihl
+; return checksum in A4
+;
+	.text
+
+ENTRY(ip_fast_csum)
+	ZERO	.D1	A5
+ ||	MVC	.S2	ILC,B30
+	SHL	.S2	B4,2,B0
+	CMPGT	.L2	B0,0,B1
+  [!B1] BNOP	.S1	L15,4
+  [!B1]	ZERO	.D1	A3
+
+  [!B0]	B	.S1	L12
+	SHRU	.S2	B0,1,B0
+	MVC	.S2	B0,ILC
+	NOP	3
+
+	SPLOOP	1
+	LDHU	.D1T1	*A4++,A3
+	NOP	3
+	NOP
+	SPKERNEL	5,0
+ ||	ADD	.L1	A3,A5,A5
+
+L12:	SHRU	.S1	A5,16,A0
+  [!A0]	BNOP	.S1	L14,5
+
+L13:	SHRU	.S2X	A5,16,B4
+	EXTU	.S1	A5,16,16,A3
+	ADD	.D1X	A3,B4,A5
+	SHRU	.S1	A5,16,A0
+  [A0]	BNOP	.S1	L13,5
+
+L14:	NOT	.D1	A5,A3
+	EXTU	.S1	A3,16,16,A3
+
+L15:	BNOP	.S2	B3,3
+	MVC	.S2	B30,ILC
+	MV	.D1	A3,A4
+ENDPROC(ip_fast_csum)
+
+;
+;unsigned short
+;do_csum(unsigned char *buff, unsigned int len)
+;{
+;	int odd, count;
+;	unsigned int result = 0;
+;
+;	if (len <= 0)
+;		goto out;
+;	odd = 1 & (unsigned long) buff;
+;	if (odd) {
+;#ifdef __LITTLE_ENDIAN
+;		result += (*buff << 8);
+;#else
+;		result = *buff;
+;#endif
+;		len--;
+;		buff++;
+;	}
+;	count = len >> 1;		/* nr of 16-bit words.. */
+;	if (count) {
+;		if (2 & (unsigned long) buff) {
+;			result += *(unsigned short *) buff;
+;			count--;
+;			len -= 2;
+;			buff += 2;
+;		}
+;		count >>= 1;		/* nr of 32-bit words.. */
+;		if (count) {
+;			unsigned int carry = 0;
+;			do {
+;				unsigned int w = *(unsigned int *) buff;
+;				count--;
+;				buff += 4;
+;				result += carry;
+;				result += w;
+;				carry = (w > result);
+;			} while (count);
+;			result += carry;
+;			result = (result & 0xffff) + (result >> 16);
+;		}
+;		if (len & 2) {
+;			result += *(unsigned short *) buff;
+;			buff += 2;
+;		}
+;	}
+;	if (len & 1)
+;#ifdef __LITTLE_ENDIAN
+;		result += *buff;
+;#else
+;		result += (*buff << 8);
+;#endif
+;	result = (result & 0xffff) + (result >> 16);
+;	/* add up carry.. */
+;	result = (result & 0xffff) + (result >> 16);
+;	if (odd)
+;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+;out:
+;	return result;
+;}
+;
+; A4:	buff
+; B4:	len
+; return checksum in A4
+;
+
+ENTRY(do_csum)
+	   CMPGT   .L2	   B4,0,B0
+   [!B0]   BNOP    .S1	   L26,3
+	   EXTU    .S1	   A4,31,31,A0
+
+	   MV	   .L1	   A0,A3
+||	   MV	   .S1X    B3,A5
+||	   MV	   .L2	   B4,B3
+||	   ZERO    .D1	   A1
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [A0]    SUB	   .L2	   B3,1,B3
+|| [A0]    LDBU    .D1T1   *A4++,A1
+#else
+   [!A0]   BNOP    .S1	   L21,5
+|| [A0]    LDBU    .D1T1   *A4++,A0
+	   SUB	   .L2	   B3,1,B3
+||	   SHL	   .S1	   A0,8,A1
+L21:
+#endif
+	   SHR	   .S2	   B3,1,B0
+   [!B0]   BNOP    .S1	   L24,3
+	   MVK	   .L1	   2,A0
+	   AND	   .L1	   A4,A0,A0
+
+   [!A0]   BNOP    .S1	   L22,5
+|| [A0]    LDHU    .D1T1   *A4++,A0
+	   SUB	   .L2	   B0,1,B0
+||	   SUB	   .S2	   B3,2,B3
+||	   ADD	   .L1	   A0,A1,A1
+L22:
+	   SHR	   .S2	   B0,1,B0
+||	   ZERO    .L1	   A0
+
+   [!B0]   BNOP    .S1	   L23,5
+|| [B0]    MVC	   .S2	   B0,ILC
+
+	   SPLOOP  3
+	   SPMASK  L1
+||	   MV	   .L1	   A1,A2
+||	   LDW	   .D1T1   *A4++,A1
+
+	   NOP	   4
+	   ADD	   .L1	   A0,A1,A0
+	   ADD	   .L1	   A2,A0,A2
+
+	   SPKERNEL 1,2
+||	   CMPGTU  .L1	   A1,A2,A0
+
+	   ADD	   .L1	   A0,A2,A6
+	   EXTU    .S1	   A6,16,16,A7
+	   SHRU    .S2X    A6,16,B0
+	   NOP		   1
+	   ADD	   .L1X    A7,B0,A1
+L23:
+	   MVK	   .L2	   2,B0
+	   AND	   .L2	   B3,B0,B0
+   [B0]    LDHU    .D1T1   *A4++,A0
+	   NOP	   4
+   [B0]    ADD	   .L1	   A0,A1,A1
+L24:
+	   EXTU    .S2	   B3,31,31,B0
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [!B0]   BNOP    .S1	   L25,4
+|| [B0]    LDBU    .D1T1   *A4,A0
+	   SHL	   .S1	   A0,8,A0
+	   ADD	   .L1	   A0,A1,A1
+L25:
+#else
+   [B0]    LDBU    .D1T1   *A4,A0
+	   NOP	   4
+   [B0]    ADD	   .L1	   A0,A1,A1
+#endif
+	   EXTU    .S1	   A1,16,16,A0
+	   SHRU    .S2X    A1,16,B0
+	   NOP	   1
+	   ADD	   .L1X    A0,B0,A0
+	   SHRU    .S1	   A0,16,A1
+	   ADD	   .L1	   A0,A1,A0
+	   EXTU    .S1	   A0,16,16,A1
+	   EXTU    .S1	   A1,16,24,A2
+
+	   EXTU    .S1	   A1,24,16,A0
+||	   MV	   .L2X    A3,B0
+
+   [B0]    OR	   .L1	   A0,A2,A1
+L26:
+	   NOP	   1
+	   BNOP    .S2X    A5,4
+	   MV	   .L1	   A1,A4
+ENDPROC(do_csum)
+
+;__wsum csum_partial(const void *buff, int len, __wsum wsum)
+;{
+;	unsigned int sum = (__force unsigned int)wsum;
+;	unsigned int result = do_csum(buff, len);
+;
+;	/* add in old sum, and carry.. */
+;	result += sum;
+;	if (sum > result)
+;		result += 1;
+;	return (__force __wsum)result;
+;}
+;
+ENTRY(csum_partial)
+	   MV	   .L1X    B3,A9
+||	   CALLP   .S2	   do_csum,B3
+||	   MV	   .S1	   A6,A8
+	   BNOP    .S2X    A9,2
+	   ADD	   .L1	   A8,A4,A1
+	   CMPGTU  .L1	   A8,A1,A0
+	   ADD	   .L1	   A1,A0,A4
+ENDPROC(csum_partial)
+
+;unsigned short
+;ip_compute_csum(unsigned char *buff, unsigned int len)
+;
+; A4:	buff
+; B4:	len
+; return checksum in A4
+
+ENTRY(ip_compute_csum)
+	   MV	   .L1X    B3,A9
+||	   CALLP   .S2	   do_csum,B3
+	   BNOP    .S2X    A9,3
+	   NOT	   .S1	   A4,A4
+	   CLR     .S1	   A4,16,31,A4
+ENDPROC(ip_compute_csum)
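
For reference, the one's-complement arithmetic these routines implement (compare the do_csum() and csum_partial() C comments embedded in the patch) can be sketched in portable C. This is a minimal sketch, not part of the patch: csum_fold32() and ref_ip_compute_csum() are hypothetical names used only for illustration, and the byte handling assumes a little-endian host (the `__LITTLE_ENDIAN` branches above).

```c
#include <stdint.h>
#include <stddef.h>

/* Fold a 32-bit accumulator down to 16 bits by repeatedly adding the
 * carries back in, as the L9/L91 and L12/L13 loops do in assembly. */
static uint16_t csum_fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Reference model of ip_compute_csum() semantics: sum the buffer as
 * 16-bit little-endian words, fold, and return the one's complement.
 * Assumes the buffer is short enough that the 32-bit accumulator does
 * not overflow before folding. */
static uint16_t ref_ip_compute_csum(const unsigned char *buff, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buff[i] | ((uint32_t)buff[i + 1] << 8);
	if (len & 1)			/* trailing byte is zero-padded */
		sum += buff[len - 1];
	return (uint16_t)~csum_fold32(sum);
}
```

The deferred fold mirrors the assembly's structure: the inner loops accumulate into a wide register and only collapse carries at the end, which is why csum_partial() above needs just one extra CMPGTU/ADD to merge the caller-supplied partial sum.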