diff options
Diffstat (limited to 'lib_blackfin/memcpy.S')
| -rw-r--r-- | lib_blackfin/memcpy.S | 114 | 
1 files changed, 114 insertions, 0 deletions
/*
 * File: memcpy.S
 *
 * Copyright 2004-2007 Analog Devices Inc.
 * Enter bugs at http://blackfin.uclinux.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

.align 2

/*
 * _memcpy_ASM - copy a block of bytes, coping with one kind of overlap.
 *
 * In:
 *	R0 = dst
 *	R1 = src
 *	R2 = length in bytes (tested with a signed compare)
 *
 * Out:
 *	R0 = dst when length <= 0 (the register is never touched on that path)
 *	R0 = src on every path that actually copies data
 *
 * May modify: R1, R3, P0, P1, P2, I1, hardware loop 0 (LC0), CC.
 *
 * Strategy:
 *	- src < dst < src + length : copy bytes from the last one down to the
 *	  first one, so the source is read before it gets overwritten.
 *	- both pointers 4-byte aligned and length >= 12 : copy whole words,
 *	  then finish the 0..3 leftover bytes one at a time.
 *	- everything else : copy forwards one byte at a time.
 */
.globl _memcpy_ASM;
_memcpy_ASM:
	CC = R2 <= 0;			/* zero or negative length? */
	IF CC JUMP .Ldone;		/* then there is nothing to copy */

	P2 = R2;			/* P2 = byte count */
	P1 = R1;			/* P1 = read pointer  (src) */
	P0 = R0;			/* P0 = write pointer (dst) */

	/* A forward copy is only unsafe when src < dst < src + length. */
	CC = R1 < R0;			/* src below dst? */
	IF !CC JUMP .Lforward;
	R3 = R1 + R2;			/* R3 = src + length */
	CC = R0 < R3;			/* dst starts inside the source block? */
	IF CC JUMP .Lbackward;

.Lforward:
	/* Look at the two low address bits of src and dst together. */
	R3 = R1 | R0;
	R0 = 0x3;			/* mask; dst in R0 is lost from here on */
	R3 = R3 & R0;
	CC = R3;			/* non-zero: at least one is unaligned */
	IF CC JUMP .Lbytewise;

	/* Both pointers are word aligned: decide whether word copies pay off. */
	P2 = P2 >> 2;			/* P2 = number of whole words */
	CC = P2 <= 2;
	IF !CC JUMP .Lwordwise;		/* three words (12 bytes) or more */

	/* Two words or fewer, i.e. under 12 bytes: a plain byte loop. */
	P2 = R2;			/* back to the full byte count */
	R0 = R1;			/* value handed back to the caller: src */
	LSETUP(.Lshort_top, .Lshort_bottom) LC0=P2;
.Lshort_top:
	R3 = B[P1++] (X);
.Lshort_bottom:
	B[P0++] = R3;

	RTS;

.Lwordwise:
	/*
	 * The first load is issued ahead of the loop and the last store
	 * after it, so the loop itself runs (words - 1) times with a store
	 * and the next load paired in a single issue slot.
	 */
	R0 = R1;			/* value handed back to the caller: src */
	I1 = P1;			/* I1 walks the source words */
	P2 += -1;			/* one iteration is peeled off */
	LSETUP(.Lword_copy, .Lword_copy) LC0=P2;
	R3 = [I1++];			/* first word, loaded ahead */
.Lword_copy:
	MNOP || [P0++] = R3 || R3 = [I1++];

	[P0++] = R3;			/* last word, stored behind */

	/* Whatever length & 3 leaves over is finished byte by byte. */
	R3 = 0x3;
	R3 = R2 & R3;			/* R3 = leftover byte count */
	CC = R3 == 0;
	P1 = I1;			/* read pointer resumes after the words */
	IF !CC JUMP .Ltail;
	RTS;

.Ltail:
	P2 = R3;			/* loop over the leftover bytes only */
.Lbytewise:
	/* Forward byte copy of P2 bytes from P1 to P0. */
	R0 = R1;			/* value handed back to the caller: src */
	LSETUP(.Lbyte_top, .Lbyte_bottom) LC0=P2;
.Lbyte_top:
	R1 = B[P1++] (X);
.Lbyte_bottom:
	B[P0++] = R1;

.Ldone:
	RTS;

.Lbackward:
	/*
	 * dst lies inside the source block, so a forward copy would
	 * overwrite source bytes before they are read.  Start both pointers
	 * on the last byte and walk downwards.  No alignment handling is
	 * attempted on this path: it is always byte by byte.
	 */
	R0 = R1;			/* value handed back to the caller: src */
	P1 = P1 + P2;
	P1 += -1;			/* P1 = src + length - 1 */
	P0 = P0 + P2;
	P0 += -1;			/* P0 = dst + length - 1 */
	LSETUP(.Lrev_top, .Lrev_bottom) LC0=P2;
.Lrev_top:
	R1 = B[P1--] (X);
.Lrev_bottom:
	B[P0--] = R1;

	RTS;