 -rw-r--r--   arch/arc/lib/Makefile       |  16
 -rw-r--r--   arch/arc/lib/bootm.c        | 106
 -rw-r--r--   arch/arc/lib/memcmp.S       | 121
 -rw-r--r--   arch/arc/lib/memcpy-700.S   |  63
 -rw-r--r--   arch/arc/lib/memset.S       |  62
 -rw-r--r--   arch/arc/lib/relocate.c     |  72
 -rw-r--r--   arch/arc/lib/sections.c     |  21
 -rw-r--r--   arch/arc/lib/strchr-700.S   | 141
 -rw-r--r--   arch/arc/lib/strcmp.S       |  97
 -rw-r--r--   arch/arc/lib/strcpy-700.S   |  67
 -rw-r--r--   arch/arc/lib/strlen.S       |  80
 11 files changed, 846 insertions(+), 0 deletions(-)
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
new file mode 100644
index 000000000..7675f855d
--- /dev/null
+++ b/arch/arc/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+#
+# SPDX-License-Identifier:	GPL-2.0+
+#
+
+obj-y += sections.o
+obj-y += relocate.o
+obj-y += strchr-700.o
+obj-y += strcmp.o
+obj-y += strcpy-700.o
+obj-y += strlen.o
+obj-y += memcmp.o
+obj-y += memcpy-700.o
+obj-y += memset.o
+obj-$(CONFIG_CMD_BOOTM) += bootm.o
diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c
new file mode 100644
index 000000000..d185a50bd
--- /dev/null
+++ b/arch/arc/lib/bootm.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static ulong get_sp(void)
+{
+	ulong ret;
+
+	asm("mov %0, sp" : "=r"(ret) : );
+	return ret;
+}
+
+void arch_lmb_reserve(struct lmb *lmb)
+{
+	ulong sp;
+
+	/*
+	 * Booting a (Linux) kernel image
+	 *
+	 * Allocate space for command line and board info - the
+	 * address should be as high as possible within the reach of
+	 * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
+	 * memory, which means far enough below the current stack
+	 * pointer.
+	 */
+	sp = get_sp();
+	debug("## Current stack ends at 0x%08lx ", sp);
+
+	/* adjust sp by 4K to be safe */
+	sp -= 4096;
+	lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
+}
+
+static int cleanup_before_linux(void)
+{
+	disable_interrupts();
+	flush_dcache_all();
+	invalidate_icache_all();
+
+	return 0;
+}
+
+/* Subcommand: PREP */
+static void boot_prep_linux(bootm_headers_t *images)
+{
+	if (image_setup_linux(images))
+		hang();
+}
+
+/* Subcommand: GO */
+static void boot_jump_linux(bootm_headers_t *images, int flag)
+{
+	void (*kernel_entry)(int zero, int arch, uint params);
+	unsigned int r0, r2;
+	int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+	kernel_entry = (void (*)(int, int, uint))images->ep;
+
+	debug("## Transferring control to Linux (at address %08lx)...\n",
+	      (ulong) kernel_entry);
+	bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+	printf("\nStarting kernel ...%s\n\n", fake ?
+	       "(fake run for tracing)" : "");
+	bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
+
+	cleanup_before_linux();
+
+	if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
+		r0 = 2;
+		r2 = (unsigned int)images->ft_addr;
+	} else {
+		r0 = 1;
+		r2 = (unsigned int)getenv("bootargs");
+	}
+
+	if (!fake)
+		kernel_entry(r0, 0, r2);
+}
+
+int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
+{
+	/* No need for those on ARC */
+	if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
+		return -1;
+
+	if (flag & BOOTM_STATE_OS_PREP) {
+		boot_prep_linux(images);
+		return 0;
+	}
+
+	if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
+		boot_jump_linux(images, flag);
+		return 0;
+	}
+
+	boot_prep_linux(images);
+	boot_jump_linux(images, flag);
+	return 0;
+}
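For reference, the hand-off convention visible in boot_jump_linux() above is easy to restate in plain C: the first argument (r0) tells the kernel how to interpret the third one (r2) — 2 means "r2 is a flattened device tree", 1 means "r2 is the bootargs string" — and the middle argument is always zero. The sketch below is illustrative only; the helper name and signature are invented for this note and are not part of the patch.

```c
#include <stddef.h>

/* Illustrative stand-in for the kernel entry point used above. */
typedef void (*kernel_entry_t)(int select, int zero, unsigned long arg);

/* Mirrors the r0/r2 selection done in boot_jump_linux(). */
void jump_to_kernel(kernel_entry_t entry, void *fdt,
		    const char *bootargs, int fake_run)
{
	int select;
	unsigned long arg;

	if (fdt) {			/* device-tree boot: r0 = 2, r2 = FDT */
		select = 2;
		arg = (unsigned long)fdt;
	} else {			/* legacy boot: r0 = 1, r2 = command line */
		select = 1;
		arg = (unsigned long)bootargs;
	}

	if (!fake_run)
		entry(select, 0, arg);	/* does not return on real hardware */
}
```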
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 000000000..fa5aac5f6
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* __BIG_ENDIAN__ */
+#define WORD2 r3
+#define SHIFT r2
+#endif /* _ENDIAN__ */
+
+.global memcmp
+.align 4
+memcmp:
+	or	%r12, %r0, %r1
+	asl_s	%r12, %r12, 30
+	sub	%r3, %r2, 1
+	brls	%r2, %r12, .Lbytewise
+	ld	%r4, [%r0, 0]
+	ld	%r5, [%r1, 0]
+	lsr.f	%lp_count, %r3, 3
+	lpne	.Loop_end
+	ld_s	WORD2, [%r0, 4]
+	ld_s	%r12, [%r1, 4]
+	brne	%r4, %r5, .Leven
+	ld.a	%r4, [%r0, 8]
+	ld.a	%r5, [%r1, 8]
+	brne	WORD2, %r12, .Lodd
+.Loop_end:
+	asl_s	SHIFT, SHIFT, 3
+	bhs_s	.Last_cmp
+	brne	%r4, %r5, .Leven
+	ld	%r4, [%r0, 4]
+	ld	%r5, [%r1, 4]
+#ifdef __LITTLE_ENDIAN__
+	nop_s
+	/* one more load latency cycle */
+.Last_cmp:
+	xor	%r0, %r4, %r5
+	bset	%r0, %r0, SHIFT
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	b.d	.Leven_cmp
+	and	%r1, %r1, 24
+.Leven:
+	xor	%r0, %r4, %r5
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	/* slow track insn */
+	and	%r1, %r1, 24
+.Leven_cmp:
+	asl	%r2, %r4, %r1
+	asl	%r12, %r5, %r1
+	lsr_s	%r2, %r2, 1
+	lsr_s	%r12, %r12, 1
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r12
+	.balign	4
+.Lodd:
+	xor	%r0, WORD2, %r12
+	sub_s	%r1, %r0, 1
+	bic_s	%r1, %r1, %r0
+	norm	%r1, %r1
+	/* slow track insn */
+	and	%r1, %r1, 24
+	asl_s	%r2, %r2, %r1
+	asl_s	%r12, %r12, %r1
+	lsr_s	%r2, %r2, 1
+	lsr_s	%r12, %r12, 1
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r12
+#else /* __BIG_ENDIAN__ */
+.Last_cmp:
+	neg_s	SHIFT, SHIFT
+	lsr	%r4, %r4, SHIFT
+	lsr	%r5, %r5, SHIFT
+	/* slow track insn */
+.Leven:
+	sub.f	%r0, %r4, %r5
+	mov.ne	%r0, 1
+	j_s.d	[%blink]
+	bset.cs	%r0, %r0, 31
+.Lodd:
+	cmp_s	WORD2, %r12
+
+	mov_s	%r0, 1
+	j_s.d	[%blink]
+	bset.cs	%r0, %r0, 31
+#endif /* _ENDIAN__ */
+	.balign	4
+.Lbytewise:
+	breq	%r2, 0, .Lnil
+	ldb	%r4, [%r0, 0]
+	ldb	%r5, [%r1, 0]
+	lsr.f	%lp_count, %r3
+	lpne	.Lbyte_end
+	ldb_s	%r3, [%r0, 1]
+	ldb	%r12, [%r1, 1]
+	brne	%r4, %r5, .Lbyte_even
+	ldb.a	%r4, [%r0, 2]
+	ldb.a	%r5, [%r1, 2]
+	brne	%r3, %r12, .Lbyte_odd
+.Lbyte_end:
+	bcc	.Lbyte_even
+	brne	%r4, %r5, .Lbyte_even
+	ldb_s	%r3, [%r0, 1]
+	ldb_s	%r12, [%r1, 1]
+.Lbyte_odd:
+	j_s.d	[%blink]
+	sub	%r0, %r3, %r12
+.Lbyte_even:
+	j_s.d	[%blink]
+	sub	%r0, %r4, %r5
+.Lnil:
+	j_s.d	[%blink]
+	mov	%r0, 0
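The first few instructions of memcmp above fold the size and alignment checks into a single unsigned comparison: (r0 | r1) << 30 is zero only when both pointers are 4-byte aligned, so brls r2, r12, .Lbytewise falls through to the byte loop whenever the buffers are unaligned (the shifted value becomes at least 0x40000000) or the count is too small to be worth the word loop. The same gate written out in C, purely as an illustration (the function name is invented):

```c
#include <stddef.h>
#include <stdint.h>

/* Nonzero when the word-at-a-time path of the ARC memcmp above would run. */
int memcmp_uses_word_loop(const void *s1, const void *s2, size_t n)
{
	/* asl_s r12, (r0 | r1), 30: zero iff both pointers are 4-byte
	 * aligned, otherwise at least 0x40000000. */
	uint32_t gate = (uint32_t)(((uintptr_t)s1 | (uintptr_t)s2) << 30);

	/* brls r2, r12, .Lbytewise  ==  "if (n <= gate) go bytewise" */
	return n > gate;
}
```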
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 000000000..51dd73ab8
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+.global memcpy
+.align 4
+memcpy:
+	or	%r3, %r0, %r1
+	asl_s	%r3, %r3, 30
+	mov_s	%r5, %r0
+	brls.d	%r2, %r3, .Lcopy_bytewise
+	sub.f	%r3, %r2, 1
+	ld_s	%r12, [%r1, 0]
+	asr.f	%lp_count, %r3, 3
+	bbit0.d	%r3, 2, .Lnox4
+	bmsk_s	%r2, %r2, 1
+	st.ab	%r12, [%r5, 4]
+	ld.a	%r12, [%r1, 4]
+.Lnox4:
+	lppnz	.Lendloop
+	ld_s	%r3, [%r1, 4]
+	st.ab	%r12, [%r5, 4]
+	ld.a	%r12, [%r1, 8]
+	st.ab	%r3, [%r5, 4]
+.Lendloop:
+	breq	%r2, 0, .Last_store
+	ld	%r3, [%r5, 0]
+#ifdef __LITTLE_ENDIAN__
+	add3	%r2, -1, %r2
+	/* uses long immediate */
+	xor_s	%r12, %r12, %r3
+	bmsk	%r12, %r12, %r2
+	xor_s	%r12, %r12, %r3
+#else /* __BIG_ENDIAN__ */
+	sub3	%r2, 31, %r2
+	/* uses long immediate */
+	xor_s	%r3, %r3, %r12
+	bmsk	%r3, %r3, %r2
+	xor_s	%r12, %r12, %r3
+#endif /* _ENDIAN__ */
+.Last_store:
+	j_s.d	[%blink]
+	st	%r12, [%r5, 0]
+
+	.balign	4
+.Lcopy_bytewise:
+	jcs	[%blink]
+	ldb_s	%r12, [%r1, 0]
+	lsr.f	%lp_count, %r3
+	bhs_s	.Lnox1
+	stb.ab	%r12, [%r5, 1]
+	ldb.a	%r12, [%r1, 1]
+.Lnox1:
+	lppnz	.Lendbloop
+	ldb_s	%r3, [%r1, 1]
+	stb.ab	%r12, [%r5, 1]
+	ldb.a	%r12, [%r1, 2]
+	stb.ab	%r3, [%r5, 1]
+.Lendbloop:
+	j_s.d	[%blink]
+	stb	%r12, [%r5, 0]
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 000000000..017e8af0e
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+.global memset
+.align 4
+memset:
+	mov_s	%r4, %r0
+	or	%r12, %r0, %r2
+	bmsk.f	%r12, %r12, 1
+	extb_s	%r1, %r1
+	asl	%r3, %r1, 8
+	beq.d	.Laligned
+	or_s	%r1, %r1, %r3
+	brls	%r2, SMALL, .Ltiny
+	add	%r3, %r2, %r0
+	stb	%r1, [%r3, -1]
+	bclr_s	%r3, %r3, 0
+	stw	%r1, [%r3, -2]
+	bmsk.f	%r12, %r0, 1
+	add_s	%r2, %r2, %r12
+	sub.ne	%r2, %r2, 4
+	stb.ab	%r1, [%r4, 1]
+	and	%r4, %r4, -2
+	stw.ab	%r1, [%r4, 2]
+	and	%r4, %r4, -4
+
+	.balign	4
+.Laligned:
+	asl	%r3, %r1, 16
+	lsr.f	%lp_count, %r2, 2
+	or_s	%r1, %r1, %r3
+	lpne	.Loop_end
+	st.ab	%r1, [%r4, 4]
+.Loop_end:
+	j_s	[%blink]
+
+	.balign	4
+.Ltiny:
+	mov.f	%lp_count, %r2
+	lpne	.Ltiny_end
+	stb.ab	%r1, [%r4, 1]
+.Ltiny_end:
+	j_s	[%blink]
+
+/*
+ * memzero: @r0 = mem, @r1 = size_t
+ * memset:  @r0 = mem, @r1 = char, @r2 = size_t
+ */
+
+.global memzero
+.align 4
+memzero:
+	/* adjust bzero args to memset args */
+	mov	%r2, %r1
+	mov	%r1, 0
+	/* tail call so need to tinker with blink */
+	b	memset
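Before entering its word loop, memset above widens the fill byte into a 32-bit pattern with extb_s/asl/or_s (the upper half is folded in at .Laligned), and memzero simply reshuffles its arguments and tail-calls memset. The pattern-building step in C, as a small self-contained illustration (the helper name is made up):

```c
#include <stdint.h>
#include <stdio.h>

/* Replicate a fill byte into a 32-bit store pattern, as memset does with
 * extb_s / asl / or_s before the st.ab word loop. */
uint32_t widen_fill_byte(unsigned char c)
{
	uint32_t v = c;

	v |= v << 8;	/* done before the alignment branch */
	v |= v << 16;	/* done at .Laligned */
	return v;
}

int main(void)
{
	printf("0x%08x\n", widen_fill_byte(0xab));	/* prints 0xabababab */
	return 0;
}
```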
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
new file mode 100644
index 000000000..956aa1494
--- /dev/null
+++ b/arch/arc/lib/relocate.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <elf.h>
+#include <asm/sections.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/*
+ * Base functionality is taken from the x86 version with added ARC-specifics
+ */
+int do_elf_reloc_fixups(void)
+{
+	Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
+	Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
+
+	Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
+	Elf32_Addr *offset_ptr_ram;
+
+	do {
+		/* Get the location from the relocation entry */
+		offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;
+
+		/* Check that the location of the relocation is in .text */
+		if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
+		    offset_ptr_rom > last_offset) {
+			unsigned int val;
+			/* Switch to the in-RAM version */
+			offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
							gd->reloc_off);
+
+			/*
+			 * Use "memcpy" because the target location might be
+			 * only 16-bit aligned on ARC, so we may need to read
+			 * it byte-by-byte; an attempt to read the entire word
+			 * at once would raise an exception.
+			 */
+			memcpy(&val, offset_ptr_ram, sizeof(int));
+
+			/* If location in ".text" section swap value */
+			if ((unsigned int)offset_ptr_rom <
+			    (unsigned int)&__text_end)
+				val = (val << 16) | (val >> 16);
+
+			/* Check that the target points into .text */
+			if (val >= CONFIG_SYS_TEXT_BASE && val <=
+			    (unsigned int)&__bss_end) {
+				val += gd->reloc_off;
+				/* If location in ".text" section swap value */
+				if ((unsigned int)offset_ptr_rom <
+				    (unsigned int)&__text_end)
+					val = (val << 16) | (val >> 16);
+				memcpy(offset_ptr_ram, &val, sizeof(int));
+			} else {
+				debug("   %p: rom reloc %x, ram %p, value %x, limit %x\n",
+				      re_src, re_src->r_offset, offset_ptr_ram,
+				      val, (unsigned int)&__bss_end);
+			}
+		} else {
+			debug("   %p: rom reloc %x, last %p\n", re_src,
+			      re_src->r_offset, last_offset);
+		}
+		last_offset = offset_ptr_rom;
+
+	} while (++re_src < re_end);
+
+	return 0;
+}
diff --git a/arch/arc/lib/sections.c b/arch/arc/lib/sections.c
new file mode 100644
index 000000000..b0b46a4e9
--- /dev/null
+++ b/arch/arc/lib/sections.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * For some reason the linker sets linker-generated symbols to zero in PIE mode.
+ * A work-around is substitution of linker-generated symbols with
+ * compiler-generated symbols which are properly handled by the linker in PIE mode.
+ */
+
+char __bss_start[0] __attribute__((section(".__bss_start")));
+char __bss_end[0] __attribute__((section(".__bss_end")));
+char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
+char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
+char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
+char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
+char __text_start[0] __attribute__((section(".__text_start")));
+char __text_end[0] __attribute__((section(".__text_end")));
+char __init_end[0] __attribute__((section(".__init_end")));
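The half-word swap in do_elf_reloc_fixups() above accounts for the way ARC keeps 32-bit literals inside the instruction stream: the two 16-bit halves are stored in swapped ("middle-endian") order, so a relocation target that lives in .text has to be decoded before the offset is applied and re-encoded afterwards; and because that target may be only 2-byte aligned, the value is moved with memcpy() instead of a plain word access. Below is a condensed, illustrative C version of one such fixup; the helper names are invented, and the range check that decides whether the value is worth fixing is deliberately omitted.

```c
#include <stdint.h>
#include <string.h>

/* Swap the 16-bit halves of a value, as the code above does for
 * relocation targets located inside .text. */
static inline uint32_t swap_halfwords(uint32_t v)
{
	return (v << 16) | (v >> 16);
}

/* Condensed sketch of one fixup step from do_elf_reloc_fixups(). */
void fixup_word(void *target, uint32_t reloc_off, int target_in_text)
{
	uint32_t val;

	memcpy(&val, target, sizeof(val));	/* may be only 2-byte aligned */
	if (target_in_text)
		val = swap_halfwords(val);	/* decode middle-endian literal */
	val += reloc_off;			/* apply the relocation offset */
	if (target_in_text)
		val = swap_halfwords(val);	/* re-encode before storing back */
	memcpy(target, &val, sizeof(val));
}
```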
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 000000000..55fcc9fb0
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * ARC700 has a relatively long pipeline and branch prediction, so we want
+ * to avoid branches that are hard to predict.  On the other hand, the
+ * presence of the norm instruction makes it easier to operate on whole
+ * words branch-free.
+ */
+
+.global strchr
+.align 4
+strchr:
+	extb_s	%r1, %r1
+	asl	%r5, %r1, 8
+	bmsk	%r2, %r0, 1
+	or	%r5, %r5, %r1
+	mov_s	%r3, 0x01010101
+	breq.d	%r2, %r0, .Laligned
+	asl	%r4, %r5, 16
+	sub_s	%r0, %r0, %r2
+	asl	%r7, %r2, 3
+	ld_s	%r2, [%r0]
+#ifdef __LITTLE_ENDIAN__
+	asl	%r7, %r3, %r7
+#else /* __BIG_ENDIAN__ */
+	lsr	%r7, %r3, %r7
+#endif /* _ENDIAN__ */
+	or	%r5, %r5, %r4
+	ror	%r4, %r3
+	sub	%r12, %r2, %r7
+	bic_s	%r12, %r12, %r2
+	and	%r12, %r12, %r4
+	brne.d	%r12, 0, .Lfound0_ua
+	xor	%r6, %r2, %r5
+	ld.a	%r2, [%r0, 4]
+	sub	%r12, %r6, %r7
+	bic	%r12, %r12, %r6
+#ifdef __LITTLE_ENDIAN__
+	and	%r7, %r12, %r4
+	/* For speed, we want this branch to be unaligned. */
+	breq	%r7, 0, .Loop
+	/* Likewise this one */
+	b	.Lfound_char
+#else /* __BIG_ENDIAN__ */
+	and	%r12, %r12, %r4
+	/* For speed, we want this branch to be unaligned. */
+	breq	%r12, 0, .Loop
+	lsr_s	%r12, %r12, 7
+	bic	%r2, %r7, %r6
+	b.d	.Lfound_char_b
+	and_s	%r2, %r2, %r12
+#endif /* _ENDIAN__ */
+	/* We require this code address to be unaligned for speed...  */
+.Laligned:
+	ld_s	%r2, [%r0]
+	or	%r5, %r5, %r4
+	ror	%r4, %r3
+	/* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+	sub	%r12, %r2, %r3
+	bic_s	%r12, %r12, %r2
+	and	%r12, %r12, %r4
+	brne.d	%r12, 0, .Lfound0
+	xor	%r6, %r2, %r5
+	ld.a	%r2, [%r0, 4]
+	sub	%r12, %r6, %r3
+	bic	%r12, %r12, %r6
+	and	%r7, %r12, %r4
+	breq	%r7, 0, .Loop
+	/*
+	 * ... so that this branch is unaligned.
+	 * Found searched-for character.
+	 * r0 has already advanced to next word.
+	 */
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * We only need the information about the first matching byte
+	 * (i.e. the least significant matching byte) to be exact,
+	 * hence there is no problem with carry effects.
+	 */
+.Lfound_char:
+	sub	%r3, %r7, 1
+	bic	%r3, %r3, %r7
+	norm	%r2, %r3
+	sub_s	%r0, %r0, 1
+	asr_s	%r2, %r2, 3
+	j.d	[%blink]
+	sub_s	%r0, %r0, %r2
+
+	.balign	4
+.Lfound0_ua:
+	mov	%r3, %r7
+.Lfound0:
+	sub	%r3, %r6, %r3
+	bic	%r3, %r3, %r6
+	and	%r2, %r3, %r4
+	or_s	%r12, %r12, %r2
+	sub_s	%r3, %r12, 1
+	bic_s	%r3, %r3, %r12
+	norm	%r3, %r3
+	add_s	%r0, %r0, 3
+	asr_s	%r12, %r3, 3
+	asl.f	0, %r2, %r3
+	sub_s	%r0, %r0, %r12
+	j_s.d	[%blink]
+	mov.pl	%r0, 0
+#else /* __BIG_ENDIAN__ */
+.Lfound_char:
+	lsr	%r7, %r7, 7
+
+	bic	%r2, %r7, %r6
+.Lfound_char_b:
+	norm	%r2, %r2
+	sub_s	%r0, %r0, 4
+	asr_s	%r2, %r2, 3
+	j.d	[%blink]
+	add_s	%r0, %r0, %r2
+
+.Lfound0_ua:
+	mov_s	%r3, %r7
+.Lfound0:
+	asl_s	%r2, %r2, 7
+	or	%r7, %r6, %r4
+	bic_s	%r12, %r12, %r2
+	sub	%r2, %r7, %r3
+	or	%r2, %r2, %r6
+	bic	%r12, %r2, %r12
+	bic.f	%r3, %r4, %r12
+	norm	%r3, %r3
+
+	add.pl	%r3, %r3, 1
+	asr_s	%r12, %r3, 3
+	asl.f	0, %r2, %r3
+	add_s	%r0, %r0, %r12
+	j_s.d	[%blink]
+	mov.mi	%r0, 0
+#endif /* _ENDIAN__ */
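strchr above, and strcmp, strcpy and strlen after it, all lean on the same word-at-a-time byte test: with r3 = 0x01010101 and r4 = ror(r3) = 0x80808080, the expression (word - r3) & ~word & r4 is nonzero whenever the word contains a zero byte, and XOR-ing the word with the search character replicated into every byte first turns "contains c" into "contains zero". A reference rendering in C (function names are illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* Nonzero iff some byte of w is 0x00.  A 0x01 byte directly above a zero
 * byte may also be flagged; only the lowest flagged byte is guaranteed
 * exact, which is all the assembly above relies on. */
static inline uint32_t has_zero_byte(uint32_t w)
{
	return (w - 0x01010101u) & ~w & 0x80808080u;
}

/* Nonzero iff some byte of w equals c. */
static inline uint32_t has_byte(uint32_t w, unsigned char c)
{
	return has_zero_byte(w ^ (c * 0x01010101u));
}

int main(void)
{
	assert(has_zero_byte(0x11002233u));
	assert(!has_zero_byte(0x11223344u));
	assert(has_byte(0x41424344u, 0x43));	/* one byte equals 'C' */
	return 0;
}
```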
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 000000000..8cb7d2f18
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * This is optimized primarily for the ARC700.
+ * It would be possible to speed up the loops by one cycle / word
+ * (one cycle / byte, respectively) by forcing double source 1 alignment,
+ * unrolling by a factor of two, and speculatively loading the second word /
+ * byte of source 1; however, that would increase the overhead for loop
+ * setup / finish, and strcmp might often terminate early.
+ */
+
+.global strcmp
+.align 4
+strcmp:
+	or	%r2, %r0, %r1
+	bmsk_s	%r2, %r2, 1
+	brne	%r2, 0, .Lcharloop
+	mov_s	%r12, 0x01010101
+	ror	%r5, %r12
+.Lwordloop:
+	ld.ab	%r2, [%r0, 4]
+	ld.ab	%r3, [%r1, 4]
+	nop_s
+	sub	%r4, %r2, %r12
+	bic	%r4, %r4, %r2
+	and	%r4, %r4, %r5
+	brne	%r4, 0, .Lfound0
+	breq	%r2, %r3, .Lwordloop
+#ifdef	__LITTLE_ENDIAN__
+	xor	%r0, %r2, %r3	/* mask for difference */
+	sub_s	%r1, %r0, 1
+	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
+	sub	%r1, %r5, %r0
+	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
+	and_s	%r2, %r2, %r0
+	and_s	%r3, %r3, %r0
+#endif /* _ENDIAN__ */
+	cmp_s	%r2, %r3
+	mov_s	%r0, 1
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+
+	.balign	4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+	xor	%r0, %r2, %r3	/* mask for difference */
+	or	%r0, %r0, %r4	/* or in zero indicator */
+	sub_s	%r1, %r0, 1
+	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
+	sub	%r1, %r5, %r0
+	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
+	and_s	%r2, %r2, %r0
+	and_s	%r3, %r3, %r0
+	sub.f	%r0, %r2, %r3
+	mov.hi	%r0, 1
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+#else /* __BIG_ENDIAN__ */
+	/*
+	 * The zero-detection above can mis-detect 0x01 bytes as zeroes
+	 * because of carry propagation from a less significant zero byte.
+	 * We can compensate for this by checking that bit 0 is zero.
+	 * This compensation is not necessary in the step where we
+	 * get a low estimate for r2, because in any affected bytes
+	 * we already have 0x00 or 0x01, which will remain unchanged
+	 * when bit 7 is cleared.
+	 */
+	.balign	4
+.Lfound0:
+	lsr	%r0, %r4, 8
+	lsr_s	%r1, %r2
+	bic_s	%r2, %r2, %r0	/* get low estimate for r2 and get ... */
+	bic_s	%r0, %r0, %r1	/* <this is the adjusted mask for zeros> */
+	or_s	%r3, %r3, %r0	/* ... high estimate r3 so that r2 > r3 will */
+	cmp_s	%r3, %r2	/* ... be independent of trailing garbage */
+	or_s	%r2, %r2, %r0	/* likewise for r3 > r2 */
+	bic_s	%r3, %r3, %r0
+	rlc	%r0, 0		/* r0 := r2 > r3 ? 1 : 0 */
+	cmp_s	%r2, %r3
+	j_s.d	[%blink]
+	bset.lo	%r0, %r0, 31
+#endif /* _ENDIAN__ */
+
+	.balign	4
+.Lcharloop:
+	ldb.ab	%r2, [%r0, 1]
+	ldb.ab	%r3, [%r1, 1]
+	nop_s
+	breq	%r2, 0, .Lcmpend
+	breq	%r2, %r3, .Lcharloop
+.Lcmpend:
+	j_s.d	[%blink]
+	sub	%r0, %r2, %r3
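In the little-endian .Lfound0 path above, strcmp isolates the least significant bit at which the two words differ, widens it into a byte mask with the help of r5 = 0x80808080, and compares only that byte; every byte below it is known to be equal. The idea in C, for illustration only (the end-of-string handling, which the assembly folds in by OR-ing the zero mask into the difference, is left out):

```c
#include <stdint.h>

/* Decide a little-endian word-wise strcmp step by the lowest-addressed
 * differing byte of two words read from the same offset (a != b assumed). */
int compare_first_differing_byte(uint32_t a, uint32_t b)
{
	uint32_t diff = a ^ b;			/* mask of differing bits */
	uint32_t lowbit = diff & -diff;		/* least significant such bit */
	unsigned shift = 0;

	while (lowbit > 0xffu) {		/* find the byte holding it */
		lowbit >>= 8;
		shift += 8;
	}
	/* All lower-addressed bytes are equal, so this byte decides the sign. */
	return (int)((a >> shift) & 0xff) - (int)((b >> shift) & 0xff);
}
```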
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 000000000..41bb53e50
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ * it 8 byte aligned.  Thus, we can do a little read-ahead, without
+ * dereferencing a cache line that we should not touch.
+ * Note that short and long instructions have been scheduled to avoid
+ * branch stalls.
+ * The beq_s to r3z could be made unaligned & long to avoid a stall
+ * there, but it is not likely to be taken often, and it would also be likely
+ * to cost an unaligned mispredict at the next call.
+ */
+
+.global strcpy
+.align 4
+strcpy:
+	or	%r2, %r0, %r1
+	bmsk_s	%r2, %r2, 1
+	brne.d	%r2, 0, charloop
+	mov_s	%r10, %r0
+	ld_s	%r3, [%r1, 0]
+	mov	%r8, 0x01010101
+	bbit0.d	%r1, 2, loop_start
+	ror	%r12, %r8
+	sub	%r2, %r3, %r8
+	bic_s	%r2, %r2, %r3
+	tst_s	%r2, %r12
+	bne	r3z
+	mov_s	%r4, %r3
+	.balign 4
+loop:
+	ld.a	%r3, [%r1, 4]
+	st.ab	%r4, [%r10, 4]
+loop_start:
+	ld.a	%r4, [%r1, 4]
+	sub	%r2, %r3, %r8
+	bic_s	%r2, %r2, %r3
+	tst_s	%r2, %r12
+	bne_s	r3z
+	st.ab	%r3, [%r10, 4]
+	sub	%r2, %r4, %r8
+	bic	%r2, %r2, %r4
+	tst	%r2, %r12
+	beq	loop
+	mov_s	%r3, %r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	%r1, %r3, 7
+	lsr_s	%r3, %r3, 8
+#else /* __BIG_ENDIAN__ */
+r3z:	lsr.f	%r1, %r3, 24
+	asl_s	%r3, %r3, 8
+#endif /* _ENDIAN__ */
+	bne.d	r3z
+	stb.ab	%r1, [%r10, 1]
+	j_s	[%blink]
+
+	.balign	4
+charloop:
+	ldb.ab	%r3, [%r1, 1]
+	brne.d	%r3, 0, charloop
+	stb.ab	%r3, [%r10, 1]
+	j	[%blink]
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 000000000..666e22c0d
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+.global strlen
+.align 4
+strlen:
+	or	%r3, %r0, 7
+	ld	%r2, [%r3, -7]
+	ld.a	%r6, [%r3, -3]
+	mov	%r4, 0x01010101
+	/* uses long immediate */
+#ifdef __LITTLE_ENDIAN__
+	asl_s	%r1, %r0, 3
+	btst_s	%r0, 2
+	asl	%r7, %r4, %r1
+	ror	%r5, %r4
+	sub	%r1, %r2, %r7
+	bic_s	%r1, %r1, %r2
+	mov.eq	%r7, %r4
+	sub	%r12, %r6, %r7
+	bic	%r12, %r12, %r6
+	or.eq	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	brne	%r12, 0, .Learly_end
+#else /* __BIG_ENDIAN__ */
+	ror	%r5, %r4
+	btst_s	%r0, 2
+	mov_s	%r1, 31
+	sub3	%r7, %r1, %r0
+	sub	%r1, %r2, %r4
+	bic_s	%r1, %r1, %r2
+	bmsk	%r1, %r1, %r7
+	sub	%r12, %r6, %r4
+	bic	%r12, %r12, %r6
+	bmsk.ne	%r12, %r12, %r7
+	or.eq	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	brne	%r12, 0, .Learly_end
+#endif /* _ENDIAN__ */
+
+.Loop:
+	ld_s	%r2, [%r3, 4]
+	ld.a	%r6, [%r3, 8]
+	/* stall for load result */
+	sub	%r1, %r2, %r4
+	bic_s	%r1, %r1, %r2
+	sub	%r12, %r6, %r4
+	bic	%r12, %r12, %r6
+	or	%r12, %r12, %r1
+	and	%r12, %r12, %r5
+	breq	%r12, 0, .Loop
+.Lend:
+	and.f	%r1, %r1, %r5
+	sub.ne	%r3, %r3, 4
+	mov.eq	%r1, %r12
+#ifdef __LITTLE_ENDIAN__
+	sub_s	%r2, %r1, 1
+	bic_s	%r2, %r2, %r1
+	norm	%r1, %r2
+	sub_s	%r0, %r0, 3
+	lsr_s	%r1, %r1, 3
+	sub	%r0, %r3, %r0
+	j_s.d	[%blink]
+	sub	%r0, %r0, %r1
+#else /* __BIG_ENDIAN__ */
+	lsr_s	%r1, %r1, 7
+	mov.eq	%r2, %r6
+	bic_s	%r1, %r1, %r2
+	norm	%r1, %r1
+	sub	%r0, %r3, %r0
+	lsr_s	%r1, %r1, 3
+	j_s.d	[%blink]
+	add	%r0, %r0, %r1
+#endif /* _ENDIAN */
+.Learly_end:
+	b.d	.Lend
+	sub_s.ne %r1, %r1, %r1
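strlen above reuses the same zero-byte mask as the routines before it; the only additional step is converting the mask into a byte index, which the little-endian epilogue does with the bic/norm/asr sequence (isolate the lowest set bit, locate it, divide by eight). A portable C equivalent of just that conversion, written as a loop instead of using norm (the function name is illustrative):

```c
#include <stdint.h>

/* Index (0..3) of the first zero byte flagged in a nonzero mask produced
 * by (w - 0x01010101) & ~w & 0x80808080, for a little-endian word. */
unsigned first_zero_byte_index(uint32_t zero_mask)
{
	unsigned idx = 0;

	while ((zero_mask & 0x80u) == 0) {	/* low byte not flagged yet */
		zero_mask >>= 8;
		idx++;
	}
	return idx;
}
```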