diff options
| author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-08 01:04:45 +0900 | 
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-09 01:03:59 +0000 | 
| commit | 773ff78838ca3c07245e45c06235e0baaa5f710a (patch) | |
| tree | c238920f34ab310a7a3d426cefbf9ebb1d5ea78c /arch/mips/lib/csum_partial.S | |
| parent | 52ffe760ea9ec407292d093c3f06c1cda5187228 (diff) | |
| download | olio-linux-3.10-773ff78838ca3c07245e45c06235e0baaa5f710a.tar.xz olio-linux-3.10-773ff78838ca3c07245e45c06235e0baaa5f710a.zip  | |
[MIPS] Optimize flow of csum_partial
Delete dead code at the end of the function and move small_csumcpy
there.  This makes some labels (maybe_end_cruft, small_memcpy,
end_bytes, out) unnecessary and eliminates some branches.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
| -rw-r--r-- | arch/mips/lib/csum_partial.S | 129 | 
1 files changed, 54 insertions, 75 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 3bffdbb1c1f..b04475d76f3 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -65,64 +65,6 @@  	.text  	.set	noreorder - -/* unknown src alignment and < 8 bytes to go  */ -small_csumcpy: -	move	a1, t2 - -	andi	t0, a1, 4 -	beqz	t0, 1f -	 andi	t0, a1, 2 - -	/* Still a full word to go  */ -	ulw	t1, (src) -	PTR_ADDIU	src, 4 -	ADDC(sum, t1) - -1:	move	t1, zero -	beqz	t0, 1f -	 andi	t0, a1, 1 - -	/* Still a halfword to go  */ -	ulhu	t1, (src) -	PTR_ADDIU	src, 2 - -1:	beqz	t0, 1f -	 sll	t1, t1, 16 - -	lbu	t2, (src) -	 nop - -#ifdef __MIPSEB__ -	sll	t2, t2, 8 -#endif -	or	t1, t2 - -1:	ADDC(sum, t1) - -	/* fold checksum */ -	sll	v1, sum, 16 -	addu	sum, v1 -	sltu	v1, sum, v1 -	srl	sum, sum, 16 -	addu	sum, v1 - -	/* odd buffer alignment? */ -	beqz	t7, 1f -	 nop -	sll	v1, sum, 8 -	srl	sum, sum, 8 -	or	sum, v1 -	andi	sum, 0xffff -1: -	.set	reorder -	/* Add the passed partial csum.  */ -	ADDC(sum, a2) -	jr	ra -	.set	noreorder - -/* ------------------------------------------------------------------------- */ -  	.align	5  LEAF(csum_partial)  	move	sum, zero @@ -132,8 +74,7 @@ LEAF(csum_partial)  	bnez	t8, small_csumcpy		/* < 8 bytes to copy */  	 move	t2, a1 -	beqz	a1, out -	 andi	t7, src, 0x1			/* odd buffer? */ +	andi	t7, src, 0x1			/* odd buffer? */  hword_align:  	beqz	t7, word_align @@ -232,8 +173,9 @@ move_32bytes:  	PTR_ADDU	src, src, 0x20  do_end_words: -	beqz	t8, maybe_end_cruft -	 LONG_SRL	t8, t8, 0x2 +	beqz	t8, small_csumcpy +	 andi	t2, a1, 0x3 +	LONG_SRL	t8, t8, 0x2  end_words:  	lw	t0, (src) @@ -242,21 +184,58 @@ end_words:  	bnez	t8, end_words  	 PTR_ADDU	src, src, 0x4 -maybe_end_cruft: -	andi	t2, a1, 0x3 +/* unknown src alignment and < 8 bytes to go  */ +small_csumcpy: +	move	a1, t2 -small_memcpy: - j small_csumcpy; move a1, t2		/* XXX ??? 
*/ -	beqz	t2, out -	 move	a1, t2 +	andi	t0, a1, 4 +	beqz	t0, 1f +	 andi	t0, a1, 2 -end_bytes: -	lb	t0, (src) -	LONG_SUBU	a1, a1, 0x1 -	bnez	a2, end_bytes -	 PTR_ADDU	src, src, 0x1 +	/* Still a full word to go  */ +	ulw	t1, (src) +	PTR_ADDIU	src, 4 +	ADDC(sum, t1) + +1:	move	t1, zero +	beqz	t0, 1f +	 andi	t0, a1, 1 + +	/* Still a halfword to go  */ +	ulhu	t1, (src) +	PTR_ADDIU	src, 2 + +1:	beqz	t0, 1f +	 sll	t1, t1, 16 + +	lbu	t2, (src) +	 nop + +#ifdef __MIPSEB__ +	sll	t2, t2, 8 +#endif +	or	t1, t2 + +1:	ADDC(sum, t1) -out: +	/* fold checksum */ +	sll	v1, sum, 16 +	addu	sum, v1 +	sltu	v1, sum, v1 +	srl	sum, sum, 16 +	addu	sum, v1 + +	/* odd buffer alignment? */ +	beqz	t7, 1f +	 nop +	sll	v1, sum, 8 +	srl	sum, sum, 8 +	or	sum, v1 +	andi	sum, 0xffff +1: +	.set	reorder +	/* Add the passed partial csum.  */ +	ADDC(sum, a2)  	jr	ra -	 move	v0, sum +	.set	noreorder  	END(csum_partial)  |