patch-2.4.5 linux/arch/arm/lib/csumpartial.S

diff -u --recursive --new-file v2.4.4/linux/arch/arm/lib/csumpartial.S linux/arch/arm/lib/csumpartial.S
@@ -12,57 +12,119 @@
 
 		.text
 
-/* Function: __u32 csum_partial(const char *src, int len, __u32)
+/*
+ * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
  * Params  : r0 = buffer, r1 = len, r2 = checksum
  * Returns : r0 = new checksum
  */
 
+buf	.req	r0
+len	.req	r1
+sum	.req	r2
+td0	.req	r3
+td1	.req	r4	@ save before use
+td2	.req	r5	@ save before use
+td3	.req	lr
+
+.zero:		mov	r0, sum
+		add	sp, sp, #4
+		ldr	pc, [sp], #4
+
+		/*
+		 * Handle 0 to 7 bytes, with any alignment of source and
+		 * destination pointers.  Note that when we get here, C = 0
+		 */
+.less8:		teq	len, #0			@ check for zero count
+		beq	.zero
+
+		/* we must have at least one byte. */
+		tst	buf, #1			@ odd address?
+		ldrneb	td0, [buf], #1
+		subne	len, len, #1
+		adcnes	sum, sum, td0, lsl #8
+
+.less4:		tst	len, #6
+		beq	.less8_byte
+
+		/* we are now half-word aligned */
+
+.less8_wordlp:
+#ifdef __ARM_ARCH_4__
+		ldrh	td0, [buf], #2
+		sub	len, len, #2
+#else
+		ldrb	td0, [buf], #1
+		ldrb	td3, [buf], #1
+		sub	len, len, #2
+		orr	td0, td0, td3, lsl #8
+#endif
+		adcs	sum, sum, td0
+		tst	len, #6
+		bne	.less8_wordlp
+
+.less8_byte:	tst	len, #1			@ odd number of bytes
+		ldrneb	td0, [buf], #1		@ include last byte
+		adcnes	sum, sum, td0		@ update checksum
+
+.done:		adc	r0, sum, #0		@ collect up the last carry
+		ldr	td0, [sp], #4
+		tst	td0, #1			@ check buffer alignment
+		movne	td0, r0, lsl #8		@ rotate checksum by 8 bits
+		orrne	r0, td0, r0, lsr #24
+		ldr	pc, [sp], #4		@ return
+
+.not_aligned:	tst	buf, #1			@ odd address
+		ldrneb	td0, [buf], #1		@ make even
+		subne	len, len, #1
+		adcnes	sum, sum, td0, lsl #8	@ update checksum
+
+		tst	buf, #2			@ 32-bit aligned?
+#ifdef __ARM_ARCH_4__
+		ldrneh	td0, [buf], #2		@ make 32-bit aligned
+		subne	len, len, #2
+#else
+		ldrneb	td0, [buf], #1
+		ldrneb	ip, [buf], #1
+		subne	len, len, #2
+		orrne	td0, td0, ip, lsl #8
+#endif
+		adcnes	sum, sum, td0		@ update checksum
+		mov	pc, lr
+
 ENTRY(csum_partial)
-		tst	r0, #2
-		beq	1f
-		subs	r1, r1, #2
-		addmi	r1, r1, #2
-		bmi	3f
-		bic	r0, r0, #3
-		ldr	r3, [r0], #4
-		adds	r2, r2, r3, lsr #16
-		adcs	r2, r2, #0
-1:		adds	r2, r2, #0
-		bics	ip, r1, #31
+		stmfd	sp!, {buf, lr}
+		cmp	len, #8			@ Ensure that we have at least
+		blo	.less8			@ 8 bytes to sum.
+
+		adds	sum, sum, #0		@ C = 0
+		tst	buf, #3			@ Test destination alignment
+		blne	.not_aligned		@ align destination, return here
+
+1:		bics	ip, len, #31
 		beq	3f
-		stmfd	sp!, {r4 - r6}
-2:		ldmia	r0!, {r3 - r6}
-		adcs	r2, r2, r3
-		adcs	r2, r2, r4
-		adcs	r2, r2, r5
-		adcs	r2, r2, r6
-		ldmia	r0!, {r3 - r6}
-		adcs	r2, r2, r3
-		adcs	r2, r2, r4
-		adcs	r2, r2, r5
-		adcs	r2, r2, r6
+
+		stmfd	sp!, {r4 - r5}
+2:		ldmia	buf!, {td0, td1, td2, td3}
+		adcs	sum, sum, td0
+		adcs	sum, sum, td1
+		adcs	sum, sum, td2
+		adcs	sum, sum, td3
+		ldmia	buf!, {td0, td1, td2, td3}
+		adcs	sum, sum, td0
+		adcs	sum, sum, td1
+		adcs	sum, sum, td2
+		adcs	sum, sum, td3
 		sub	ip, ip, #32
 		teq	ip, #0
 		bne	2b
-		adcs	r2, r2, #0
-		ldmfd	sp!, {r4 - r6}
-3:		ands	ip, r1, #0x1c
-		beq	5f
-4:		ldr	r3, [r0], #4
-		sub	ip, ip, #4
-		adcs	r2, r2, r3
-		teq	ip, #0
-		bne	4b
-		adcs	r2, r2, #0
-5:		ands	ip, r1, #3
-		moveq	r0, r2
-		RETINSTR(moveq,pc,lr)
-		mov	ip, ip, lsl #3
-		ldr	r3, [r0]
-		rsb	ip, ip, #32
-		mov	r3, r3, lsl ip
-		adds	r2, r2, r3, lsr ip
-		adc	r0, r2, #0
-		RETINSTR(mov,pc,lr)
+		ldmfd	sp!, {r4 - r5}
 
+3:		tst	len, #0x1c		@ should not change C
+		beq	.less4
 
+4:		ldr	td0, [buf], #4
+		sub	len, len, #4
+		adcs	sum, sum, td0
+		tst	len, #0x1c
+		bne	4b
+		b	.less4
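
For reference, here is a minimal C sketch of what the new routine computes,
assuming a little-endian ARM; the names csum_partial_ref and add_c are
illustrative only, not kernel code.  The buffer is accumulated as 16-bit
little-endian words into a 32-bit sum with end-around carry, a leading byte
at an odd address is added into bits 15..8, and the final sum is rotated
left by 8 bits when the buffer started on an odd address (the .done path
above).  The real routine additionally walks 32-byte and 4-byte blocks with
ldmia/adcs for speed, which is equivalent under end-around-carry arithmetic.

#include <stdint.h>

/* 32-bit add with end-around carry, standing in for the adcs/adc chains. */
static uint32_t add_c(uint32_t a, uint32_t b)
{
	uint32_t r = a + b;
	return r + (r < a);			/* fold the carry bit back in */
}

uint32_t csum_partial_ref(const unsigned char *buf, int len, uint32_t sum)
{
	int odd;

	if (len <= 0)				/* mirrors the .zero exit */
		return sum;

	odd = (uintptr_t)buf & 1;
	if (odd) {				/* leading byte goes into bits 15..8 */
		sum = add_c(sum, (uint32_t)*buf++ << 8);
		len--;
	}
	while (len > 1) {			/* 16-bit little-endian words */
		sum = add_c(sum, buf[0] | ((uint32_t)buf[1] << 8));
		buf += 2;
		len -= 2;
	}
	if (len)				/* trailing odd byte in bits 7..0 */
		sum = add_c(sum, buf[0]);

	if (odd)				/* buffer started odd: rotate left by 8 */
		sum = (sum << 8) | (sum >> 24);

	return sum;
}

A caller typically chains blocks by passing one call's return value as the
sum argument of the next, and finally folds the 32-bit result down to 16
bits with csum_fold().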
