patch-1.3.6 linux/arch/i386/lib/checksum.c
Next file: linux/arch/i386/math-emu/Makefile
Previous file: linux/arch/i386/config.in
Back to the patch index
Back to the overall index
- Lines: 291
- Date:
Thu Jun 29 19:18:49 1995
- Orig file:
v1.3.5/linux/arch/i386/lib/checksum.c
- Orig date:
Fri Jun 16 22:02:54 1995
diff -u --recursive --new-file v1.3.5/linux/arch/i386/lib/checksum.c linux/arch/i386/lib/checksum.c
@@ -7,6 +7,7 @@
*
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
@@ -23,62 +24,74 @@
*/
unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum) {
-#ifdef __i386__
+ /*
+ * Experiments with Ethernet and SLIP connections show that buff
+ * is aligned on either a 2-byte or 4-byte boundary. We get at
+ * least a 2x speedup on 486 and Pentium if it is 4-byte aligned.
+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+ * alignment for the unrolled loop.
+ */
__asm__("
+ testl $2, %%esi # Check alignment.
+ jz 2f # Jump if alignment is ok.
+ subl $2, %%ecx # Alignment uses up two bytes.
+ jae 1f # Jump if we had at least two bytes.
+ addl $2, %%ecx # ecx was < 2. Deal with it.
+ jmp 4f
+1: movw (%%esi), %%bx # Fold the leading 16-bit word into the sum.
+ addl $2, %%esi
+ addw %%bx, %%ax
+ adcl $0, %%eax # Add the carry of the 16-bit add back in.
+2:
movl %%ecx, %%edx
- cld
shrl $5, %%ecx
jz 2f
- orl %%ecx, %%ecx
-1: movl (%%esi), %%eax
- adcl %%eax, %%ebx
- movl 4(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 8(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 12(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 16(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 20(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 24(%%esi), %%eax
- adcl %%eax, %%ebx
- movl 28(%%esi), %%eax
- adcl %%eax, %%ebx
+ testl %%esi, %%esi # Clear the carry flag before the adcl chain.
+1: movl (%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 4(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 8(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 12(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 16(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 20(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 24(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl 28(%%esi), %%ebx
+ adcl %%ebx, %%eax
lea 32(%%esi), %%esi
dec %%ecx
jne 1b
- adcl $0, %%ebx
+ adcl $0, %%eax # Fold in the carry left by the last adcl.
2: movl %%edx, %%ecx
- andl $28, %%ecx
+ andl $0x1c, %%edx # Byte count of the remaining whole dwords.
je 4f
- shrl $2, %%ecx
- orl %%ecx, %%ecx
-3: adcl (%%esi), %%ebx
+ shrl $2, %%edx
+ testl %%esi, %%esi # Clear the carry flag before the adcl loop.
+3: adcl (%%esi), %%eax
lea 4(%%esi), %%esi
- dec %%ecx
+ dec %%edx
jne 3b
- adcl $0, %%ebx
-4: movl $0, %%eax
- testw $2, %%dx
- je 5f
- lodsw
- addl %%eax, %%ebx
- adcl $0, %%ebx
- movw $0, %%ax
-5: test $1, %%edx
+ adcl $0, %%eax # Fold in the carry left by the last adcl.
+4: andl $3, %%ecx # 0 to 3 trailing bytes remain.
+ jz 7f
+ cmpl $2, %%ecx # Flags from this compare are used by jb and je below.
+ jb 5f
+ movw (%%esi),%%cx # Two or three bytes left: load a word.
+ leal 2(%%esi),%%esi
je 6f
- lodsb
- addl %%eax, %%ebx
- adcl $0, %%ebx
-6: "
- : "=b"(sum)
+ shll $16,%%ecx # Three bytes: move the word to the high half.
+5: movb (%%esi),%%cl # The last odd byte goes in the low byte.
+6: addl %%ecx,%%eax
+ adcl $0, %%eax
+7: "
+ : "=a"(sum)
: "0"(sum), "c"(len), "S"(buff)
- : "ax", "bx", "cx", "dx", "si" );
-#else
-#error Not implemented for this CPU
-#endif
+ : "bx", "cx", "dx", "si");
return(sum);
}
@@ -90,90 +103,93 @@
unsigned int csum_partial_copyffs( char *src, char *dst,
int len, int sum) {
-#ifdef __i386__
__asm__("
- push %%ds
- push %%es
- movw %%ds, %%dx
- movw %%dx, %%es
- movw %%fs, %%dx
- movw %%dx, %%ds
- cld
- cmpl $32, %%ecx
- jb 2f
- pushl %%ecx
+ testl $2, %%edi # Check alignment.
+ jz 2f # Jump if alignment is ok.
+ subl $2, %%ecx # Alignment uses up two bytes.
+ jae 1f # Jump if we had at least two bytes.
+ addl $2, %%ecx # ecx was < 2. Deal with it.
+ jmp 4f
+1: movw %%fs:(%%esi), %%bx # Copy and sum the leading 16-bit word.
+ addl $2, %%esi
+ movw %%bx, (%%edi)
+ addl $2, %%edi
+ addw %%bx, %%ax
+ adcl $0, %%eax # Add the carry of the 16-bit add back in.
+2:
+ movl %%ecx, %%edx
shrl $5, %%ecx
- orl %%ecx, %%ecx
-1: movl (%%esi), %%eax
- movl 4(%%esi), %%edx
- adcl %%eax, %%ebx
- movl %%eax, %%es:(%%edi)
- adcl %%edx, %%ebx
- movl %%edx, %%es:4(%%edi)
-
- movl 8(%%esi), %%eax
- movl 12(%%esi), %%edx
- adcl %%eax, %%ebx
- movl %%eax, %%es:8(%%edi)
- adcl %%edx, %%ebx
- movl %%edx, %%es:12(%%edi)
-
- movl 16(%%esi), %%eax
- movl 20(%%esi), %%edx
- adcl %%eax, %%ebx
- movl %%eax, %%es:16(%%edi)
- adcl %%edx, %%ebx
- movl %%edx, %%es:20(%%edi)
-
- movl 24(%%esi), %%eax
- movl 28(%%esi), %%edx
- adcl %%eax, %%ebx
- movl %%eax, %%es:24(%%edi)
- adcl %%edx, %%ebx
- movl %%edx, %%es:28(%%edi)
+ jz 2f
+ testl %%esi, %%esi # Clear the carry flag before the adcl chain.
+1: movl %%fs:(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, (%%edi)
+
+ movl %%fs:4(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 4(%%edi)
+
+ movl %%fs:8(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 8(%%edi)
+
+ movl %%fs:12(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 12(%%edi)
+
+ movl %%fs:16(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 16(%%edi)
+
+ movl %%fs:20(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 20(%%edi)
+
+ movl %%fs:24(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 24(%%edi)
+
+ movl %%fs:28(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, 28(%%edi)
lea 32(%%esi), %%esi
lea 32(%%edi), %%edi
dec %%ecx
jne 1b
- adcl $0, %%ebx
- popl %%ecx
-2: movl %%ecx, %%edx
- andl $28, %%ecx
+ adcl $0, %%eax # Fold in the carry left by the last adcl.
+2: movl %%edx, %%ecx
+ andl $28, %%edx # Byte count of the remaining whole dwords.
je 4f
- shrl $2, %%ecx
- orl %%ecx, %%ecx
-3: movl (%%esi), %%eax
- adcl %%eax, %%ebx
- movl %%eax, %%es:(%%edi)
+ shrl $2, %%edx
+ testl %%esi, %%esi # Clear the carry flag before the adcl loop.
+3: movl %%fs:(%%esi), %%ebx
+ adcl %%ebx, %%eax
+ movl %%ebx, (%%edi)
lea 4(%%esi), %%esi
lea 4(%%edi), %%edi
- dec %%ecx
+ dec %%edx
jne 3b
- adcl $0, %%ebx
-4: movl $0, %%eax
- testl $2, %%edx
- je 5f
- lodsw
- stosw
- addl %%eax, %%ebx
- movw $0, %%ax
- adcl %%eax, %%ebx
-5: test $1, %%edx
+ adcl $0, %%eax # Fold in the carry left by the last adcl.
+4: andl $3, %%ecx # 0 to 3 trailing bytes; upper bits of ecx are now zero.
+ jz 7f
+ cmpl $2, %%ecx # Flags from this compare are used by jb and je below.
+ jb 5f
+ movw %%fs:(%%esi), %%cx # Use ecx as scratch: edx is unset on the short path.
+ leal 2(%%esi), %%esi
+ movw %%cx, (%%edi)
+ leal 2(%%edi), %%edi
je 6f
- lodsb
- stosb
- addl %%eax, %%ebx
- adcl $0, %%ebx
-6: pop %%es
- pop %%ds
+ shll $16,%%ecx # Three bytes: move the word to the high half.
+5: movb %%fs:(%%esi), %%cl # The last odd byte goes in the low byte.
+ movb %%cl, (%%edi)
+6: addl %%ecx, %%eax
+ adcl $0, %%eax
+7:
"
- : "=b"(sum)
- : "0"(sum), "c"(len), "S"(src), "D"(dst)
- : "ax", "bx", "cx", "dx", "si", "di" );
-#else
-#error Not implemented for this CPU
-#endif
+ : "=a" (sum)
+ : "0"(sum), "c"(len), "S"(src), "D" (dst)
+ : "bx", "cx", "dx", "si", "di" );
return(sum);
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov
with Sam's (original) version of this