*/
/*
- * unsigned int csum_partial(const unsigned char *buf, int len,
- * unsigned int sum);
+ * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
*/
.text
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
- mov r5, r1
mov r4, r0
- tst #2, r0 ! Check alignment.
- bt 2f ! Jump if alignment is ok.
+ tst #3, r0 ! Check alignment.
+ bt/s 2f ! Jump if alignment is ok.
+ mov r4, r7 ! Keep a copy to check for alignment
!
+ tst #1, r0 ! Check alignment.
+ bt 21f ! Jump if alignment is boundary of 2bytes.
+
+ ! buf is odd
+ tst r5, r5
+ add #-1, r5
+ bt 9f
+ mov.b @r4+, r0
+ extu.b r0, r0
+ addc r0, r6 ! t=0 from previous tst
+ mov r6, r0
+ shll8 r6
+ shlr16 r0
+ shlr8 r0
+ or r0, r6
+ mov r4, r0
+ tst #2, r0
+ bt 2f
+21:
+ ! buf is 2 byte aligned (len could be 0)
add #-2, r5 ! Alignment uses up two bytes.
cmp/pz r5 !
bt/s 1f ! Jump if we had at least two bytes.
bra 6f
add #2, r5 ! r5 was < 2. Deal with it.
1:
- mov r5, r1 ! Save new len for later use.
mov.w @r4+, r0
extu.w r0, r0
addc r0, r6
bf 2f
add #1, r6
2:
+ ! buf is 4 byte aligned (len could be 0)
+ mov r5, r1
mov #-5, r0
- shld r0, r5
- tst r5, r5
+ shld r0, r1
+ tst r1, r1
bt/s 4f ! if it's =0, go to 4f
clrt
.align 2
addc r0, r6
addc r2, r6
movt r0
- dt r5
+ dt r1
bf/s 3b
cmp/eq #1, r0
- ! here, we know r5==0
- addc r5, r6 ! add carry to r6
+ ! here, we know r1==0
+ addc r1, r6 ! add carry to r6
4:
- mov r1, r0
+ mov r5, r0
and #0x1c, r0
tst r0, r0
- bt/s 6f
- mov r0, r5
- shlr2 r5
+ bt 6f
+ ! 4 bytes or more remaining
+ mov r0, r1
+ shlr2 r1
mov #0, r2
5:
addc r2, r6
mov.l @r4+, r2
movt r0
- dt r5
+ dt r1
bf/s 5b
cmp/eq #1, r0
addc r2, r6
- addc r5, r6 ! r5==0 here, so it means add carry-bit
+ addc r1, r6 ! r1==0 here, so it means add carry-bit
6:
- mov r1, r5
+ ! 3 bytes or less remaining
mov #3, r0
and r0, r5
tst r5, r5
8:
addc r0, r6
mov #0, r0
- addc r0, r6
+ addc r0, r6
9:
+ ! Check if the buffer was misaligned, if so realign sum
+ mov r7, r0
+ tst #1, r0
+ bt 10f
+ mov r6, r0
+ shll8 r6
+ shlr16 r0
+ shlr8 r0
+ or r0, r6
+10:
rts
mov r6, r0