/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *			     int len, unsigned int sum)
 *
 * Copies len bytes from src to dst while accumulating a checksum on top
 * of the incoming sum.  The result is handed back in r10.
 *
 * Structure:
 *   1. len >= 80:  copy/sum blocks of 10 longwords (40 bytes) with movem.
 *   2. fold the 32-bit running sum once into its low half.
 *   3. copy/sum the remaining 16-bit words.
 *   4. copy/sum one trailing byte, if any.
 *
 * The instruction directly after every branch and after ret sits in the
 * delay slot, i.e. it is executed whether or not the branch is taken.
 *
 * The numeric labels 1:, 2: and 3: mark the PC that a faulting source
 * access reports.  Nothing in this file refers to them; presumably an
 * exception table elsewhere does, so they must stay right after the
 * loads they follow.
 */

	.globl	csum_partial_copy_nocheck
csum_partial_copy_nocheck:

	;; Register roles on entry:
	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; Breakeven check between the movem version and the plain word
	;; loop: anything shorter than 80 bytes goes straight to the word
	;; loop.  Each movem pass unconditionally handles 40 bytes, so this
	;; also guarantees that the block loop never copies or sums more
	;; than the actual length.

	cmpu.w	80, $r12
	blo	_word_loop
	nop				; delay slot

	;; Need to save the registers we use below in the movem loop.
	;; This overhead is why we have the breakeven check above.
	;; Only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI.

	subq	9*4, $sp		; room for r0..r8
	movem	$r8, [$sp]		; store r0..r8

	;; Do a movem copy and checksum.

	subq	10*4, $r12		; pre-decrement length for the first pass

_mloop:	movem	[$r10+],$r9		; read 10 longwords into r0..r9
1:	;; A failing userspace access will have this as PC.
	movem	$r9,[$r11+]		; write 10 longwords

	;; Perform dword checksumming on the 10 longwords.  Each ax makes
	;; the add that follows it include the carry of the previous add.

	add.d	$r0,$r13
	ax
	add.d	$r1,$r13
	ax
	add.d	$r2,$r13
	ax
	add.d	$r3,$r13
	ax
	add.d	$r4,$r13
	ax
	add.d	$r5,$r13
	ax
	add.d	$r6,$r13
	ax
	add.d	$r7,$r13
	ax
	add.d	$r8,$r13
	ax
	add.d	$r9,$r13

	;; Fold the final carry into the checksum here, to avoid having to
	;; loop the carry back into the top.

	ax
	addq	0,$r13			; r13 += carry

	subq	10*4,$r12		; one more block consumed
	bge	_mloop			; another full block still available
	nop				; delay slot

	addq	10*4,$r12		; compensate for last loop underflowing length

	movem	[$sp+],$r8		; restore r0..r8, pop the save area

_word_loop:
	;; Only fold if there is anything to fold.

	cmpq	0,$r13
	beq	_no_fold

	;; Fold 32-bit checksum into a 16-bit checksum, to avoid carries
	;; below.  r9 can be used as temporary.
	;; NOTE: the move.d below is in the delay slot of the beq above, so
	;; it also runs when the branch is taken; that is harmless because
	;; r9 is only a scratch register here.

	move.d	$r13,$r9
	lsrq	16,$r9			; r9 = checksum >> 16
	and.d	0xffff,$r13		; checksum = checksum & 0xffff
	add.d	$r9,$r13		; checksum += r9

_no_fold:
	cmpq	2,$r12
	blt	_no_words		; fewer than 2 bytes left
	nop				; delay slot

	;; Copy and checksum the rest of the words.

	subq	2,$r12			; pre-decrement length for the first pass

_wloop:	move.w	[$r10+],$r9		; read one 16-bit word
2:	;; A failing userspace access will have this as PC.
	addu.w	$r9,$r13		; checksum += zero-extended word
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]		; delay slot: store the word every pass

	addq	2,$r12			; compensate for last loop underflowing length

_no_words:
	;; See if we have one odd byte more.
	cmpq	1,$r12
	beq	_do_byte
	nop				; delay slot
	ret
	move.d	$r13, $r10		; delay slot: return the checksum in r10

_do_byte:
	;; Copy and checksum the last byte.
	move.b	[$r10],$r9
3:	;; A failing userspace access will have this as PC.
	addu.b	$r9,$r13		; checksum += zero-extended byte
	move.b	$r9,[$r11]
	ret
	move.d	$r13, $r10		; delay slot: return the checksum in r10