/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */


#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+				; +4
	ld	r5, @r0+				; +8
	ld	r3, @r0+	    ||	addx	r2, r3	; +12
	ld	r4, @r0+	    ||	addx	r2, r4	; +16
	ld	r5, @r0+	    ||	addx	r2, r5	; +20
	ld	r3, @r0+	    ||	addx	r2, r3	; +24
	ld	r4, @r0+	    ||	addx	r2, r4	; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; remaining whole words (len & 0x1c)
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; remaining whole words (len & 0x1c)
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
			int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions?  It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end
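
/*
 * For reference, a rough C sketch of the RFC 1071 style ones'-complement
 * accumulation that the csum_partial routines above perform.  This is only
 * illustrative: the name csum_partial_ref is made up for this sketch, it is
 * not the kernel's generic implementation, it assumes a 2-byte aligned
 * buffer, and it glosses over the endianness and odd-alignment byte
 * swapping that the assembler handles explicitly.
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff, int len,
 *				      unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {			// sum 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)				// trailing odd byte
 *			acc += *buff;
 *		while (acc >> 32)			// fold carries back in
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 *
 * Folding this 32-bit partial sum to 16 bits and complementing it
 * (csum_fold) then yields the Internet checksum.
 */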