/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
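
/*
 * For reference, a rough C model of csum_partial (an illustration only,
 * not part of the build; the name csum_partial_ref is ours).  Like the
 * assembly, it assumes a little-endian x86 where unaligned 32-bit loads
 * are legal.  It defers the end-around carry to the end instead of using
 * an adcl chain, so the intermediate 32-bit sums can differ from the
 * assembly's, but the result folds to the same 16-bit checksum.

static unsigned int csum_partial_ref(const unsigned char *buff, int len,
				     unsigned int sum)
{
	unsigned long long acc = sum;

	while (len >= 4) {			// whole 32-bit words
		acc += *(const unsigned int *)buff;
		buff += 4;
		len -= 4;
	}
	if (len >= 2) {				// trailing 16-bit word
		acc += *(const unsigned short *)buff;
		buff += 2;
		len -= 2;
	}
	if (len)				// trailing odd byte
		acc += *buff;

	while (acc >> 32)			// end-around carry, as
		acc = (acc & 0xffffffffULL) + (acc >> 32);  // adcl $0 does
	return (unsigned int)acc;
}
 */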
.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte
	 * aligned.  Fortunately, it is easy to convert 2-byte alignment
	 * to 4-byte alignment for the unrolled loop.
	 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx		# number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clear CF for the adcl chain
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	ret

#else

/* Version for PentiumII/PPro */

csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $2, %esi
	jnz 30f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx	# whole-word remainder (len % 128)
	shrl $7, %ecx		# number of 128-byte blocks
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# each adcl below is 3 bytes
	testl %esi, %esi	# clear CF before jumping in
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned, so the full-word read
	addl %ebx,%eax		# cannot cross into an unmapped page
	adcl $0,%eax
80:
	popl %ebx
	popl %esi
	ret

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
					int len, int sum,
					int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for an instruction,
 * so we can use a custom exception handler for each access type.
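 *
 * For example, SRC(movl (%esi), %ebx) expands, in effect, to
 *
 *	9999:	movl (%esi), %ebx
 *		.section __ex_table, "a"
 *		.long 9999b, 6001f
 *		.previous
 *
 * so a fault on the load is redirected to the 6001: fixup code below,
 * while a fault on a DST() store lands at 6002:.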
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC
 *	  and DST definitions? It's damn hard to trigger all cases.  I
 *	  hope I got them all, but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f;		\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f;		\
	.previous

.align 4

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP 12

csum_partial_copy_generic_i386:
	subl $4,%esp		# scratch slot addressed as FP(%esp)
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx		# number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clear CF for the adcl chain
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx		# equivalent to addl $4,%esp
	ret

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	);	\
	addl %ebx, %eax		;	\
	DST(movl %ebx, x(%edi)	);

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	);	\
	adcl %ebx, %eax		;	\
	DST(movl %ebx, x(%edi)	);

#define ARGBASE 12
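
/*
 * The copy loop below handles 64 bytes per iteration.  ROUND1() starts
 * the carry chain with addl, since CF is undefined at the top of the
 * loop; the remaining ROUND()s accumulate with adcl, and the adcl $0
 * at 3: folds in the final carry.  The computed jump through %ebx
 * enters the unrolled block part-way in, so the whole-word remainder
 * (len & 0x3c bytes) is copied on the first pass; each ROUND assembles
 * to 8 bytes of code, which is what the lea 3f(%ebx,%ebx) scaling
 * relies on.  The two movb loads through %edx appear to be read-ahead
 * touches that pull the next source lines into the cache (and take any
 * fault) before the block is copied.
 */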
csum_partial_copy_generic_i386:
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+16(%esp),%eax	# sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx		# number of 64-byte blocks
	andl $0x3c, %ebx	# whole-word remainder bytes
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea -1(%esi),%edx
	andl $-32,%edx		# round down to a 32-byte boundary
	lea 3f(%ebx,%ebx), %ebx	# each ROUND below is 8 bytes
	testl %esi, %esi	# clear CF before jumping in
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	# len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret

#undef ROUND
#undef ROUND1

#endif
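
/*
 * For reference, a rough C model of the copy routine's contract (an
 * illustration only; csum_partial_copy_model is our name, and
 * csum_partial_ref is the model sketched near the top of this file).
 * The assembly detects faults through the __ex_table entries rather
 * than checking anything up front; only the no-fault path is shown.

#include <string.h>

static unsigned int csum_partial_copy_model(const char *src, char *dst,
					    int len, unsigned int sum,
					    int *src_err_ptr,
					    int *dst_err_ptr)
{
	// On a faulting source read, the fixup code stores -EFAULT
	// through src_err_ptr and zeroes all len bytes of dst; on a
	// faulting destination write, it stores -EFAULT through
	// dst_err_ptr.  With no fault, the routine is a memcpy()
	// fused with a checksum of the source:
	memcpy(dst, src, len);
	return csum_partial_ref((const unsigned char *)src, len, sum);
}
 */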