1/* memcmp with SSSE3, wmemcmp with SSSE3 2 Copyright (C) 2010-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#if IS_IN (libc) 20 21# include <sysdep.h> 22 23# ifndef MEMCMP 24# define MEMCMP __memcmp_ssse3 25# endif 26 27# define CFI_PUSH(REG) \ 28 cfi_adjust_cfa_offset (4); \ 29 cfi_rel_offset (REG, 0) 30 31# define CFI_POP(REG) \ 32 cfi_adjust_cfa_offset (-4); \ 33 cfi_restore (REG) 34 35# define PUSH(REG) pushl REG; CFI_PUSH (REG) 36# define POP(REG) popl REG; CFI_POP (REG) 37 38# define PARMS 4 39# define BLK1 PARMS 40# define BLK2 BLK1+4 41# define LEN BLK2+4 42# define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret 43# define RETURN RETURN_END; cfi_restore_state; cfi_remember_state 44 45/* Warning! 46 wmemcmp has to use SIGNED comparison for elements. 47 memcmp has to use UNSIGNED comparison for elemnts. 48*/ 49 50 atom_text_section 51ENTRY (MEMCMP) 52 movl LEN(%esp), %ecx 53 54# ifdef USE_AS_WMEMCMP 55 shl $2, %ecx 56 test %ecx, %ecx 57 jz L(zero) 58# endif 59 60 movl BLK1(%esp), %eax 61 cmp $48, %ecx 62 movl BLK2(%esp), %edx 63 jae L(48bytesormore) 64 65# ifndef USE_AS_WMEMCMP 66 cmp $1, %ecx 67 jbe L(less1bytes) 68# endif 69 70 PUSH (%ebx) 71 add %ecx, %edx 72 add %ecx, %eax 73 jmp L(less48bytes) 74 75 CFI_POP (%ebx) 76 77# ifndef USE_AS_WMEMCMP 78 .p2align 4 79L(less1bytes): 80 jb L(zero) 81 movb (%eax), %cl 82 cmp (%edx), %cl 83 je L(zero) 84 mov $1, %eax 85 ja L(1bytesend) 86 neg %eax 87L(1bytesend): 88 ret 89# endif 90 91 .p2align 4 92L(zero): 93 xor %eax, %eax 94 ret 95 96 .p2align 4 97L(48bytesormore): 98 PUSH (%ebx) 99 PUSH (%esi) 100 PUSH (%edi) 101 cfi_remember_state 102 movdqu (%eax), %xmm3 103 movdqu (%edx), %xmm0 104 movl %eax, %edi 105 movl %edx, %esi 106 pcmpeqb %xmm0, %xmm3 107 pmovmskb %xmm3, %edx 108 lea 16(%edi), %edi 109 110 sub $0xffff, %edx 111 lea 16(%esi), %esi 112 jnz L(less16bytes) 113 mov %edi, %edx 114 and $0xf, %edx 115 xor %edx, %edi 116 sub %edx, %esi 117 add %edx, %ecx 118 mov %esi, %edx 119 and $0xf, %edx 120 jz L(shr_0) 121 xor %edx, %esi 122 123# ifndef USE_AS_WMEMCMP 124 cmp $8, %edx 125 jae L(next_unaligned_table) 126 cmp $0, %edx 127 je L(shr_0) 128 cmp $1, %edx 129 je L(shr_1) 130 cmp $2, %edx 131 je L(shr_2) 132 cmp $3, %edx 133 je L(shr_3) 134 cmp $4, %edx 135 je L(shr_4) 136 cmp $5, %edx 137 je L(shr_5) 138 cmp $6, %edx 139 je L(shr_6) 140 jmp L(shr_7) 141 142 .p2align 2 143L(next_unaligned_table): 144 cmp $8, %edx 145 je L(shr_8) 146 cmp $9, %edx 147 je L(shr_9) 148 cmp $10, %edx 149 je L(shr_10) 150 cmp $11, %edx 151 je L(shr_11) 152 cmp $12, %edx 153 je L(shr_12) 154 cmp $13, %edx 155 je L(shr_13) 156 cmp $14, %edx 157 je L(shr_14) 158 jmp L(shr_15) 159# else 160 cmp $0, %edx 161 je L(shr_0) 162 cmp $4, %edx 163 je L(shr_4) 164 cmp $8, %edx 165 je L(shr_8) 166 jmp L(shr_12) 167# endif 168 169 .p2align 4 170L(shr_0): 171 cmp $80, %ecx 172 jae L(shr_0_gobble) 173 lea -48(%ecx), %ecx 174 xor %eax, %eax 175 movaps (%esi), %xmm1 176 pcmpeqb (%edi), %xmm1 177 movaps 16(%esi), %xmm2 178 pcmpeqb 16(%edi), %xmm2 179 pand %xmm1, %xmm2 180 pmovmskb %xmm2, %edx 181 add $32, %edi 182 add $32, %esi 183 sub $0xffff, %edx 184 jnz L(exit) 185 186 lea (%ecx, %edi,1), %eax 187 lea (%ecx, %esi,1), %edx 188 POP (%edi) 189 POP (%esi) 190 jmp L(less48bytes) 191 192 cfi_restore_state 193 cfi_remember_state 194 .p2align 4 195L(shr_0_gobble): 196 lea -48(%ecx), %ecx 197 movdqa (%esi), %xmm0 198 xor %eax, %eax 199 pcmpeqb (%edi), %xmm0 200 sub $32, %ecx 201 movdqa 16(%esi), %xmm2 202 pcmpeqb 16(%edi), %xmm2 203L(shr_0_gobble_loop): 204 pand %xmm0, %xmm2 205 sub $32, %ecx 206 pmovmskb %xmm2, %edx 207 movdqa %xmm0, %xmm1 208 movdqa 32(%esi), %xmm0 209 movdqa 48(%esi), %xmm2 210 sbb $0xffff, %edx 211 pcmpeqb 32(%edi), %xmm0 212 pcmpeqb 48(%edi), %xmm2 213 lea 32(%edi), %edi 214 lea 32(%esi), %esi 215 jz L(shr_0_gobble_loop) 216 217 pand %xmm0, %xmm2 218 cmp $0, %ecx 219 jge L(shr_0_gobble_loop_next) 220 inc %edx 221 add $32, %ecx 222L(shr_0_gobble_loop_next): 223 test %edx, %edx 224 jnz L(exit) 225 226 pmovmskb %xmm2, %edx 227 movdqa %xmm0, %xmm1 228 lea 32(%edi), %edi 229 lea 32(%esi), %esi 230 sub $0xffff, %edx 231 jnz L(exit) 232 lea (%ecx, %edi,1), %eax 233 lea (%ecx, %esi,1), %edx 234 POP (%edi) 235 POP (%esi) 236 jmp L(less48bytes) 237 238# ifndef USE_AS_WMEMCMP 239 cfi_restore_state 240 cfi_remember_state 241 .p2align 4 242L(shr_1): 243 cmp $80, %ecx 244 lea -48(%ecx), %ecx 245 mov %edx, %eax 246 jae L(shr_1_gobble) 247 248 movdqa 16(%esi), %xmm1 249 movdqa %xmm1, %xmm2 250 palignr $1,(%esi), %xmm1 251 pcmpeqb (%edi), %xmm1 252 253 movdqa 32(%esi), %xmm3 254 palignr $1,%xmm2, %xmm3 255 pcmpeqb 16(%edi), %xmm3 256 257 pand %xmm1, %xmm3 258 pmovmskb %xmm3, %edx 259 lea 32(%edi), %edi 260 lea 32(%esi), %esi 261 sub $0xffff, %edx 262 jnz L(exit) 263 lea (%ecx, %edi,1), %eax 264 lea 1(%ecx, %esi,1), %edx 265 POP (%edi) 266 POP (%esi) 267 jmp L(less48bytes) 268 269 cfi_restore_state 270 cfi_remember_state 271 .p2align 4 272L(shr_1_gobble): 273 sub $32, %ecx 274 movdqa 16(%esi), %xmm0 275 palignr $1,(%esi), %xmm0 276 pcmpeqb (%edi), %xmm0 277 278 movdqa 32(%esi), %xmm3 279 palignr $1,16(%esi), %xmm3 280 pcmpeqb 16(%edi), %xmm3 281 282L(shr_1_gobble_loop): 283 pand %xmm0, %xmm3 284 sub $32, %ecx 285 pmovmskb %xmm3, %edx 286 movdqa %xmm0, %xmm1 287 288 movdqa 64(%esi), %xmm3 289 palignr $1,48(%esi), %xmm3 290 sbb $0xffff, %edx 291 movdqa 48(%esi), %xmm0 292 palignr $1,32(%esi), %xmm0 293 pcmpeqb 32(%edi), %xmm0 294 lea 32(%esi), %esi 295 pcmpeqb 48(%edi), %xmm3 296 297 lea 32(%edi), %edi 298 jz L(shr_1_gobble_loop) 299 pand %xmm0, %xmm3 300 301 cmp $0, %ecx 302 jge L(shr_1_gobble_next) 303 inc %edx 304 add $32, %ecx 305L(shr_1_gobble_next): 306 test %edx, %edx 307 jnz L(exit) 308 309 pmovmskb %xmm3, %edx 310 movdqa %xmm0, %xmm1 311 lea 32(%edi), %edi 312 lea 32(%esi), %esi 313 sub $0xffff, %edx 314 jnz L(exit) 315 316 lea (%ecx, %edi,1), %eax 317 lea 1(%ecx, %esi,1), %edx 318 POP (%edi) 319 POP (%esi) 320 jmp L(less48bytes) 321 322 323 cfi_restore_state 324 cfi_remember_state 325 .p2align 4 326L(shr_2): 327 cmp $80, %ecx 328 lea -48(%ecx), %ecx 329 mov %edx, %eax 330 jae L(shr_2_gobble) 331 332 movdqa 16(%esi), %xmm1 333 movdqa %xmm1, %xmm2 334 palignr $2,(%esi), %xmm1 335 pcmpeqb (%edi), %xmm1 336 337 movdqa 32(%esi), %xmm3 338 palignr $2,%xmm2, %xmm3 339 pcmpeqb 16(%edi), %xmm3 340 341 pand %xmm1, %xmm3 342 pmovmskb %xmm3, %edx 343 lea 32(%edi), %edi 344 lea 32(%esi), %esi 345 sub $0xffff, %edx 346 jnz L(exit) 347 lea (%ecx, %edi,1), %eax 348 lea 2(%ecx, %esi,1), %edx 349 POP (%edi) 350 POP (%esi) 351 jmp L(less48bytes) 352 353 cfi_restore_state 354 cfi_remember_state 355 .p2align 4 356L(shr_2_gobble): 357 sub $32, %ecx 358 movdqa 16(%esi), %xmm0 359 palignr $2,(%esi), %xmm0 360 pcmpeqb (%edi), %xmm0 361 362 movdqa 32(%esi), %xmm3 363 palignr $2,16(%esi), %xmm3 364 pcmpeqb 16(%edi), %xmm3 365 366L(shr_2_gobble_loop): 367 pand %xmm0, %xmm3 368 sub $32, %ecx 369 pmovmskb %xmm3, %edx 370 movdqa %xmm0, %xmm1 371 372 movdqa 64(%esi), %xmm3 373 palignr $2,48(%esi), %xmm3 374 sbb $0xffff, %edx 375 movdqa 48(%esi), %xmm0 376 palignr $2,32(%esi), %xmm0 377 pcmpeqb 32(%edi), %xmm0 378 lea 32(%esi), %esi 379 pcmpeqb 48(%edi), %xmm3 380 381 lea 32(%edi), %edi 382 jz L(shr_2_gobble_loop) 383 pand %xmm0, %xmm3 384 385 cmp $0, %ecx 386 jge L(shr_2_gobble_next) 387 inc %edx 388 add $32, %ecx 389L(shr_2_gobble_next): 390 test %edx, %edx 391 jnz L(exit) 392 393 pmovmskb %xmm3, %edx 394 movdqa %xmm0, %xmm1 395 lea 32(%edi), %edi 396 lea 32(%esi), %esi 397 sub $0xffff, %edx 398 jnz L(exit) 399 400 lea (%ecx, %edi,1), %eax 401 lea 2(%ecx, %esi,1), %edx 402 POP (%edi) 403 POP (%esi) 404 jmp L(less48bytes) 405 406 cfi_restore_state 407 cfi_remember_state 408 .p2align 4 409L(shr_3): 410 cmp $80, %ecx 411 lea -48(%ecx), %ecx 412 mov %edx, %eax 413 jae L(shr_3_gobble) 414 415 movdqa 16(%esi), %xmm1 416 movdqa %xmm1, %xmm2 417 palignr $3,(%esi), %xmm1 418 pcmpeqb (%edi), %xmm1 419 420 movdqa 32(%esi), %xmm3 421 palignr $3,%xmm2, %xmm3 422 pcmpeqb 16(%edi), %xmm3 423 424 pand %xmm1, %xmm3 425 pmovmskb %xmm3, %edx 426 lea 32(%edi), %edi 427 lea 32(%esi), %esi 428 sub $0xffff, %edx 429 jnz L(exit) 430 lea (%ecx, %edi,1), %eax 431 lea 3(%ecx, %esi,1), %edx 432 POP (%edi) 433 POP (%esi) 434 jmp L(less48bytes) 435 436 cfi_restore_state 437 cfi_remember_state 438 .p2align 4 439L(shr_3_gobble): 440 sub $32, %ecx 441 movdqa 16(%esi), %xmm0 442 palignr $3,(%esi), %xmm0 443 pcmpeqb (%edi), %xmm0 444 445 movdqa 32(%esi), %xmm3 446 palignr $3,16(%esi), %xmm3 447 pcmpeqb 16(%edi), %xmm3 448 449L(shr_3_gobble_loop): 450 pand %xmm0, %xmm3 451 sub $32, %ecx 452 pmovmskb %xmm3, %edx 453 movdqa %xmm0, %xmm1 454 455 movdqa 64(%esi), %xmm3 456 palignr $3,48(%esi), %xmm3 457 sbb $0xffff, %edx 458 movdqa 48(%esi), %xmm0 459 palignr $3,32(%esi), %xmm0 460 pcmpeqb 32(%edi), %xmm0 461 lea 32(%esi), %esi 462 pcmpeqb 48(%edi), %xmm3 463 464 lea 32(%edi), %edi 465 jz L(shr_3_gobble_loop) 466 pand %xmm0, %xmm3 467 468 cmp $0, %ecx 469 jge L(shr_3_gobble_next) 470 inc %edx 471 add $32, %ecx 472L(shr_3_gobble_next): 473 test %edx, %edx 474 jnz L(exit) 475 476 pmovmskb %xmm3, %edx 477 movdqa %xmm0, %xmm1 478 lea 32(%edi), %edi 479 lea 32(%esi), %esi 480 sub $0xffff, %edx 481 jnz L(exit) 482 483 lea (%ecx, %edi,1), %eax 484 lea 3(%ecx, %esi,1), %edx 485 POP (%edi) 486 POP (%esi) 487 jmp L(less48bytes) 488# endif 489 490 cfi_restore_state 491 cfi_remember_state 492 .p2align 4 493L(shr_4): 494 cmp $80, %ecx 495 lea -48(%ecx), %ecx 496 mov %edx, %eax 497 jae L(shr_4_gobble) 498 499 movdqa 16(%esi), %xmm1 500 movdqa %xmm1, %xmm2 501 palignr $4,(%esi), %xmm1 502 pcmpeqb (%edi), %xmm1 503 504 movdqa 32(%esi), %xmm3 505 palignr $4,%xmm2, %xmm3 506 pcmpeqb 16(%edi), %xmm3 507 508 pand %xmm1, %xmm3 509 pmovmskb %xmm3, %edx 510 lea 32(%edi), %edi 511 lea 32(%esi), %esi 512 sub $0xffff, %edx 513 jnz L(exit) 514 lea (%ecx, %edi,1), %eax 515 lea 4(%ecx, %esi,1), %edx 516 POP (%edi) 517 POP (%esi) 518 jmp L(less48bytes) 519 520 cfi_restore_state 521 cfi_remember_state 522 .p2align 4 523L(shr_4_gobble): 524 sub $32, %ecx 525 movdqa 16(%esi), %xmm0 526 palignr $4,(%esi), %xmm0 527 pcmpeqb (%edi), %xmm0 528 529 movdqa 32(%esi), %xmm3 530 palignr $4,16(%esi), %xmm3 531 pcmpeqb 16(%edi), %xmm3 532 533L(shr_4_gobble_loop): 534 pand %xmm0, %xmm3 535 sub $32, %ecx 536 pmovmskb %xmm3, %edx 537 movdqa %xmm0, %xmm1 538 539 movdqa 64(%esi), %xmm3 540 palignr $4,48(%esi), %xmm3 541 sbb $0xffff, %edx 542 movdqa 48(%esi), %xmm0 543 palignr $4,32(%esi), %xmm0 544 pcmpeqb 32(%edi), %xmm0 545 lea 32(%esi), %esi 546 pcmpeqb 48(%edi), %xmm3 547 548 lea 32(%edi), %edi 549 jz L(shr_4_gobble_loop) 550 pand %xmm0, %xmm3 551 552 cmp $0, %ecx 553 jge L(shr_4_gobble_next) 554 inc %edx 555 add $32, %ecx 556L(shr_4_gobble_next): 557 test %edx, %edx 558 jnz L(exit) 559 560 pmovmskb %xmm3, %edx 561 movdqa %xmm0, %xmm1 562 lea 32(%edi), %edi 563 lea 32(%esi), %esi 564 sub $0xffff, %edx 565 jnz L(exit) 566 567 lea (%ecx, %edi,1), %eax 568 lea 4(%ecx, %esi,1), %edx 569 POP (%edi) 570 POP (%esi) 571 jmp L(less48bytes) 572 573# ifndef USE_AS_WMEMCMP 574 cfi_restore_state 575 cfi_remember_state 576 .p2align 4 577L(shr_5): 578 cmp $80, %ecx 579 lea -48(%ecx), %ecx 580 mov %edx, %eax 581 jae L(shr_5_gobble) 582 583 movdqa 16(%esi), %xmm1 584 movdqa %xmm1, %xmm2 585 palignr $5,(%esi), %xmm1 586 pcmpeqb (%edi), %xmm1 587 588 movdqa 32(%esi), %xmm3 589 palignr $5,%xmm2, %xmm3 590 pcmpeqb 16(%edi), %xmm3 591 592 pand %xmm1, %xmm3 593 pmovmskb %xmm3, %edx 594 lea 32(%edi), %edi 595 lea 32(%esi), %esi 596 sub $0xffff, %edx 597 jnz L(exit) 598 lea (%ecx, %edi,1), %eax 599 lea 5(%ecx, %esi,1), %edx 600 POP (%edi) 601 POP (%esi) 602 jmp L(less48bytes) 603 604 cfi_restore_state 605 cfi_remember_state 606 .p2align 4 607L(shr_5_gobble): 608 sub $32, %ecx 609 movdqa 16(%esi), %xmm0 610 palignr $5,(%esi), %xmm0 611 pcmpeqb (%edi), %xmm0 612 613 movdqa 32(%esi), %xmm3 614 palignr $5,16(%esi), %xmm3 615 pcmpeqb 16(%edi), %xmm3 616 617L(shr_5_gobble_loop): 618 pand %xmm0, %xmm3 619 sub $32, %ecx 620 pmovmskb %xmm3, %edx 621 movdqa %xmm0, %xmm1 622 623 movdqa 64(%esi), %xmm3 624 palignr $5,48(%esi), %xmm3 625 sbb $0xffff, %edx 626 movdqa 48(%esi), %xmm0 627 palignr $5,32(%esi), %xmm0 628 pcmpeqb 32(%edi), %xmm0 629 lea 32(%esi), %esi 630 pcmpeqb 48(%edi), %xmm3 631 632 lea 32(%edi), %edi 633 jz L(shr_5_gobble_loop) 634 pand %xmm0, %xmm3 635 636 cmp $0, %ecx 637 jge L(shr_5_gobble_next) 638 inc %edx 639 add $32, %ecx 640L(shr_5_gobble_next): 641 test %edx, %edx 642 jnz L(exit) 643 644 pmovmskb %xmm3, %edx 645 movdqa %xmm0, %xmm1 646 lea 32(%edi), %edi 647 lea 32(%esi), %esi 648 sub $0xffff, %edx 649 jnz L(exit) 650 651 lea (%ecx, %edi,1), %eax 652 lea 5(%ecx, %esi,1), %edx 653 POP (%edi) 654 POP (%esi) 655 jmp L(less48bytes) 656 657 cfi_restore_state 658 cfi_remember_state 659 .p2align 4 660L(shr_6): 661 cmp $80, %ecx 662 lea -48(%ecx), %ecx 663 mov %edx, %eax 664 jae L(shr_6_gobble) 665 666 movdqa 16(%esi), %xmm1 667 movdqa %xmm1, %xmm2 668 palignr $6,(%esi), %xmm1 669 pcmpeqb (%edi), %xmm1 670 671 movdqa 32(%esi), %xmm3 672 palignr $6,%xmm2, %xmm3 673 pcmpeqb 16(%edi), %xmm3 674 675 pand %xmm1, %xmm3 676 pmovmskb %xmm3, %edx 677 lea 32(%edi), %edi 678 lea 32(%esi), %esi 679 sub $0xffff, %edx 680 jnz L(exit) 681 lea (%ecx, %edi,1), %eax 682 lea 6(%ecx, %esi,1), %edx 683 POP (%edi) 684 POP (%esi) 685 jmp L(less48bytes) 686 687 cfi_restore_state 688 cfi_remember_state 689 .p2align 4 690L(shr_6_gobble): 691 sub $32, %ecx 692 movdqa 16(%esi), %xmm0 693 palignr $6,(%esi), %xmm0 694 pcmpeqb (%edi), %xmm0 695 696 movdqa 32(%esi), %xmm3 697 palignr $6,16(%esi), %xmm3 698 pcmpeqb 16(%edi), %xmm3 699 700L(shr_6_gobble_loop): 701 pand %xmm0, %xmm3 702 sub $32, %ecx 703 pmovmskb %xmm3, %edx 704 movdqa %xmm0, %xmm1 705 706 movdqa 64(%esi), %xmm3 707 palignr $6,48(%esi), %xmm3 708 sbb $0xffff, %edx 709 movdqa 48(%esi), %xmm0 710 palignr $6,32(%esi), %xmm0 711 pcmpeqb 32(%edi), %xmm0 712 lea 32(%esi), %esi 713 pcmpeqb 48(%edi), %xmm3 714 715 lea 32(%edi), %edi 716 jz L(shr_6_gobble_loop) 717 pand %xmm0, %xmm3 718 719 cmp $0, %ecx 720 jge L(shr_6_gobble_next) 721 inc %edx 722 add $32, %ecx 723L(shr_6_gobble_next): 724 test %edx, %edx 725 jnz L(exit) 726 727 pmovmskb %xmm3, %edx 728 movdqa %xmm0, %xmm1 729 lea 32(%edi), %edi 730 lea 32(%esi), %esi 731 sub $0xffff, %edx 732 jnz L(exit) 733 734 lea (%ecx, %edi,1), %eax 735 lea 6(%ecx, %esi,1), %edx 736 POP (%edi) 737 POP (%esi) 738 jmp L(less48bytes) 739 740 cfi_restore_state 741 cfi_remember_state 742 .p2align 4 743L(shr_7): 744 cmp $80, %ecx 745 lea -48(%ecx), %ecx 746 mov %edx, %eax 747 jae L(shr_7_gobble) 748 749 movdqa 16(%esi), %xmm1 750 movdqa %xmm1, %xmm2 751 palignr $7,(%esi), %xmm1 752 pcmpeqb (%edi), %xmm1 753 754 movdqa 32(%esi), %xmm3 755 palignr $7,%xmm2, %xmm3 756 pcmpeqb 16(%edi), %xmm3 757 758 pand %xmm1, %xmm3 759 pmovmskb %xmm3, %edx 760 lea 32(%edi), %edi 761 lea 32(%esi), %esi 762 sub $0xffff, %edx 763 jnz L(exit) 764 lea (%ecx, %edi,1), %eax 765 lea 7(%ecx, %esi,1), %edx 766 POP (%edi) 767 POP (%esi) 768 jmp L(less48bytes) 769 770 cfi_restore_state 771 cfi_remember_state 772 .p2align 4 773L(shr_7_gobble): 774 sub $32, %ecx 775 movdqa 16(%esi), %xmm0 776 palignr $7,(%esi), %xmm0 777 pcmpeqb (%edi), %xmm0 778 779 movdqa 32(%esi), %xmm3 780 palignr $7,16(%esi), %xmm3 781 pcmpeqb 16(%edi), %xmm3 782 783L(shr_7_gobble_loop): 784 pand %xmm0, %xmm3 785 sub $32, %ecx 786 pmovmskb %xmm3, %edx 787 movdqa %xmm0, %xmm1 788 789 movdqa 64(%esi), %xmm3 790 palignr $7,48(%esi), %xmm3 791 sbb $0xffff, %edx 792 movdqa 48(%esi), %xmm0 793 palignr $7,32(%esi), %xmm0 794 pcmpeqb 32(%edi), %xmm0 795 lea 32(%esi), %esi 796 pcmpeqb 48(%edi), %xmm3 797 798 lea 32(%edi), %edi 799 jz L(shr_7_gobble_loop) 800 pand %xmm0, %xmm3 801 802 cmp $0, %ecx 803 jge L(shr_7_gobble_next) 804 inc %edx 805 add $32, %ecx 806L(shr_7_gobble_next): 807 test %edx, %edx 808 jnz L(exit) 809 810 pmovmskb %xmm3, %edx 811 movdqa %xmm0, %xmm1 812 lea 32(%edi), %edi 813 lea 32(%esi), %esi 814 sub $0xffff, %edx 815 jnz L(exit) 816 817 lea (%ecx, %edi,1), %eax 818 lea 7(%ecx, %esi,1), %edx 819 POP (%edi) 820 POP (%esi) 821 jmp L(less48bytes) 822# endif 823 824 cfi_restore_state 825 cfi_remember_state 826 .p2align 4 827L(shr_8): 828 cmp $80, %ecx 829 lea -48(%ecx), %ecx 830 mov %edx, %eax 831 jae L(shr_8_gobble) 832 833 movdqa 16(%esi), %xmm1 834 movdqa %xmm1, %xmm2 835 palignr $8,(%esi), %xmm1 836 pcmpeqb (%edi), %xmm1 837 838 movdqa 32(%esi), %xmm3 839 palignr $8,%xmm2, %xmm3 840 pcmpeqb 16(%edi), %xmm3 841 842 pand %xmm1, %xmm3 843 pmovmskb %xmm3, %edx 844 lea 32(%edi), %edi 845 lea 32(%esi), %esi 846 sub $0xffff, %edx 847 jnz L(exit) 848 lea (%ecx, %edi,1), %eax 849 lea 8(%ecx, %esi,1), %edx 850 POP (%edi) 851 POP (%esi) 852 jmp L(less48bytes) 853 854 cfi_restore_state 855 cfi_remember_state 856 .p2align 4 857L(shr_8_gobble): 858 sub $32, %ecx 859 movdqa 16(%esi), %xmm0 860 palignr $8,(%esi), %xmm0 861 pcmpeqb (%edi), %xmm0 862 863 movdqa 32(%esi), %xmm3 864 palignr $8,16(%esi), %xmm3 865 pcmpeqb 16(%edi), %xmm3 866 867L(shr_8_gobble_loop): 868 pand %xmm0, %xmm3 869 sub $32, %ecx 870 pmovmskb %xmm3, %edx 871 movdqa %xmm0, %xmm1 872 873 movdqa 64(%esi), %xmm3 874 palignr $8,48(%esi), %xmm3 875 sbb $0xffff, %edx 876 movdqa 48(%esi), %xmm0 877 palignr $8,32(%esi), %xmm0 878 pcmpeqb 32(%edi), %xmm0 879 lea 32(%esi), %esi 880 pcmpeqb 48(%edi), %xmm3 881 882 lea 32(%edi), %edi 883 jz L(shr_8_gobble_loop) 884 pand %xmm0, %xmm3 885 886 cmp $0, %ecx 887 jge L(shr_8_gobble_next) 888 inc %edx 889 add $32, %ecx 890L(shr_8_gobble_next): 891 test %edx, %edx 892 jnz L(exit) 893 894 pmovmskb %xmm3, %edx 895 movdqa %xmm0, %xmm1 896 lea 32(%edi), %edi 897 lea 32(%esi), %esi 898 sub $0xffff, %edx 899 jnz L(exit) 900 901 lea (%ecx, %edi,1), %eax 902 lea 8(%ecx, %esi,1), %edx 903 POP (%edi) 904 POP (%esi) 905 jmp L(less48bytes) 906 907# ifndef USE_AS_WMEMCMP 908 cfi_restore_state 909 cfi_remember_state 910 .p2align 4 911L(shr_9): 912 cmp $80, %ecx 913 lea -48(%ecx), %ecx 914 mov %edx, %eax 915 jae L(shr_9_gobble) 916 917 movdqa 16(%esi), %xmm1 918 movdqa %xmm1, %xmm2 919 palignr $9,(%esi), %xmm1 920 pcmpeqb (%edi), %xmm1 921 922 movdqa 32(%esi), %xmm3 923 palignr $9,%xmm2, %xmm3 924 pcmpeqb 16(%edi), %xmm3 925 926 pand %xmm1, %xmm3 927 pmovmskb %xmm3, %edx 928 lea 32(%edi), %edi 929 lea 32(%esi), %esi 930 sub $0xffff, %edx 931 jnz L(exit) 932 lea (%ecx, %edi,1), %eax 933 lea 9(%ecx, %esi,1), %edx 934 POP (%edi) 935 POP (%esi) 936 jmp L(less48bytes) 937 938 cfi_restore_state 939 cfi_remember_state 940 .p2align 4 941L(shr_9_gobble): 942 sub $32, %ecx 943 movdqa 16(%esi), %xmm0 944 palignr $9,(%esi), %xmm0 945 pcmpeqb (%edi), %xmm0 946 947 movdqa 32(%esi), %xmm3 948 palignr $9,16(%esi), %xmm3 949 pcmpeqb 16(%edi), %xmm3 950 951L(shr_9_gobble_loop): 952 pand %xmm0, %xmm3 953 sub $32, %ecx 954 pmovmskb %xmm3, %edx 955 movdqa %xmm0, %xmm1 956 957 movdqa 64(%esi), %xmm3 958 palignr $9,48(%esi), %xmm3 959 sbb $0xffff, %edx 960 movdqa 48(%esi), %xmm0 961 palignr $9,32(%esi), %xmm0 962 pcmpeqb 32(%edi), %xmm0 963 lea 32(%esi), %esi 964 pcmpeqb 48(%edi), %xmm3 965 966 lea 32(%edi), %edi 967 jz L(shr_9_gobble_loop) 968 pand %xmm0, %xmm3 969 970 cmp $0, %ecx 971 jge L(shr_9_gobble_next) 972 inc %edx 973 add $32, %ecx 974L(shr_9_gobble_next): 975 test %edx, %edx 976 jnz L(exit) 977 978 pmovmskb %xmm3, %edx 979 movdqa %xmm0, %xmm1 980 lea 32(%edi), %edi 981 lea 32(%esi), %esi 982 sub $0xffff, %edx 983 jnz L(exit) 984 985 lea (%ecx, %edi,1), %eax 986 lea 9(%ecx, %esi,1), %edx 987 POP (%edi) 988 POP (%esi) 989 jmp L(less48bytes) 990 991 cfi_restore_state 992 cfi_remember_state 993 .p2align 4 994L(shr_10): 995 cmp $80, %ecx 996 lea -48(%ecx), %ecx 997 mov %edx, %eax 998 jae L(shr_10_gobble) 999 1000 movdqa 16(%esi), %xmm1 1001 movdqa %xmm1, %xmm2 1002 palignr $10, (%esi), %xmm1 1003 pcmpeqb (%edi), %xmm1 1004 1005 movdqa 32(%esi), %xmm3 1006 palignr $10,%xmm2, %xmm3 1007 pcmpeqb 16(%edi), %xmm3 1008 1009 pand %xmm1, %xmm3 1010 pmovmskb %xmm3, %edx 1011 lea 32(%edi), %edi 1012 lea 32(%esi), %esi 1013 sub $0xffff, %edx 1014 jnz L(exit) 1015 lea (%ecx, %edi,1), %eax 1016 lea 10(%ecx, %esi,1), %edx 1017 POP (%edi) 1018 POP (%esi) 1019 jmp L(less48bytes) 1020 1021 cfi_restore_state 1022 cfi_remember_state 1023 .p2align 4 1024L(shr_10_gobble): 1025 sub $32, %ecx 1026 movdqa 16(%esi), %xmm0 1027 palignr $10, (%esi), %xmm0 1028 pcmpeqb (%edi), %xmm0 1029 1030 movdqa 32(%esi), %xmm3 1031 palignr $10, 16(%esi), %xmm3 1032 pcmpeqb 16(%edi), %xmm3 1033 1034L(shr_10_gobble_loop): 1035 pand %xmm0, %xmm3 1036 sub $32, %ecx 1037 pmovmskb %xmm3, %edx 1038 movdqa %xmm0, %xmm1 1039 1040 movdqa 64(%esi), %xmm3 1041 palignr $10,48(%esi), %xmm3 1042 sbb $0xffff, %edx 1043 movdqa 48(%esi), %xmm0 1044 palignr $10,32(%esi), %xmm0 1045 pcmpeqb 32(%edi), %xmm0 1046 lea 32(%esi), %esi 1047 pcmpeqb 48(%edi), %xmm3 1048 1049 lea 32(%edi), %edi 1050 jz L(shr_10_gobble_loop) 1051 pand %xmm0, %xmm3 1052 1053 cmp $0, %ecx 1054 jge L(shr_10_gobble_next) 1055 inc %edx 1056 add $32, %ecx 1057L(shr_10_gobble_next): 1058 test %edx, %edx 1059 jnz L(exit) 1060 1061 pmovmskb %xmm3, %edx 1062 movdqa %xmm0, %xmm1 1063 lea 32(%edi), %edi 1064 lea 32(%esi), %esi 1065 sub $0xffff, %edx 1066 jnz L(exit) 1067 1068 lea (%ecx, %edi,1), %eax 1069 lea 10(%ecx, %esi,1), %edx 1070 POP (%edi) 1071 POP (%esi) 1072 jmp L(less48bytes) 1073 1074 cfi_restore_state 1075 cfi_remember_state 1076 .p2align 4 1077L(shr_11): 1078 cmp $80, %ecx 1079 lea -48(%ecx), %ecx 1080 mov %edx, %eax 1081 jae L(shr_11_gobble) 1082 1083 movdqa 16(%esi), %xmm1 1084 movdqa %xmm1, %xmm2 1085 palignr $11, (%esi), %xmm1 1086 pcmpeqb (%edi), %xmm1 1087 1088 movdqa 32(%esi), %xmm3 1089 palignr $11, %xmm2, %xmm3 1090 pcmpeqb 16(%edi), %xmm3 1091 1092 pand %xmm1, %xmm3 1093 pmovmskb %xmm3, %edx 1094 lea 32(%edi), %edi 1095 lea 32(%esi), %esi 1096 sub $0xffff, %edx 1097 jnz L(exit) 1098 lea (%ecx, %edi,1), %eax 1099 lea 11(%ecx, %esi,1), %edx 1100 POP (%edi) 1101 POP (%esi) 1102 jmp L(less48bytes) 1103 1104 cfi_restore_state 1105 cfi_remember_state 1106 .p2align 4 1107L(shr_11_gobble): 1108 sub $32, %ecx 1109 movdqa 16(%esi), %xmm0 1110 palignr $11, (%esi), %xmm0 1111 pcmpeqb (%edi), %xmm0 1112 1113 movdqa 32(%esi), %xmm3 1114 palignr $11, 16(%esi), %xmm3 1115 pcmpeqb 16(%edi), %xmm3 1116 1117L(shr_11_gobble_loop): 1118 pand %xmm0, %xmm3 1119 sub $32, %ecx 1120 pmovmskb %xmm3, %edx 1121 movdqa %xmm0, %xmm1 1122 1123 movdqa 64(%esi), %xmm3 1124 palignr $11,48(%esi), %xmm3 1125 sbb $0xffff, %edx 1126 movdqa 48(%esi), %xmm0 1127 palignr $11,32(%esi), %xmm0 1128 pcmpeqb 32(%edi), %xmm0 1129 lea 32(%esi), %esi 1130 pcmpeqb 48(%edi), %xmm3 1131 1132 lea 32(%edi), %edi 1133 jz L(shr_11_gobble_loop) 1134 pand %xmm0, %xmm3 1135 1136 cmp $0, %ecx 1137 jge L(shr_11_gobble_next) 1138 inc %edx 1139 add $32, %ecx 1140L(shr_11_gobble_next): 1141 test %edx, %edx 1142 jnz L(exit) 1143 1144 pmovmskb %xmm3, %edx 1145 movdqa %xmm0, %xmm1 1146 lea 32(%edi), %edi 1147 lea 32(%esi), %esi 1148 sub $0xffff, %edx 1149 jnz L(exit) 1150 1151 lea (%ecx, %edi,1), %eax 1152 lea 11(%ecx, %esi,1), %edx 1153 POP (%edi) 1154 POP (%esi) 1155 jmp L(less48bytes) 1156# endif 1157 1158 cfi_restore_state 1159 cfi_remember_state 1160 .p2align 4 1161L(shr_12): 1162 cmp $80, %ecx 1163 lea -48(%ecx), %ecx 1164 mov %edx, %eax 1165 jae L(shr_12_gobble) 1166 1167 movdqa 16(%esi), %xmm1 1168 movdqa %xmm1, %xmm2 1169 palignr $12, (%esi), %xmm1 1170 pcmpeqb (%edi), %xmm1 1171 1172 movdqa 32(%esi), %xmm3 1173 palignr $12, %xmm2, %xmm3 1174 pcmpeqb 16(%edi), %xmm3 1175 1176 pand %xmm1, %xmm3 1177 pmovmskb %xmm3, %edx 1178 lea 32(%edi), %edi 1179 lea 32(%esi), %esi 1180 sub $0xffff, %edx 1181 jnz L(exit) 1182 lea (%ecx, %edi,1), %eax 1183 lea 12(%ecx, %esi,1), %edx 1184 POP (%edi) 1185 POP (%esi) 1186 jmp L(less48bytes) 1187 1188 cfi_restore_state 1189 cfi_remember_state 1190 .p2align 4 1191L(shr_12_gobble): 1192 sub $32, %ecx 1193 movdqa 16(%esi), %xmm0 1194 palignr $12, (%esi), %xmm0 1195 pcmpeqb (%edi), %xmm0 1196 1197 movdqa 32(%esi), %xmm3 1198 palignr $12, 16(%esi), %xmm3 1199 pcmpeqb 16(%edi), %xmm3 1200 1201L(shr_12_gobble_loop): 1202 pand %xmm0, %xmm3 1203 sub $32, %ecx 1204 pmovmskb %xmm3, %edx 1205 movdqa %xmm0, %xmm1 1206 1207 movdqa 64(%esi), %xmm3 1208 palignr $12,48(%esi), %xmm3 1209 sbb $0xffff, %edx 1210 movdqa 48(%esi), %xmm0 1211 palignr $12,32(%esi), %xmm0 1212 pcmpeqb 32(%edi), %xmm0 1213 lea 32(%esi), %esi 1214 pcmpeqb 48(%edi), %xmm3 1215 1216 lea 32(%edi), %edi 1217 jz L(shr_12_gobble_loop) 1218 pand %xmm0, %xmm3 1219 1220 cmp $0, %ecx 1221 jge L(shr_12_gobble_next) 1222 inc %edx 1223 add $32, %ecx 1224L(shr_12_gobble_next): 1225 test %edx, %edx 1226 jnz L(exit) 1227 1228 pmovmskb %xmm3, %edx 1229 movdqa %xmm0, %xmm1 1230 lea 32(%edi), %edi 1231 lea 32(%esi), %esi 1232 sub $0xffff, %edx 1233 jnz L(exit) 1234 1235 lea (%ecx, %edi,1), %eax 1236 lea 12(%ecx, %esi,1), %edx 1237 POP (%edi) 1238 POP (%esi) 1239 jmp L(less48bytes) 1240 1241# ifndef USE_AS_WMEMCMP 1242 cfi_restore_state 1243 cfi_remember_state 1244 .p2align 4 1245L(shr_13): 1246 cmp $80, %ecx 1247 lea -48(%ecx), %ecx 1248 mov %edx, %eax 1249 jae L(shr_13_gobble) 1250 1251 movdqa 16(%esi), %xmm1 1252 movdqa %xmm1, %xmm2 1253 palignr $13, (%esi), %xmm1 1254 pcmpeqb (%edi), %xmm1 1255 1256 movdqa 32(%esi), %xmm3 1257 palignr $13, %xmm2, %xmm3 1258 pcmpeqb 16(%edi), %xmm3 1259 1260 pand %xmm1, %xmm3 1261 pmovmskb %xmm3, %edx 1262 lea 32(%edi), %edi 1263 lea 32(%esi), %esi 1264 sub $0xffff, %edx 1265 jnz L(exit) 1266 lea (%ecx, %edi,1), %eax 1267 lea 13(%ecx, %esi,1), %edx 1268 POP (%edi) 1269 POP (%esi) 1270 jmp L(less48bytes) 1271 1272 cfi_restore_state 1273 cfi_remember_state 1274 .p2align 4 1275L(shr_13_gobble): 1276 sub $32, %ecx 1277 movdqa 16(%esi), %xmm0 1278 palignr $13, (%esi), %xmm0 1279 pcmpeqb (%edi), %xmm0 1280 1281 movdqa 32(%esi), %xmm3 1282 palignr $13, 16(%esi), %xmm3 1283 pcmpeqb 16(%edi), %xmm3 1284 1285L(shr_13_gobble_loop): 1286 pand %xmm0, %xmm3 1287 sub $32, %ecx 1288 pmovmskb %xmm3, %edx 1289 movdqa %xmm0, %xmm1 1290 1291 movdqa 64(%esi), %xmm3 1292 palignr $13,48(%esi), %xmm3 1293 sbb $0xffff, %edx 1294 movdqa 48(%esi), %xmm0 1295 palignr $13,32(%esi), %xmm0 1296 pcmpeqb 32(%edi), %xmm0 1297 lea 32(%esi), %esi 1298 pcmpeqb 48(%edi), %xmm3 1299 1300 lea 32(%edi), %edi 1301 jz L(shr_13_gobble_loop) 1302 pand %xmm0, %xmm3 1303 1304 cmp $0, %ecx 1305 jge L(shr_13_gobble_next) 1306 inc %edx 1307 add $32, %ecx 1308L(shr_13_gobble_next): 1309 test %edx, %edx 1310 jnz L(exit) 1311 1312 pmovmskb %xmm3, %edx 1313 movdqa %xmm0, %xmm1 1314 lea 32(%edi), %edi 1315 lea 32(%esi), %esi 1316 sub $0xffff, %edx 1317 jnz L(exit) 1318 1319 lea (%ecx, %edi,1), %eax 1320 lea 13(%ecx, %esi,1), %edx 1321 POP (%edi) 1322 POP (%esi) 1323 jmp L(less48bytes) 1324 1325 cfi_restore_state 1326 cfi_remember_state 1327 .p2align 4 1328L(shr_14): 1329 cmp $80, %ecx 1330 lea -48(%ecx), %ecx 1331 mov %edx, %eax 1332 jae L(shr_14_gobble) 1333 1334 movdqa 16(%esi), %xmm1 1335 movdqa %xmm1, %xmm2 1336 palignr $14, (%esi), %xmm1 1337 pcmpeqb (%edi), %xmm1 1338 1339 movdqa 32(%esi), %xmm3 1340 palignr $14, %xmm2, %xmm3 1341 pcmpeqb 16(%edi), %xmm3 1342 1343 pand %xmm1, %xmm3 1344 pmovmskb %xmm3, %edx 1345 lea 32(%edi), %edi 1346 lea 32(%esi), %esi 1347 sub $0xffff, %edx 1348 jnz L(exit) 1349 lea (%ecx, %edi,1), %eax 1350 lea 14(%ecx, %esi,1), %edx 1351 POP (%edi) 1352 POP (%esi) 1353 jmp L(less48bytes) 1354 1355 cfi_restore_state 1356 cfi_remember_state 1357 .p2align 4 1358L(shr_14_gobble): 1359 sub $32, %ecx 1360 movdqa 16(%esi), %xmm0 1361 palignr $14, (%esi), %xmm0 1362 pcmpeqb (%edi), %xmm0 1363 1364 movdqa 32(%esi), %xmm3 1365 palignr $14, 16(%esi), %xmm3 1366 pcmpeqb 16(%edi), %xmm3 1367 1368L(shr_14_gobble_loop): 1369 pand %xmm0, %xmm3 1370 sub $32, %ecx 1371 pmovmskb %xmm3, %edx 1372 movdqa %xmm0, %xmm1 1373 1374 movdqa 64(%esi), %xmm3 1375 palignr $14,48(%esi), %xmm3 1376 sbb $0xffff, %edx 1377 movdqa 48(%esi), %xmm0 1378 palignr $14,32(%esi), %xmm0 1379 pcmpeqb 32(%edi), %xmm0 1380 lea 32(%esi), %esi 1381 pcmpeqb 48(%edi), %xmm3 1382 1383 lea 32(%edi), %edi 1384 jz L(shr_14_gobble_loop) 1385 pand %xmm0, %xmm3 1386 1387 cmp $0, %ecx 1388 jge L(shr_14_gobble_next) 1389 inc %edx 1390 add $32, %ecx 1391L(shr_14_gobble_next): 1392 test %edx, %edx 1393 jnz L(exit) 1394 1395 pmovmskb %xmm3, %edx 1396 movdqa %xmm0, %xmm1 1397 lea 32(%edi), %edi 1398 lea 32(%esi), %esi 1399 sub $0xffff, %edx 1400 jnz L(exit) 1401 1402 lea (%ecx, %edi,1), %eax 1403 lea 14(%ecx, %esi,1), %edx 1404 POP (%edi) 1405 POP (%esi) 1406 jmp L(less48bytes) 1407 1408 cfi_restore_state 1409 cfi_remember_state 1410 .p2align 4 1411L(shr_15): 1412 cmp $80, %ecx 1413 lea -48(%ecx), %ecx 1414 mov %edx, %eax 1415 jae L(shr_15_gobble) 1416 1417 movdqa 16(%esi), %xmm1 1418 movdqa %xmm1, %xmm2 1419 palignr $15, (%esi), %xmm1 1420 pcmpeqb (%edi), %xmm1 1421 1422 movdqa 32(%esi), %xmm3 1423 palignr $15, %xmm2, %xmm3 1424 pcmpeqb 16(%edi), %xmm3 1425 1426 pand %xmm1, %xmm3 1427 pmovmskb %xmm3, %edx 1428 lea 32(%edi), %edi 1429 lea 32(%esi), %esi 1430 sub $0xffff, %edx 1431 jnz L(exit) 1432 lea (%ecx, %edi,1), %eax 1433 lea 15(%ecx, %esi,1), %edx 1434 POP (%edi) 1435 POP (%esi) 1436 jmp L(less48bytes) 1437 1438 cfi_restore_state 1439 cfi_remember_state 1440 .p2align 4 1441L(shr_15_gobble): 1442 sub $32, %ecx 1443 movdqa 16(%esi), %xmm0 1444 palignr $15, (%esi), %xmm0 1445 pcmpeqb (%edi), %xmm0 1446 1447 movdqa 32(%esi), %xmm3 1448 palignr $15, 16(%esi), %xmm3 1449 pcmpeqb 16(%edi), %xmm3 1450 1451L(shr_15_gobble_loop): 1452 pand %xmm0, %xmm3 1453 sub $32, %ecx 1454 pmovmskb %xmm3, %edx 1455 movdqa %xmm0, %xmm1 1456 1457 movdqa 64(%esi), %xmm3 1458 palignr $15,48(%esi), %xmm3 1459 sbb $0xffff, %edx 1460 movdqa 48(%esi), %xmm0 1461 palignr $15,32(%esi), %xmm0 1462 pcmpeqb 32(%edi), %xmm0 1463 lea 32(%esi), %esi 1464 pcmpeqb 48(%edi), %xmm3 1465 1466 lea 32(%edi), %edi 1467 jz L(shr_15_gobble_loop) 1468 pand %xmm0, %xmm3 1469 1470 cmp $0, %ecx 1471 jge L(shr_15_gobble_next) 1472 inc %edx 1473 add $32, %ecx 1474L(shr_15_gobble_next): 1475 test %edx, %edx 1476 jnz L(exit) 1477 1478 pmovmskb %xmm3, %edx 1479 movdqa %xmm0, %xmm1 1480 lea 32(%edi), %edi 1481 lea 32(%esi), %esi 1482 sub $0xffff, %edx 1483 jnz L(exit) 1484 1485 lea (%ecx, %edi,1), %eax 1486 lea 15(%ecx, %esi,1), %edx 1487 POP (%edi) 1488 POP (%esi) 1489 jmp L(less48bytes) 1490# endif 1491 1492 cfi_restore_state 1493 cfi_remember_state 1494 .p2align 4 1495L(exit): 1496 pmovmskb %xmm1, %ebx 1497 sub $0xffff, %ebx 1498 jz L(first16bytes) 1499 lea -16(%esi), %esi 1500 lea -16(%edi), %edi 1501 mov %ebx, %edx 1502 1503L(first16bytes): 1504 add %eax, %esi 1505L(less16bytes): 1506 1507# ifndef USE_AS_WMEMCMP 1508 test %dl, %dl 1509 jz L(next_24_bytes) 1510 1511 test $0x01, %dl 1512 jnz L(Byte16) 1513 1514 test $0x02, %dl 1515 jnz L(Byte17) 1516 1517 test $0x04, %dl 1518 jnz L(Byte18) 1519 1520 test $0x08, %dl 1521 jnz L(Byte19) 1522 1523 test $0x10, %dl 1524 jnz L(Byte20) 1525 1526 test $0x20, %dl 1527 jnz L(Byte21) 1528 1529 test $0x40, %dl 1530 jnz L(Byte22) 1531L(Byte23): 1532 movzbl -9(%edi), %eax 1533 movzbl -9(%esi), %edx 1534 sub %edx, %eax 1535 RETURN 1536 1537 .p2align 4 1538L(Byte16): 1539 movzbl -16(%edi), %eax 1540 movzbl -16(%esi), %edx 1541 sub %edx, %eax 1542 RETURN 1543 1544 .p2align 4 1545L(Byte17): 1546 movzbl -15(%edi), %eax 1547 movzbl -15(%esi), %edx 1548 sub %edx, %eax 1549 RETURN 1550 1551 .p2align 4 1552L(Byte18): 1553 movzbl -14(%edi), %eax 1554 movzbl -14(%esi), %edx 1555 sub %edx, %eax 1556 RETURN 1557 1558 .p2align 4 1559L(Byte19): 1560 movzbl -13(%edi), %eax 1561 movzbl -13(%esi), %edx 1562 sub %edx, %eax 1563 RETURN 1564 1565 .p2align 4 1566L(Byte20): 1567 movzbl -12(%edi), %eax 1568 movzbl -12(%esi), %edx 1569 sub %edx, %eax 1570 RETURN 1571 1572 .p2align 4 1573L(Byte21): 1574 movzbl -11(%edi), %eax 1575 movzbl -11(%esi), %edx 1576 sub %edx, %eax 1577 RETURN 1578 1579 .p2align 4 1580L(Byte22): 1581 movzbl -10(%edi), %eax 1582 movzbl -10(%esi), %edx 1583 sub %edx, %eax 1584 RETURN 1585 1586 .p2align 4 1587L(next_24_bytes): 1588 lea 8(%edi), %edi 1589 lea 8(%esi), %esi 1590 test $0x01, %dh 1591 jnz L(Byte16) 1592 1593 test $0x02, %dh 1594 jnz L(Byte17) 1595 1596 test $0x04, %dh 1597 jnz L(Byte18) 1598 1599 test $0x08, %dh 1600 jnz L(Byte19) 1601 1602 test $0x10, %dh 1603 jnz L(Byte20) 1604 1605 test $0x20, %dh 1606 jnz L(Byte21) 1607 1608 test $0x40, %dh 1609 jnz L(Byte22) 1610 1611 .p2align 4 1612L(Byte31): 1613 movzbl -9(%edi), %eax 1614 movzbl -9(%esi), %edx 1615 sub %edx, %eax 1616 RETURN_END 1617# else 1618 1619/* special for wmemcmp */ 1620 xor %eax, %eax 1621 test %dl, %dl 1622 jz L(next_two_double_words) 1623 and $15, %dl 1624 jz L(second_double_word) 1625 mov -16(%edi), %eax 1626 cmp -16(%esi), %eax 1627 jne L(nequal) 1628 RETURN 1629 1630 .p2align 4 1631L(second_double_word): 1632 mov -12(%edi), %eax 1633 cmp -12(%esi), %eax 1634 jne L(nequal) 1635 RETURN 1636 1637 .p2align 4 1638L(next_two_double_words): 1639 and $15, %dh 1640 jz L(fourth_double_word) 1641 mov -8(%edi), %eax 1642 cmp -8(%esi), %eax 1643 jne L(nequal) 1644 RETURN 1645 1646 .p2align 4 1647L(fourth_double_word): 1648 mov -4(%edi), %eax 1649 cmp -4(%esi), %eax 1650 jne L(nequal) 1651 RETURN 1652 1653 .p2align 4 1654L(nequal): 1655 mov $1, %eax 1656 jg L(nequal_bigger) 1657 neg %eax 1658 RETURN 1659 1660 .p2align 4 1661L(nequal_bigger): 1662 RETURN_END 1663# endif 1664 1665 CFI_PUSH (%ebx) 1666 1667 .p2align 4 1668L(more8bytes): 1669 cmp $16, %ecx 1670 jae L(more16bytes) 1671 cmp $8, %ecx 1672 je L(8bytes) 1673# ifndef USE_AS_WMEMCMP 1674 cmp $9, %ecx 1675 je L(9bytes) 1676 cmp $10, %ecx 1677 je L(10bytes) 1678 cmp $11, %ecx 1679 je L(11bytes) 1680 cmp $12, %ecx 1681 je L(12bytes) 1682 cmp $13, %ecx 1683 je L(13bytes) 1684 cmp $14, %ecx 1685 je L(14bytes) 1686 jmp L(15bytes) 1687# else 1688 jmp L(12bytes) 1689# endif 1690 1691 .p2align 4 1692L(more16bytes): 1693 cmp $24, %ecx 1694 jae L(more24bytes) 1695 cmp $16, %ecx 1696 je L(16bytes) 1697# ifndef USE_AS_WMEMCMP 1698 cmp $17, %ecx 1699 je L(17bytes) 1700 cmp $18, %ecx 1701 je L(18bytes) 1702 cmp $19, %ecx 1703 je L(19bytes) 1704 cmp $20, %ecx 1705 je L(20bytes) 1706 cmp $21, %ecx 1707 je L(21bytes) 1708 cmp $22, %ecx 1709 je L(22bytes) 1710 jmp L(23bytes) 1711# else 1712 jmp L(20bytes) 1713# endif 1714 1715 .p2align 4 1716L(more24bytes): 1717 cmp $32, %ecx 1718 jae L(more32bytes) 1719 cmp $24, %ecx 1720 je L(24bytes) 1721# ifndef USE_AS_WMEMCMP 1722 cmp $25, %ecx 1723 je L(25bytes) 1724 cmp $26, %ecx 1725 je L(26bytes) 1726 cmp $27, %ecx 1727 je L(27bytes) 1728 cmp $28, %ecx 1729 je L(28bytes) 1730 cmp $29, %ecx 1731 je L(29bytes) 1732 cmp $30, %ecx 1733 je L(30bytes) 1734 jmp L(31bytes) 1735# else 1736 jmp L(28bytes) 1737# endif 1738 1739 .p2align 4 1740L(more32bytes): 1741 cmp $40, %ecx 1742 jae L(more40bytes) 1743 cmp $32, %ecx 1744 je L(32bytes) 1745# ifndef USE_AS_WMEMCMP 1746 cmp $33, %ecx 1747 je L(33bytes) 1748 cmp $34, %ecx 1749 je L(34bytes) 1750 cmp $35, %ecx 1751 je L(35bytes) 1752 cmp $36, %ecx 1753 je L(36bytes) 1754 cmp $37, %ecx 1755 je L(37bytes) 1756 cmp $38, %ecx 1757 je L(38bytes) 1758 jmp L(39bytes) 1759# else 1760 jmp L(36bytes) 1761# endif 1762 1763 .p2align 4 1764L(less48bytes): 1765 cmp $8, %ecx 1766 jae L(more8bytes) 1767# ifndef USE_AS_WMEMCMP 1768 cmp $2, %ecx 1769 je L(2bytes) 1770 cmp $3, %ecx 1771 je L(3bytes) 1772 cmp $4, %ecx 1773 je L(4bytes) 1774 cmp $5, %ecx 1775 je L(5bytes) 1776 cmp $6, %ecx 1777 je L(6bytes) 1778 jmp L(7bytes) 1779# else 1780 jmp L(4bytes) 1781# endif 1782 1783 .p2align 4 1784L(more40bytes): 1785 cmp $40, %ecx 1786 je L(40bytes) 1787# ifndef USE_AS_WMEMCMP 1788 cmp $41, %ecx 1789 je L(41bytes) 1790 cmp $42, %ecx 1791 je L(42bytes) 1792 cmp $43, %ecx 1793 je L(43bytes) 1794 cmp $44, %ecx 1795 je L(44bytes) 1796 cmp $45, %ecx 1797 je L(45bytes) 1798 cmp $46, %ecx 1799 je L(46bytes) 1800 jmp L(47bytes) 1801 1802 .p2align 4 1803L(44bytes): 1804 mov -44(%eax), %ecx 1805 mov -44(%edx), %ebx 1806 cmp %ebx, %ecx 1807 jne L(find_diff) 1808L(40bytes): 1809 mov -40(%eax), %ecx 1810 mov -40(%edx), %ebx 1811 cmp %ebx, %ecx 1812 jne L(find_diff) 1813L(36bytes): 1814 mov -36(%eax), %ecx 1815 mov -36(%edx), %ebx 1816 cmp %ebx, %ecx 1817 jne L(find_diff) 1818L(32bytes): 1819 mov -32(%eax), %ecx 1820 mov -32(%edx), %ebx 1821 cmp %ebx, %ecx 1822 jne L(find_diff) 1823L(28bytes): 1824 mov -28(%eax), %ecx 1825 mov -28(%edx), %ebx 1826 cmp %ebx, %ecx 1827 jne L(find_diff) 1828L(24bytes): 1829 mov -24(%eax), %ecx 1830 mov -24(%edx), %ebx 1831 cmp %ebx, %ecx 1832 jne L(find_diff) 1833L(20bytes): 1834 mov -20(%eax), %ecx 1835 mov -20(%edx), %ebx 1836 cmp %ebx, %ecx 1837 jne L(find_diff) 1838L(16bytes): 1839 mov -16(%eax), %ecx 1840 mov -16(%edx), %ebx 1841 cmp %ebx, %ecx 1842 jne L(find_diff) 1843L(12bytes): 1844 mov -12(%eax), %ecx 1845 mov -12(%edx), %ebx 1846 cmp %ebx, %ecx 1847 jne L(find_diff) 1848L(8bytes): 1849 mov -8(%eax), %ecx 1850 mov -8(%edx), %ebx 1851 cmp %ebx, %ecx 1852 jne L(find_diff) 1853L(4bytes): 1854 mov -4(%eax), %ecx 1855 mov -4(%edx), %ebx 1856 cmp %ebx, %ecx 1857 mov $0, %eax 1858 jne L(find_diff) 1859 POP (%ebx) 1860 ret 1861 CFI_PUSH (%ebx) 1862# else 1863 .p2align 4 1864L(44bytes): 1865 mov -44(%eax), %ecx 1866 cmp -44(%edx), %ecx 1867 jne L(find_diff) 1868L(40bytes): 1869 mov -40(%eax), %ecx 1870 cmp -40(%edx), %ecx 1871 jne L(find_diff) 1872L(36bytes): 1873 mov -36(%eax), %ecx 1874 cmp -36(%edx), %ecx 1875 jne L(find_diff) 1876L(32bytes): 1877 mov -32(%eax), %ecx 1878 cmp -32(%edx), %ecx 1879 jne L(find_diff) 1880L(28bytes): 1881 mov -28(%eax), %ecx 1882 cmp -28(%edx), %ecx 1883 jne L(find_diff) 1884L(24bytes): 1885 mov -24(%eax), %ecx 1886 cmp -24(%edx), %ecx 1887 jne L(find_diff) 1888L(20bytes): 1889 mov -20(%eax), %ecx 1890 cmp -20(%edx), %ecx 1891 jne L(find_diff) 1892L(16bytes): 1893 mov -16(%eax), %ecx 1894 cmp -16(%edx), %ecx 1895 jne L(find_diff) 1896L(12bytes): 1897 mov -12(%eax), %ecx 1898 cmp -12(%edx), %ecx 1899 jne L(find_diff) 1900L(8bytes): 1901 mov -8(%eax), %ecx 1902 cmp -8(%edx), %ecx 1903 jne L(find_diff) 1904L(4bytes): 1905 mov -4(%eax), %ecx 1906 xor %eax, %eax 1907 cmp -4(%edx), %ecx 1908 jne L(find_diff) 1909 POP (%ebx) 1910 ret 1911 CFI_PUSH (%ebx) 1912# endif 1913 1914# ifndef USE_AS_WMEMCMP 1915 1916 .p2align 4 1917L(45bytes): 1918 mov -45(%eax), %ecx 1919 mov -45(%edx), %ebx 1920 cmp %ebx, %ecx 1921 jne L(find_diff) 1922L(41bytes): 1923 mov -41(%eax), %ecx 1924 mov -41(%edx), %ebx 1925 cmp %ebx, %ecx 1926 jne L(find_diff) 1927L(37bytes): 1928 mov -37(%eax), %ecx 1929 mov -37(%edx), %ebx 1930 cmp %ebx, %ecx 1931 jne L(find_diff) 1932L(33bytes): 1933 mov -33(%eax), %ecx 1934 mov -33(%edx), %ebx 1935 cmp %ebx, %ecx 1936 jne L(find_diff) 1937L(29bytes): 1938 mov -29(%eax), %ecx 1939 mov -29(%edx), %ebx 1940 cmp %ebx, %ecx 1941 jne L(find_diff) 1942L(25bytes): 1943 mov -25(%eax), %ecx 1944 mov -25(%edx), %ebx 1945 cmp %ebx, %ecx 1946 jne L(find_diff) 1947L(21bytes): 1948 mov -21(%eax), %ecx 1949 mov -21(%edx), %ebx 1950 cmp %ebx, %ecx 1951 jne L(find_diff) 1952L(17bytes): 1953 mov -17(%eax), %ecx 1954 mov -17(%edx), %ebx 1955 cmp %ebx, %ecx 1956 jne L(find_diff) 1957L(13bytes): 1958 mov -13(%eax), %ecx 1959 mov -13(%edx), %ebx 1960 cmp %ebx, %ecx 1961 jne L(find_diff) 1962L(9bytes): 1963 mov -9(%eax), %ecx 1964 mov -9(%edx), %ebx 1965 cmp %ebx, %ecx 1966 jne L(find_diff) 1967L(5bytes): 1968 mov -5(%eax), %ecx 1969 mov -5(%edx), %ebx 1970 cmp %ebx, %ecx 1971 jne L(find_diff) 1972 movzbl -1(%eax), %ecx 1973 cmp -1(%edx), %cl 1974 mov $0, %eax 1975 jne L(end) 1976 POP (%ebx) 1977 ret 1978 CFI_PUSH (%ebx) 1979 1980 .p2align 4 1981L(46bytes): 1982 mov -46(%eax), %ecx 1983 mov -46(%edx), %ebx 1984 cmp %ebx, %ecx 1985 jne L(find_diff) 1986L(42bytes): 1987 mov -42(%eax), %ecx 1988 mov -42(%edx), %ebx 1989 cmp %ebx, %ecx 1990 jne L(find_diff) 1991L(38bytes): 1992 mov -38(%eax), %ecx 1993 mov -38(%edx), %ebx 1994 cmp %ebx, %ecx 1995 jne L(find_diff) 1996L(34bytes): 1997 mov -34(%eax), %ecx 1998 mov -34(%edx), %ebx 1999 cmp %ebx, %ecx 2000 jne L(find_diff) 2001L(30bytes): 2002 mov -30(%eax), %ecx 2003 mov -30(%edx), %ebx 2004 cmp %ebx, %ecx 2005 jne L(find_diff) 2006L(26bytes): 2007 mov -26(%eax), %ecx 2008 mov -26(%edx), %ebx 2009 cmp %ebx, %ecx 2010 jne L(find_diff) 2011L(22bytes): 2012 mov -22(%eax), %ecx 2013 mov -22(%edx), %ebx 2014 cmp %ebx, %ecx 2015 jne L(find_diff) 2016L(18bytes): 2017 mov -18(%eax), %ecx 2018 mov -18(%edx), %ebx 2019 cmp %ebx, %ecx 2020 jne L(find_diff) 2021L(14bytes): 2022 mov -14(%eax), %ecx 2023 mov -14(%edx), %ebx 2024 cmp %ebx, %ecx 2025 jne L(find_diff) 2026L(10bytes): 2027 mov -10(%eax), %ecx 2028 mov -10(%edx), %ebx 2029 cmp %ebx, %ecx 2030 jne L(find_diff) 2031L(6bytes): 2032 mov -6(%eax), %ecx 2033 mov -6(%edx), %ebx 2034 cmp %ebx, %ecx 2035 jne L(find_diff) 2036L(2bytes): 2037 movzwl -2(%eax), %ecx 2038 movzwl -2(%edx), %ebx 2039 cmp %bl, %cl 2040 jne L(end) 2041 cmp %bh, %ch 2042 mov $0, %eax 2043 jne L(end) 2044 POP (%ebx) 2045 ret 2046 CFI_PUSH (%ebx) 2047 2048 .p2align 4 2049L(47bytes): 2050 movl -47(%eax), %ecx 2051 movl -47(%edx), %ebx 2052 cmp %ebx, %ecx 2053 jne L(find_diff) 2054L(43bytes): 2055 movl -43(%eax), %ecx 2056 movl -43(%edx), %ebx 2057 cmp %ebx, %ecx 2058 jne L(find_diff) 2059L(39bytes): 2060 movl -39(%eax), %ecx 2061 movl -39(%edx), %ebx 2062 cmp %ebx, %ecx 2063 jne L(find_diff) 2064L(35bytes): 2065 movl -35(%eax), %ecx 2066 movl -35(%edx), %ebx 2067 cmp %ebx, %ecx 2068 jne L(find_diff) 2069L(31bytes): 2070 movl -31(%eax), %ecx 2071 movl -31(%edx), %ebx 2072 cmp %ebx, %ecx 2073 jne L(find_diff) 2074L(27bytes): 2075 movl -27(%eax), %ecx 2076 movl -27(%edx), %ebx 2077 cmp %ebx, %ecx 2078 jne L(find_diff) 2079L(23bytes): 2080 movl -23(%eax), %ecx 2081 movl -23(%edx), %ebx 2082 cmp %ebx, %ecx 2083 jne L(find_diff) 2084L(19bytes): 2085 movl -19(%eax), %ecx 2086 movl -19(%edx), %ebx 2087 cmp %ebx, %ecx 2088 jne L(find_diff) 2089L(15bytes): 2090 movl -15(%eax), %ecx 2091 movl -15(%edx), %ebx 2092 cmp %ebx, %ecx 2093 jne L(find_diff) 2094L(11bytes): 2095 movl -11(%eax), %ecx 2096 movl -11(%edx), %ebx 2097 cmp %ebx, %ecx 2098 jne L(find_diff) 2099L(7bytes): 2100 movl -7(%eax), %ecx 2101 movl -7(%edx), %ebx 2102 cmp %ebx, %ecx 2103 jne L(find_diff) 2104L(3bytes): 2105 movzwl -3(%eax), %ecx 2106 movzwl -3(%edx), %ebx 2107 cmpb %bl, %cl 2108 jne L(end) 2109 cmp %bx, %cx 2110 jne L(end) 2111 movzbl -1(%eax), %eax 2112 cmpb -1(%edx), %al 2113 mov $0, %eax 2114 jne L(end) 2115 POP (%ebx) 2116 ret 2117 CFI_PUSH (%ebx) 2118 2119 .p2align 4 2120L(find_diff): 2121 cmpb %bl, %cl 2122 jne L(end) 2123 cmp %bx, %cx 2124 jne L(end) 2125 shr $16,%ecx 2126 shr $16,%ebx 2127 cmp %bl, %cl 2128 jne L(end) 2129 cmp %bx, %cx 2130 2131 .p2align 4 2132L(end): 2133 POP (%ebx) 2134 mov $1, %eax 2135 ja L(bigger) 2136 neg %eax 2137L(bigger): 2138 ret 2139# else 2140 2141/* for wmemcmp */ 2142 .p2align 4 2143L(find_diff): 2144 POP (%ebx) 2145 mov $1, %eax 2146 jg L(find_diff_bigger) 2147 neg %eax 2148 ret 2149 2150 .p2align 4 2151L(find_diff_bigger): 2152 ret 2153 2154# endif 2155END (MEMCMP) 2156#endif 2157