/* strrchr SSE2 without bsf and bsr
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind-info bookkeeping for a 4-byte push of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind-info bookkeeping for a 4-byte pop of REG.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a register and record it in the unwind info.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has pushed
   %edi: 4 bytes of return address + 4 bytes of saved %edi.  */
# define PARMS	8
# define ENTRANCE	PUSH(%edi);
/* Restore %edi and return.  The trailing CFI_PUSH re-establishes the
   "%edi is on the stack" unwind state for the code that textually
   follows the ret.  */
# define RETURN	POP(%edi); ret; CFI_PUSH(%edi);

# define STR1	PARMS
# define STR2	STR1+4

/* __strrchr_sse2

   Stack arguments (offsets valid after ENTRANCE):
     STR1(%esp)	address of the NUL-terminated string
     STR2(%esp)	byte to search for; only the low byte is used, it is
		broadcast to all 16 bytes of %xmm1

   Result in %eax: address of the last byte equal to the search byte,
   or 0 when there is none.  The terminating NUL is part of the
   searched range, so searching for 0 yields the terminator's address.

   Register roles:
     %edi	scan pointer.  At every exit label it points 16 bytes
		past the start of the block the masks refer to.
     %esi	value of %edi remembered for the most recent block that
		contained a match (valid only when %ebx != 0)
     %ebx	match mask of that remembered block, 0 if none so far
     %eax	match mask of the current block (bit i <=> byte i)
     %ecx	NUL mask of the current block / shift count / scratch
     %edx	NUL mask in the crosscache prologue / scratch
     %xmm1	search byte replicated 16 times
     %xmm2	zero comparand.  "pcmpeqb %xmm0, %xmm2" leaves it
		all-zero as long as the block holds no NUL, so it never
		needs to be cleared again inside the loop.
     %xmm3	zero comparand for the crosscache prologue only

   %edi is saved by ENTRANCE; %esi and %ebx are pushed only on the
   paths that enter L(loop) and are popped again before leaving it.  */

	atom_text_section
ENTRY (__strrchr_sse2)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of %xmm1 into its low dword ...  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET of the string within its 64-byte block.  */
	and	$63, %ecx
	cmp	$48, %ecx
	/* ... and the low dword into all four dwords.  */
	pshufd	$0, %xmm1, %xmm1
	/* OFFSET > 48: a 16-byte load from the string start would run
	   past the end of the 64-byte block, so take the aligned-load
	   path instead.  */
	ja	L(crosscache)

/* Unaligned string whose first 16 bytes stay inside the 64-byte
   block: examine them with one unaligned load.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%ecx, %ecx
	jnz	L(return_null)

	/* No match and no NUL.  Round the scan pointer down to a 16-byte
	   boundary; the loop may look again at bytes already examined
	   above.  */
	and	$-16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	/* No match remembered yet.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match1):
	/* First block has a match (%eax).  If it also has a NUL (%ecx)
	   the answer is decided right here.  */
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember this block's matches, then continue from the next
	   16-byte boundary.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string: load the enclosing aligned 16-byte block
   and shift the bytes that precede the string out of both masks.
   %xmm3 is the zero comparand here so that %xmm2 stays all-zero for
   the loop.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* No match remembered yet.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match):
	/* First (aligned) block has a match.  If it also has a NUL the
	   answer is decided in the prologue.  */
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember the matches.  The masks were shifted right by %cl, so
	   the remembered pointer is biased by the same amount.  */
	mov	%eax, %ebx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Four 16-byte blocks per iteration;
   leave as soon as a block contains a match or a NUL.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	/* The block just read has a match and/or a NUL.  %eax still
	   holds the pure match mask.  */
	test	%eax, %eax
	jnz	L(match)
L(return_value):
	/* End of string with no usable match in the current block: fall
	   back to the remembered block, or return NULL if there is
	   none.  */
	test	%ebx, %ebx
	jz	L(return_null_1)
	mov	%ebx, %eax
	mov	%esi, %edi

	POP	(%ebx)
	POP	(%esi)

	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(match):
	/* %ecx was merged with %eax above, so fetch the NUL mask again.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
	/* Match but no NUL: remember this block and keep scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

/* The current block holds both a match (%eax) and a NUL (%ecx).
   Locate the lowest set bit of %ecx by testing bits and keep only the
   match bits at or below that position.  The NUL position itself is
   kept so that a search for 0 finds the terminator.  If nothing
   survives, fall back to L(return_value); otherwise drop the saved
   %ebx/%esi and decode the mask in L(match_exit).

   The immediates "$1 << N - 1" are evaluated by gas as (1 << N) - 1,
   i.e. a mask of the N low bits ("<<" binds tighter than "-" in gas
   expressions).  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(find_zero_8)
	test	$0x01, %cl
	jnz	L(FindZeroExit1)
	test	$0x02, %cl
	jnz	L(FindZeroExit2)
	test	$0x04, %cl
	jnz	L(FindZeroExit3)
	/* First NUL is byte 3.  */
	and	$1 << 4 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(find_zero_8):
	test	$0x10, %cl
	jnz	L(FindZeroExit5)
	test	$0x20, %cl
	jnz	L(FindZeroExit6)
	test	$0x40, %cl
	jnz	L(FindZeroExit7)
	/* First NUL is byte 7.  */
	and	$1 << 8 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(find_zero_high_8)
	test	$0x01, %ch
	jnz	L(FindZeroExit9)
	test	$0x02, %ch
	jnz	L(FindZeroExit10)
	test	$0x04, %ch
	jnz	L(FindZeroExit11)
	/* First NUL is byte 11.  */
	and	$1 << 12 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(find_zero_high_8):
	test	$0x10, %ch
	jnz	L(FindZeroExit13)
	test	$0x20, %ch
	jnz	L(FindZeroExit14)
	test	$0x40, %ch
	jnz	L(FindZeroExit15)
	/* First NUL is byte 15.  */
	and	$1 << 16 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

/* L(FindZeroExitN): the first NUL is byte N-1 of the block; keep the
   N low match bits.  */
	.p2align 4
L(FindZeroExit1):
	and	$1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit2):
	and	$1 << 2 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit3):
	and	$1 << 3 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit5):
	and	$1 << 5 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit6):
	and	$1 << 6 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit7):
	and	$1 << 7 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit9):
	and	$1 << 9 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit10):
	and	$1 << 10 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit11):
	and	$1 << 11 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit13):
	and	$1 << 13 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit14):
	and	$1 << 14 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_exit)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit15):
	and	$1 << 15 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	/* Falls through into L(match_exit).  */

/* %eax is a non-zero match mask for the 16-byte block that ends at
   %edi; only %edi is still on the stack.  Find the highest set bit by
   testing and return the address of that byte: bit i maps to
   %edi - 16 + i (see L(ExitN) below, N = i + 1).  */
	.p2align 4
L(match_exit):
	test	%ah, %ah
	jnz	L(match_exit_high)
	mov	%al, %dl
	and	$15 << 4, %dl
	jnz	L(match_exit_8)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x02, %al
	jnz	L(Exit2)
	/* Only bit 0 is set.  */
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(match_exit_8):
	test	$0x80, %al
	jnz	L(Exit8)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x20, %al
	jnz	L(Exit6)
	/* Highest set bit is bit 4.  */
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_exit_high):
	mov	%ah, %dh
	and	$15 << 4, %dh
	jnz	L(match_exit_high_8)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x02, %ah
	jnz	L(Exit10)
	/* Highest set bit is bit 8.  */
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_exit_high_8):
	test	$0x80, %ah
	jnz	L(Exit16)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x20, %ah
	jnz	L(Exit14)
	/* Highest set bit is bit 12.  */
	lea	-4(%edi), %eax
	RETURN

/* L(ExitN): the last match is byte N-1 of the block ending at %edi.  */
	.p2align 4
L(Exit2):
	lea	-15(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	-14(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	-13(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	-11(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	-10(%edi), %eax
	RETURN

	.p2align 4
L(Exit8):
	lea	-9(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	-7(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	-6(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	-5(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	-3(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	-2(%edi), %eax
	RETURN

	.p2align 4
L(Exit16):
	lea	-1(%edi), %eax
	RETURN

/* Return NULL.  Used before %esi/%ebx have been pushed.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

/* Prologue variants of L(find_zero): the very first block examined
   holds both a match (%eax) and a NUL.  %esi/%ebx have not been
   pushed and there is no remembered earlier match, so an empty masked
   result returns NULL directly.

   L(prolog_find_zero) comes from the crosscache path: the masks were
   shifted right by %ecx, so bias %edi by the same amount, then move
   the NUL mask from %edx into %ecx.
   L(prolog_find_zero_1) comes from the unaligned first load with the
   NUL mask already in %ecx.  */
	.p2align 4
L(prolog_find_zero):
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(prolog_find_zero_8)
	test	$0x01, %cl
	jnz	L(PrologFindZeroExit1)
	test	$0x02, %cl
	jnz	L(PrologFindZeroExit2)
	test	$0x04, %cl
	jnz	L(PrologFindZeroExit3)
	/* First NUL is byte 3.  */
	and	$1 << 4 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_8):
	test	$0x10, %cl
	jnz	L(PrologFindZeroExit5)
	test	$0x20, %cl
	jnz	L(PrologFindZeroExit6)
	test	$0x40, %cl
	jnz	L(PrologFindZeroExit7)
	/* First NUL is byte 7.  */
	and	$1 << 8 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(prolog_find_zero_high_8)
	test	$0x01, %ch
	jnz	L(PrologFindZeroExit9)
	test	$0x02, %ch
	jnz	L(PrologFindZeroExit10)
	test	$0x04, %ch
	jnz	L(PrologFindZeroExit11)
	/* First NUL is byte 11.  */
	and	$1 << 12 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high_8):
	test	$0x10, %ch
	jnz	L(PrologFindZeroExit13)
	test	$0x20, %ch
	jnz	L(PrologFindZeroExit14)
	test	$0x40, %ch
	jnz	L(PrologFindZeroExit15)
	/* First NUL is byte 15.  */
	and	$1 << 16 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

/* L(PrologFindZeroExitN): the first NUL is byte N-1; keep the N low
   match bits, return NULL if none remain.  */
	.p2align 4
L(PrologFindZeroExit1):
	and	$1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit2):
	and	$1 << 2 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit3):
	and	$1 << 3 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit5):
	and	$1 << 5 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit6):
	and	$1 << 6 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit7):
	and	$1 << 7 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit9):
	and	$1 << 9 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit10):
	and	$1 << 10 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit11):
	and	$1 << 11 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit13):
	and	$1 << 13 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit14):
	and	$1 << 14 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit15):
	and	$1 << 15 - 1, %eax
	jnz	L(match_exit)
	xor	%eax, %eax
	RETURN

END (__strrchr_sse2)
#endif