/* strcat with SSE2
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#if IS_IN (libc)

# include <sysdep.h>


/* Unwind-info bookkeeping for one 4-byte push/pop of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop REG and keep the unwind info in step.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

# ifdef PIC
#  define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
   jump table with relative offsets.  INDEX is a register containing the
   index into the jump table.  SCALE is the scale of INDEX.  Clobbers
   ECX.  */

#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)			\
	/* We first load PC into ECX.  */				\
	SETUP_PIC_REG(cx);						\
	/* Get the address of the jump table.  */			\
	addl	$(TABLE - .), %ecx;					\
	/* Get the entry and convert the relative offset to the		\
	absolute address.  */						\
	addl	(%ecx,INDEX,SCALE), %ecx;				\
	/* We loaded the jump table and adjusted ECX.  Go.  */		\
	_CET_NOTRACK jmp *%ecx
# else
#  define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  INDEX is a register containing the index into the
   jump table.  SCALE is the scale of INDEX.  */

#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)			\
	_CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
# endif

# ifndef STRCAT
#  define STRCAT	__strcat_sse2
# endif

/* Stack offsets of the arguments.  PARMS skips the return address;
   STR1 and STR2 are used after %esi has been pushed, hence the extra 4
   in STR1.  */
# define PARMS	4
# define STR1	PARMS+4
# define STR2	STR1+4

/* In the strncat build %ebx is pushed as well before LEN and STR3 are
   used, so both carry another 4 bytes.  STR3 always addresses the first
   (destination) argument and is what every exit path returns.  */
# ifdef USE_AS_STRNCAT
#  define LEN	STR2+8
#  define STR3	STR1+4
# else
#  define STR3	STR1
# endif

# define USE_AS_STRCAT
/* RETURN pops the saved registers and returns; the CFI_PUSH sequence
   after the `ret' re-establishes the unwind state for the code that
   follows the macro in the file.  The prologue pushes %esi first and
   %ebx second, so the state must be replayed in that same order:
   replaying %ebx before %esi would record the two save slots
   swapped.  */
# ifdef USE_AS_STRNCAT
#  define RETURN	POP(%ebx); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%ebx);
# else
#  define RETURN	POP(%esi); ret; CFI_PUSH(%esi);
# endif

/* char *STRCAT (char *dst, const char *src)
   With USE_AS_STRNCAT:
   char *STRCAT (char *dst, const char *src, size_t n)

   All arguments are read from the stack (STR1, STR2, LEN).  The value
   returned in %eax is the first argument, reloaded from STR3(%esp).

   Register roles inside the routine:
     %eax	  destination cursor (first scans for the terminator of
		  dst, then is the store address)
     %esi	  source cursor (saved on entry, restored by RETURN)
     %ebx	  strncat only: remaining byte budget (saved/restored)
     %ecx, %edx	  scratch: alignment offsets, byte masks, table index
     %xmm0-%xmm7  scratch

   Outline:
     1. find the terminating NUL of dst (two variants, chosen by the
	alignment of src and dst);
     2. copy src in 16-byte, then 64-byte steps until a block contains
	a NUL (or, for strncat, the budget runs out);
     3. finish through a jump table of 1..32 byte copy stubs.  */

.text
ENTRY (STRCAT)
	PUSH	(%esi)
	mov	STR1(%esp), %eax		/* eax = dst */
	mov	STR2(%esp), %esi		/* esi = src */
# ifdef USE_AS_STRNCAT
	PUSH	(%ebx)
	movl	LEN(%esp), %ebx			/* ebx = n */
	test	%ebx, %ebx
	jz	L(ExitZero)			/* n == 0: nothing to do */
# endif
	/* The two movs do not touch the flags, so the jz below still
	   tests the cmpb: an empty src returns dst unchanged.  */
	cmpb	$0, (%esi)
	mov	%esi, %ecx
	mov	%eax, %edx
	jz	L(ExitZero)

	/* ecx/edx = offset of src/dst inside its 64-byte block.  The
	   fast path below does a 32-byte unaligned read of src and a
	   16-byte unaligned read of dst; presumably the thresholds keep
	   those reads from running past the 64-byte block.  */
	and	$63, %ecx
	and	$63, %edx
	cmp	$32, %ecx
	ja	L(StrlenCore7_1)
	cmp	$48, %edx
	ja	L(alignment_prolog)

	/* Fast path: xmm0 = NUL mask of dst[0..15], xmm4/xmm7 = NUL
	   masks of src[0..15] / src[16..31] (data kept in xmm5/xmm6).  */
	pxor	%xmm0, %xmm0
	pxor	%xmm4, %xmm4
	pxor	%xmm7, %xmm7
	movdqu	(%eax), %xmm1
	movdqu	(%esi), %xmm5
	pcmpeqb	%xmm1, %xmm0
	movdqu	16(%esi), %xmm6
	pmovmskb %xmm0, %ecx
	pcmpeqb	%xmm5, %xmm4
	pcmpeqb	%xmm6, %xmm7
	test	%ecx, %ecx
	jnz	L(exit_less16_)
	/* No NUL matched, so xmm0 is still all-zero for the loop.  */
	mov	%eax, %ecx
	and	$-16, %eax
	jmp	L(loop_prolog)

	/* dst is close to the end of its 64-byte block: inspect the
	   aligned 16-byte block containing dst instead and shift away
	   the mask bits of the bytes that precede dst.  */
L(alignment_prolog):
	pxor	%xmm0, %xmm0
	pxor	%xmm4, %xmm4
	mov	%edx, %ecx
	pxor	%xmm7, %xmm7
	and	$15, %ecx			/* ecx = dst & 15 */
	and	$-16, %eax
	pcmpeqb	(%eax), %xmm0
	movdqu	(%esi), %xmm5
	movdqu	16(%esi), %xmm6
	pmovmskb %xmm0, %edx
	pcmpeqb	%xmm5, %xmm4
	shr	%cl, %edx
	pcmpeqb	%xmm6, %xmm7
	test	%edx, %edx
	jnz	L(exit_less16)
	add	%eax, %ecx

	pxor	%xmm0, %xmm0
L(loop_prolog):
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	/* Scan dst 64 bytes per iteration with aligned compares.  Each
	   xmmN is zero on entry and stays zero while no NUL is found,
	   so it can be reused as the comparison operand.  */
	.p2align 4
L(align16_loop):
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(align16_loop)
	bsf	%edx, %edx
	add	%edx, %eax
	jmp	L(StartStrcpyPart)

	/* The exitNN stubs leave eax pointing at the NUL of dst.  */
	.p2align 4
L(exit16):
	bsf	%edx, %edx
	lea	16(%eax, %edx), %eax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit32):
	bsf	%edx, %edx
	lea	32(%eax, %edx), %eax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit48):
	bsf	%edx, %edx
	lea	48(%eax, %edx), %eax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_less16):
	/* eax = aligned base + misalignment (ecx) + index of NUL.  */
	bsf	%edx, %edx
	add	%ecx, %eax
	add	%edx, %eax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_less16_):
	bsf	%ecx, %ecx
	add	%ecx, %eax

	/* Copy phase, variant with src[0..31] already loaded:
	   xmm5/xmm6 hold the data, xmm4/xmm7 the NUL masks.  */
	.p2align 4
L(StartStrcpyPart):
	pmovmskb %xmm4, %edx
# ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1)

	movdqu	%xmm5, (%eax)
	pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes1)

	/* Switch to aligned source reads: round esi down to 16 and move
	   eax back by the same amount (ecx = src & 15).  */
	mov	%esi, %ecx
	and	$-16, %esi
	and	$15, %ecx
	pxor	%xmm0, %xmm0
# ifdef USE_AS_STRNCAT
	/* ebx += ecx, saturating to 0xffffffff if the add carries.  */
	add	%ecx, %ebx
	sbb	%edx, %edx
	or	%edx, %ebx
# endif
	sub	%ecx, %eax
	jmp	L(Unalign16Both)

	/* Variant taken when src is more than 32 bytes into its 64-byte
	   block: find the end of dst with aligned compares only and do
	   not preload src.  */
L(StrlenCore7_1):
	mov	%eax, %ecx
	pxor	%xmm0, %xmm0
	and	$15, %ecx
	and	$-16, %eax
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	shr	%cl, %edx
	test	%edx, %edx
	jnz	L(exit_less16_1)
	add	%eax, %ecx

	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3

	.p2align 4
L(align16_loop_1):
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16_1)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32_1)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48_1)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(align16_loop_1)
	bsf	%edx, %edx
	add	%edx, %eax
	jmp	L(StartStrcpyPart_1)

	.p2align 4
L(exit16_1):
	bsf	%edx, %edx
	lea	16(%eax, %edx), %eax
	jmp	L(StartStrcpyPart_1)

	.p2align 4
L(exit32_1):
	bsf	%edx, %edx
	lea	32(%eax, %edx), %eax
	jmp	L(StartStrcpyPart_1)

	.p2align 4
L(exit48_1):
	bsf	%edx, %edx
	lea	48(%eax, %edx), %eax
	jmp	L(StartStrcpyPart_1)

	.p2align 4
L(exit_less16_1):
	bsf	%edx, %edx
	add	%ecx, %eax
	add	%edx, %eax

	/* Copy phase, variant without preloaded source data.
	   ecx = src & 15, esi = src rounded down to 16.  */
	.p2align 4
L(StartStrcpyPart_1):
	mov	%esi, %ecx
	and	$15, %ecx
	and	$-16, %esi
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

# ifdef USE_AS_STRNCAT
	cmp	$48, %ebx
	ja	L(BigN)
# endif
	pcmpeqb	(%esi), %xmm1
# ifdef USE_AS_STRNCAT
	add	%ecx, %ebx
# endif
	pmovmskb %xmm1, %edx
	shr	%cl, %edx			/* drop bytes before src */
# ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%esi), %xmm0
	pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
	movdqu	%xmm1, (%eax)
	sub	%ecx, %eax

	/* Unrolled 16-byte steps: aligned loads from esi + ecx,
	   unaligned stores to eax + ecx.  Each step loads the next
	   block, stores the previous one and tests the new block for a
	   NUL.  xmm0 is zero whenever it is used as the compare
	   operand, because every earlier compare found no match.  */
	.p2align 4
L(Unalign16Both):
	mov	$16, %ecx
	movdqa	(%esi, %ecx), %xmm1
	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%eax, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$48, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%eax, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%esi, %ecx), %xmm4
	movdqu	%xmm3, (%eax, %ecx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%esi, %ecx), %xmm1
	movdqu	%xmm4, (%eax, %ecx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%eax, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%eax, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
# ifdef USE_AS_STRNCAT
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	/* Move on to 64-byte steps: round the source position down to
	   a multiple of 64 and shift eax (and the strncat budget) by
	   the same distance.  */
	movdqu	%xmm3, (%eax, %ecx)
	mov	%esi, %edx
	lea	16(%esi, %ecx), %esi
	and	$-0x40, %esi
	sub	%esi, %edx
	sub	%edx, %eax
# ifdef USE_AS_STRNCAT
	lea	128(%ebx, %edx), %ebx
# endif
	/* Load four 16-byte blocks (kept in xmm4..xmm7) and fold them
	   with pminub: the byte-wise minimum contains a zero byte iff
	   one of the four blocks does.  */
	movaps	(%esi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%esi), %xmm5
	movaps	32(%esi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%esi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
# ifdef USE_AS_STRNCAT
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%edx, %edx
	jnz	L(Unaligned64Leave)

	/* Main loop: store the 64 bytes checked in the previous round
	   while loading and checking the next 64.  */
	.p2align 4
L(Unaligned64Loop_start):
	add	$64, %eax
	add	$64, %esi
	movdqu	%xmm4, -64(%eax)
	movaps	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%eax)
	movaps	16(%esi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%esi), %xmm3
	movdqu	%xmm6, -32(%eax)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%eax)
	movaps	48(%esi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %edx
# ifdef USE_AS_STRNCAT
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%edx, %edx
	jz	L(Unaligned64Loop_start)

	/* A NUL is somewhere in xmm4..xmm7: locate the block, store the
	   complete blocks in front of it and finish via the table.  */
L(Unaligned64Leave):
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%ecx, %ecx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	/* Both masks were empty, so xmm0 and xmm1 are zero again.  */
	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	bsf	%ecx, %edx
	movdqu	%xmm4, (%eax)
	movdqu	%xmm5, 16(%eax)
	movdqu	%xmm6, 32(%eax)
	add	$48, %esi
	add	$48, %eax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

# ifdef USE_AS_STRNCAT
	/* strncat with n > 48 on the StrlenCore7_1 path: same first
	   steps as L(StartStrcpyPart_1) but without the per-step budget
	   checks; the budget is adjusted once before joining the
	   unrolled copy at L(Unalign16BothBigN).  */
	.p2align 4
L(BigN):
	pcmpeqb	(%esi), %xmm1
	pmovmskb %xmm1, %edx
	shr	%cl, %edx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%esi), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
	movdqu	%xmm1, (%eax)
	sub	%ecx, %eax
	sub	$48, %ebx
	add	%ecx, %ebx

	mov	$16, %ecx
	movdqa	(%esi, %ecx), %xmm1
	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%eax, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
	jmp	L(Unalign16BothBigN)
# endif

/*------------end of main part-------------------------------*/

/* Case1: a NUL was found.  Each stub fixes up esi/eax so they address
   the first byte still to be copied, turns the mask in edx into the
   index of the NUL, and dispatches through L(ExitTable); entry K of
   that table copies K + 1 bytes.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%ecx, %eax
	add	%ecx, %esi
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%ecx, %esi
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %esi
	add	$16, %eax
L(CopyFrom1To16BytesTail1):
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	/* The NUL is in the second aligned block: index from the real
	   (unaligned) src is 16 + bit - (src & 15).  */
	bsf	%edx, %edx
	add	%ecx, %esi
	add	$16, %edx
	sub	%ecx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%ecx, %edx
	movdqu	%xmm4, (%eax)
	add	$16, %esi
	add	$16, %eax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%edx, %edx
	movdqu	%xmm4, (%eax)
	movdqu	%xmm5, 16(%eax)
	add	$32, %esi
	add	$32, %eax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

# ifdef USE_AS_STRNCAT

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Case2: the byte budget ends inside the current block and the block
   also contains a NUL.  If the NUL index (edx) is below the remaining
   budget (ebx) finish as Case1, otherwise copy exactly ebx bytes and
   append a NUL through L(ExitStrncatTable).  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%ecx, %eax
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	add	$16, %edx
	sub	%ecx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

L(CopyFrom1To16BytesTailCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

/* Case2 or Case3: the budget ends inside the current block.  A
   non-empty NUL mask (edx) means Case2; otherwise it is Case3: copy
   the remaining ebx bytes and append a NUL.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%ecx, %eax
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To32BytesCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTailCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %eax
	add	$16, %esi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

# endif

/* Exit stubs.  L(ExitK) copies K bytes from (%esi) to (%eax), reloads
   the original destination into eax and returns.  Odd sizes use two
   overlapping moves.

   In the plain strcat build several stubs store %dh as the final
   byte: the stubs are entered with the table index in edx, which is
   below 256, so %dh is zero there.  The strncat build reloads %dh
   from the source instead.

   L(StrncatExitK) is entered with ebx == K (again below 256, so %bh
   is zero); it stores the terminator at offset K and falls through
   to L(ExitK) to copy the K bytes in front of it.  */

# ifdef USE_AS_STRNCAT
	.p2align 4
L(StrncatExit0):
	movb	%bh, (%eax)
	mov	STR3(%esp), %eax
	RETURN
# endif

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit1):
	movb	%bh, 1(%eax)
# endif
L(Exit1):
# ifdef USE_AS_STRNCAT
	movb	(%esi), %dh
# endif
	movb	%dh, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit2):
	movb	%bh, 2(%eax)
# endif
L(Exit2):
	movw	(%esi), %dx
	movw	%dx, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit3):
	movb	%bh, 3(%eax)
# endif
L(Exit3):
	movw	(%esi), %cx
	movw	%cx, (%eax)
# ifdef USE_AS_STRNCAT
	movb	2(%esi), %dh
# endif
	movb	%dh, 2(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit4):
	movb	%bh, 4(%eax)
# endif
L(Exit4):
	movl	(%esi), %edx
	movl	%edx, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit5):
	movb	%bh, 5(%eax)
# endif
L(Exit5):
	movl	(%esi), %ecx
# ifdef USE_AS_STRNCAT
	movb	4(%esi), %dh
# endif
	movb	%dh, 4(%eax)
	movl	%ecx, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit6):
	movb	%bh, 6(%eax)
# endif
L(Exit6):
	movl	(%esi), %ecx
	movw	4(%esi), %dx
	movl	%ecx, (%eax)
	movw	%dx, 4(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit7):
	movb	%bh, 7(%eax)
# endif
L(Exit7):
	movl	(%esi), %ecx
	movl	3(%esi), %edx
	movl	%ecx, (%eax)
	movl	%edx, 3(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit8):
	movb	%bh, 8(%eax)
# endif
L(Exit8):
	movlpd	(%esi), %xmm0
	movlpd	%xmm0, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit9):
	movb	%bh, 9(%eax)
# endif
L(Exit9):
	movlpd	(%esi), %xmm0
# ifdef USE_AS_STRNCAT
	movb	8(%esi), %dh
# endif
	movb	%dh, 8(%eax)
	movlpd	%xmm0, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit10):
	movb	%bh, 10(%eax)
# endif
L(Exit10):
	movlpd	(%esi), %xmm0
	movw	8(%esi), %dx
	movlpd	%xmm0, (%eax)
	movw	%dx, 8(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit11):
	movb	%bh, 11(%eax)
# endif
L(Exit11):
	movlpd	(%esi), %xmm0
	movl	7(%esi), %edx
	movlpd	%xmm0, (%eax)
	movl	%edx, 7(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit12):
	movb	%bh, 12(%eax)
# endif
L(Exit12):
	movlpd	(%esi), %xmm0
	movl	8(%esi), %edx
	movlpd	%xmm0, (%eax)
	movl	%edx, 8(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit13):
	movb	%bh, 13(%eax)
# endif
L(Exit13):
	movlpd	(%esi), %xmm0
	movlpd	5(%esi), %xmm1
	movlpd	%xmm0, (%eax)
	movlpd	%xmm1, 5(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit14):
	movb	%bh, 14(%eax)
# endif
L(Exit14):
	movlpd	(%esi), %xmm0
	movlpd	6(%esi), %xmm1
	movlpd	%xmm0, (%eax)
	movlpd	%xmm1, 6(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit15):
	movb	%bh, 15(%eax)
# endif
L(Exit15):
	movlpd	(%esi), %xmm0
	movlpd	7(%esi), %xmm1
	movlpd	%xmm0, (%eax)
	movlpd	%xmm1, 7(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit16):
	movb	%bh, 16(%eax)
# endif
L(Exit16):
	movdqu	(%esi), %xmm0
	movdqu	%xmm0, (%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit17):
	movb	%bh, 17(%eax)
# endif
L(Exit17):
	movdqu	(%esi), %xmm0
# ifdef USE_AS_STRNCAT
	movb	16(%esi), %dh
# endif
	movdqu	%xmm0, (%eax)
	movb	%dh, 16(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit18):
	movb	%bh, 18(%eax)
# endif
L(Exit18):
	movdqu	(%esi), %xmm0
	movw	16(%esi), %cx
	movdqu	%xmm0, (%eax)
	movw	%cx, 16(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit19):
	movb	%bh, 19(%eax)
# endif
L(Exit19):
	movdqu	(%esi), %xmm0
	movl	15(%esi), %ecx
	movdqu	%xmm0, (%eax)
	movl	%ecx, 15(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit20):
	movb	%bh, 20(%eax)
# endif
L(Exit20):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movdqu	%xmm0, (%eax)
	movl	%ecx, 16(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit21):
	movb	%bh, 21(%eax)
# endif
L(Exit21):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
# ifdef USE_AS_STRNCAT
	movb	20(%esi), %dh
# endif
	movdqu	%xmm0, (%eax)
	movl	%ecx, 16(%eax)
	movb	%dh, 20(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit22):
	movb	%bh, 22(%eax)
# endif
L(Exit22):
	movdqu	(%esi), %xmm0
	movlpd	14(%esi), %xmm3
	movdqu	%xmm0, (%eax)
	movlpd	%xmm3, 14(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit23):
	movb	%bh, 23(%eax)
# endif
L(Exit23):
	movdqu	(%esi), %xmm0
	movlpd	15(%esi), %xmm3
	movdqu	%xmm0, (%eax)
	movlpd	%xmm3, 15(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit24):
	movb	%bh, 24(%eax)
# endif
L(Exit24):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movdqu	%xmm0, (%eax)
	movlpd	%xmm2, 16(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit25):
	movb	%bh, 25(%eax)
# endif
L(Exit25):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
# ifdef USE_AS_STRNCAT
	movb	24(%esi), %dh
# endif
	movdqu	%xmm0, (%eax)
	movlpd	%xmm2, 16(%eax)
	movb	%dh, 24(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit26):
	movb	%bh, 26(%eax)
# endif
L(Exit26):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movw	24(%esi), %cx
	movdqu	%xmm0, (%eax)
	movlpd	%xmm2, 16(%eax)
	movw	%cx, 24(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit27):
	movb	%bh, 27(%eax)
# endif
L(Exit27):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	23(%esi), %ecx
	movdqu	%xmm0, (%eax)
	movlpd	%xmm2, 16(%eax)
	movl	%ecx, 23(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit28):
	movb	%bh, 28(%eax)
# endif
L(Exit28):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	24(%esi), %ecx
	movdqu	%xmm0, (%eax)
	movlpd	%xmm2, 16(%eax)
	movl	%ecx, 24(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit29):
	movb	%bh, 29(%eax)
# endif
L(Exit29):
	movdqu	(%esi), %xmm0
	movdqu	13(%esi), %xmm2
	movdqu	%xmm0, (%eax)
	movdqu	%xmm2, 13(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit30):
	movb	%bh, 30(%eax)
# endif
L(Exit30):
	movdqu	(%esi), %xmm0
	movdqu	14(%esi), %xmm2
	movdqu	%xmm0, (%eax)
	movdqu	%xmm2, 14(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit31):
	movb	%bh, 31(%eax)
# endif
L(Exit31):
	movdqu	(%esi), %xmm0
	movdqu	15(%esi), %xmm2
	movdqu	%xmm0, (%eax)
	movdqu	%xmm2, 15(%eax)
	mov	STR3(%esp), %eax
	RETURN

	.p2align 4
# ifdef USE_AS_STRNCAT
L(StrncatExit32):
	movb	%bh, 32(%eax)
# endif
L(Exit32):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm2
	movdqu	%xmm0, (%eax)
	movdqu	%xmm2, 16(%eax)
	mov	STR3(%esp), %eax
	RETURN

# ifdef USE_AS_STRNCAT

	/* The budget ran out inside the 64 bytes held in xmm4..xmm7.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	test	%edx, %edx
	jnz	L(Unaligned64LeaveCase2)
	/* Case3: no NUL in these 64 bytes.  Store whole blocks while
	   the budget lasts; the remainder goes through
	   L(CopyFrom1To16BytesCase3) with ecx as the block offset.  */
L(Unaligned64LeaveCase3):
	lea	64(%ebx), %ecx
	and	$-16, %ecx
	add	$48, %ebx
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%eax)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%eax)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%eax)
	sub	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	/* All 64 bytes fit: store the last block and terminate.  */
	movdqu	%xmm7, 48(%eax)
	xor	%bh, %bh
	movb	%bh, 64(%eax)
	mov	STR3(%esp), %eax
	RETURN

	/* Case2: the 64 bytes contain a NUL and the budget ends here
	   too.  Walk the four blocks, tracking the block offset in ecx
	   and the budget in ebx.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	xor	%ecx, %ecx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm4, (%eax)
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm5, 16(%eax)
	add	$16, %ecx
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm6, 32(%eax)
	lea	16(%eax, %ecx), %eax
	lea	16(%esi, %ecx), %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
	/* Nothing to append: eax still holds the destination.  */
	.p2align 4
L(ExitZero):
	RETURN

END (STRCAT)

	.section .rodata
	/* The alignment request has to follow the section switch to
	   apply to the tables; issued before it, it would only pad the
	   end of .text.  */
	.p2align 4
	/* Entry K (0-based) copies K + 1 bytes.  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
# ifdef USE_AS_STRNCAT
	/* Entry K copies K bytes and appends a NUL.  */
L(ExitStrncatTable):
	.int	JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
	.int	JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
# endif
#endif