/* strcpy with SSE2 and unaligned load
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <isa-level.h>

/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
   so we need this to build for ISA V2 builds. */
#if ISA_SHOULD_BUILD (2)


# ifndef USE_AS_STRCAT
#  include <sysdep.h>

#  ifndef STRCPY
#   define STRCPY  __strcpy_sse2_unaligned
#  endif

# endif

/* Jump tables hold 32-bit offsets relative to the table base (JMPTBL).
   BRANCH_TO_JMPTBL_ENTRY loads entry INDEX (sign-extended), adds the
   table address back and jumps there.  Clobbers %r11 and %rcx.  */
# define JMPTBL(I, B)	I - B
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)             \
	lea	TABLE(%rip), %r11;                              \
	movslq	(%r11, INDEX, SCALE), %rcx;                     \
	lea	(%r11, %rcx), %rcx;                             \
	_CET_NOTRACK jmp *%rcx

/* Register usage, as visible in the code below:
     %rdi  destination pointer
     %rsi  source pointer
     %r8   remaining byte count (USE_AS_STRNCPY builds only; loaded
	   from the third argument, %RDX_LP)
     %rax  return value: the incoming %rdi, unless USE_AS_STPCPY is
	   defined, in which case the exit stubs compute it
     %rcx, %rdx, %r10, %r11, %xmm0-%xmm7  scratch.

   When USE_AS_STRCAT is defined the entry sequence below is not
   emitted; presumably the including file supplies it and sets up
   %rcx/%rax/%r8 equivalently -- confirm against that file.  */
# ifndef USE_AS_STRCAT

	.text
ENTRY (STRCPY)
#  ifdef USE_AS_STRNCPY
	mov	%RDX_LP, %R8_LP
	test	%R8_LP, %R8_LP
	jz	L(ExitZero)		/* n == 0 */
#  endif
	mov	%rsi, %rcx
#  ifndef USE_AS_STPCPY
	mov	%rdi, %rax      /* save result */
#  endif

# endif

	/* If (src & 63) <= 32 the 32 bytes starting at src lie inside one
	   64-byte-aligned block and are read directly with unaligned
	   loads in L(SourceStringAlignmentLess32).  */
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(SourceStringAlignmentLess32)

	/* Otherwise read the aligned 16-byte block containing src.
	   %rcx = src & 15; the shift drops the mask bits that belong to
	   bytes in front of the string.  */
	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx

# ifdef USE_AS_STRNCPY
	/* %r10 = 16 - %rcx (17 - %rcx for plain strncpy); taken when the
	   count does not exceed it.  */
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	mov	$16, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
#  else
	mov	$17, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
#  endif
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	/* No terminator yet: examine the next aligned 16 bytes.  */
	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	/* %rsi + %rcx is the original, unaligned start of the string.  */
	movdqu	(%rsi, %rcx), %xmm1   /* copy 16 bytes */
	movdqu	%xmm1, (%rdi)

/* From here on the source is read in aligned 16-byte blocks and the
   destination is written with unaligned stores.  %rdi is moved back by
   the source misalignment so that (%rdi, %rcx) is the destination of
   the block read from (%rsi, %rcx).  %xmm0 is all-zero on the
   fall-through path because every compare so far found no match.  */
	.p2align 4
L(Unalign16Both):
	sub	%rcx, %rdi
# ifdef USE_AS_STRNCPY
	/* %r8 += %rcx, saturating to all-ones on carry.  */
	add	%rcx, %r8
	sbb	%rcx, %rcx
	or	%rcx, %r8
# endif
	mov	$16, %rcx
	movdqa	(%rsi, %rcx), %xmm1
	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$48, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Unrolled steps: load the next block, store the previous one,
	   test the new block for a zero byte.  */
	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm4
	movdqu	%xmm3, (%rdi, %rcx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm1
	movdqu	%xmm4, (%rdi, %rcx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Round the source position down to a 64-byte boundary and move
	   %rdi by the same amount, so the distance between the two
	   pointers is unchanged.  */
	movdqu	%xmm3, (%rdi, %rcx)
	mov	%rsi, %rdx
	lea	16(%rsi, %rcx), %rsi
	and	$-0x40, %rsi
	sub	%rsi, %rdx
	sub	%rdx, %rdi
# ifdef USE_AS_STRNCPY
	/* Re-base the remaining count for the new position.  */
	lea	128(%r8, %rdx), %r8
# endif
	/* Main loop: 64 source bytes per iteration.  The byte-wise minimum
	   of the four blocks has a zero byte iff any block has one.
	   %xmm4-%xmm7 keep copies of the four blocks for the stores.  */
L(Unaligned64Loop):
	movaps	(%rsi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%rsi), %xmm5
	movaps	32(%rsi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%rsi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(Unaligned64Leave)

	/* Store the 64 bytes just checked while loading and checking the
	   next 64.  */
L(Unaligned64Loop_start):
	add	$64, %rdi
	add	$64, %rsi
	movdqu	%xmm4, -64(%rdi)
	movaps	(%rsi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%rdi)
	movaps	16(%rsi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%rsi), %xmm3
	movdqu	%xmm6, -32(%rdi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%rdi)
	movaps	48(%rsi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
# endif
	test	%rdx, %rdx
	jz	L(Unaligned64Loop_start)

	/* A zero byte is somewhere in %xmm4..%xmm7; find which block.  */
L(Unaligned64Leave):
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%rcx, %rcx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	/* Terminator is in the fourth block; %rdx = its index there.  */
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
	movdqu	%xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	48(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm7, 48(%rdi)
	add	$15, %r8
	sub	%rdx, %r8
	lea	49(%rdi, %rdx), %rdi	/* first byte after the terminator */
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$48, %rsi
	add	$48, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

/* Entry path taken when (src & 63) <= 32: the first 32 source bytes are
   inside one 64-byte-aligned block and are loaded with two unaligned
   16-byte loads (presumably so the loads cannot run into a following
   page -- confirm).  */

L(SourceStringAlignmentLess32):
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$16, %r8
#  else
	cmp	$17, %r8
#  endif
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx

# ifdef USE_AS_STRNCPY
#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$32, %r8
#  else
	cmp	$33, %r8
#  endif
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
# endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	/* No terminator in the first 32 bytes: continue with the aligned
	   reader, %rcx = src & 15 as it expects.  */
	and	$-16, %rsi
	and	$15, %rcx
	jmp	L(Unalign16Both)

/*------End of main part with loops---------------------*/

/* Case1: a terminator was found and the count (if any) is not
   exhausted.  Each label fixes up %rsi/%rdi, turns the mask in %rdx
   into a byte index and dispatches through L(ExitTable).  */

# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif
	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi		/* back to the unaligned start */
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %rsi
	add	$16, %rdi
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
# endif
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	/* Terminator is in the second aligned block: index relative to
	   the unaligned start is bsf + 16 - %rcx.  */
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	/* Exits from the 64-byte loop; the suffix is the offset of the
	   block holding the terminator.  Earlier blocks are stored here.
	   In the strncpy build the terminating block is stored whole and
	   the rest of the buffer is zero-filled.  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%rdx, %rdx
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm4, (%rdi)
	add	$63, %r8
	sub	%rdx, %r8
	lea	1(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	16(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm5, 16(%rdi)
	add	$47, %r8
	sub	%rdx, %r8
	lea	17(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$16, %rsi
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%rdx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
#  ifdef USE_AS_STPCPY
	lea	32(%rdi, %rdx), %rax
#  endif
	movdqu	%xmm6, 32(%rdi)
	add	$31, %r8
	sub	%rdx, %r8
	lea	33(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
# else
	add	$32, %rsi
	add	$32, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
# endif

# ifdef USE_AS_STRNCPY
#  ifndef USE_AS_STRCAT
	/* strncpy only: store the block that holds the terminator, then
	   continue in L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)
#  endif

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Case2: the count runs out in a block that also contains a
   terminator.  If the terminator index (%rdx) is below the remaining
   count (%r8) leave through L(ExitTable); otherwise the count wins and
   L(ExitStrncpyTable) is indexed by %r8.  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Case2 or Case3: the count ran out.  A non-zero mask in %rdx means a
   terminator is present as well (Case2); otherwise (Case3) copy the
   remaining %r8 bytes through L(ExitStrncpyTable).  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

# endif

/* Exit stubs for 1..16 bytes, entered through L(ExitTable) (defined
   outside this part of the file).  L(ExitN) stores N bytes at (%rdi),
   loading them from (%rsi) -- except that some stubs store %dh as the
   final byte instead of loading it.  %dh is zero there because %rdx
   holds the small byte index used to select the stub at every dispatch
   site visible in this file.  Wider stubs use two overlapping moves.
   USE_AS_STPCPY: %rax = address of the last byte stored.
   strncpy build (USE_AS_STRNCPY && !USE_AS_STRCAT): subtract N from
   the count in %r8, advance %rdi by N and, if the count is not yet
   zero, continue in L(StrncpyFillTailWithZero).  lea leaves the flags
   alone, so jnz still tests the result of sub.  */

	.p2align 4
L(Exit1):
	mov	%dh, (%rdi)
# ifdef USE_AS_STPCPY
	lea	(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$1, %r8
	lea	1(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$2, %r8
	lea	2(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit3):
	mov	(%rsi), %cx
	mov	%cx, (%rdi)
	mov	%dh, 2(%rdi)
# ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$3, %r8
	lea	3(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$4, %r8
	lea	4(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit5):
	mov	(%rsi), %ecx
	mov	%dh, 4(%rdi)
	mov	%ecx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$5, %r8
	lea	5(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
# ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$6, %r8
	lea	6(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
# ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$7, %r8
	lea	7(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$8, %r8
	lea	8(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit9):
	mov	(%rsi), %rcx
	mov	%dh, 8(%rdi)
	mov	%rcx, (%rdi)
# ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$9, %r8
	lea	9(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
# ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$10, %r8
	lea	10(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
# ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$11, %r8
	lea	11(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
# ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$12, %r8
	lea	12(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$13, %r8
	lea	13(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$14, %r8
	lea	14(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$15, %r8
	lea	15(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
# ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
	lea	16(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

/* Exit stubs for 17..32 bytes, entered through L(ExitTable) (defined
   outside this part of the file).  L(ExitN) stores N bytes at (%rdi),
   loading them from (%rsi) -- except that L(Exit17), L(Exit21) and
   L(Exit25) store %dh as the final byte instead of loading it.  %dh is
   zero there because %rdx holds the small byte index used to select
   the stub at every dispatch site visible in this file.  The first 16
   bytes always go through %xmm0; the remainder uses a second move
   that may overlap the first.
   USE_AS_STPCPY: %rax = address of the last byte stored.
   strncpy build (USE_AS_STRNCPY && !USE_AS_STRCAT): subtract N from
   the count in %r8, advance %rdi by N and, if the count is not yet
   zero, continue in L(StrncpyFillTailWithZero).  lea leaves the flags
   alone, so jnz still tests the result of sub.  */
	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	mov	%dh, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$17, %r8
	lea	17(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$18, %r8
	lea	18(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$19, %r8
	lea	19(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$20, %r8
	lea	20(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dh, 20(%rdi)
# ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$21, %r8
	lea	21(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$22, %r8
	lea	22(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$23, %r8
	lea	23(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$24, %r8
	lea	24(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$25, %r8
	lea	25(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$26, %r8
	lea	26(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
# ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$27, %r8
	lea	27(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
# ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$28, %r8
	lea	28(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$29, %r8
	lea	29(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$30, %r8
	lea	30(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$31, %r8
	lea	31(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
# endif
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$32, %r8
	lea	32(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
# endif
	ret

/* Count-limited exits, assembled only when USE_AS_STRNCPY is defined
   (the conditional opened here extends beyond this group of stubs).
   L(StrncpyExitN) copies exactly N bytes from (%rsi) to (%rdi) and
   returns.  The dispatch sites in this file reach them through
   L(ExitStrncpyTable), indexed by the remaining count in %r8.
   USE_AS_STPCPY: %rax = %rdi + N.
   USE_AS_STRCAT: a zero byte is additionally stored at offset N.  */
# ifdef USE_AS_STRNCPY

	.p2align 4
L(StrncpyExit0):
#  ifdef USE_AS_STPCPY
	mov	%rdi, %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, (%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit1):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 1(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 2(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit3):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
#  ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 3(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 4(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
#  ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 5(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#  ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 6(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#  ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 7(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 8(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 9(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 10(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#  ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 11(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#  ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 12(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#  ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 13(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#  ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 14(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#  ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 15(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#  ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 16(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 17(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 18(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#  ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 19(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#  ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 20(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
#  ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 21(%rdi)
#  endif
	ret

	.p2align 4
L(StrncpyExit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#  ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#  endif
#  ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 22(%rdi)
#  endif
	ret

.p2align 4 1377L(StrncpyExit23): 1378 movdqu (%rsi), %xmm0 1379 mov 15(%rsi), %rcx 1380 movdqu %xmm0, (%rdi) 1381 mov %rcx, 15(%rdi) 1382# ifdef USE_AS_STPCPY 1383 lea 23(%rdi), %rax 1384# endif 1385# ifdef USE_AS_STRCAT 1386 xor %ch, %ch 1387 movb %ch, 23(%rdi) 1388# endif 1389 ret 1390 1391 .p2align 4 1392L(StrncpyExit24): 1393 movdqu (%rsi), %xmm0 1394 mov 16(%rsi), %rcx 1395 movdqu %xmm0, (%rdi) 1396 mov %rcx, 16(%rdi) 1397# ifdef USE_AS_STPCPY 1398 lea 24(%rdi), %rax 1399# endif 1400# ifdef USE_AS_STRCAT 1401 xor %ch, %ch 1402 movb %ch, 24(%rdi) 1403# endif 1404 ret 1405 1406 .p2align 4 1407L(StrncpyExit25): 1408 movdqu (%rsi), %xmm0 1409 mov 16(%rsi), %rdx 1410 mov 24(%rsi), %cl 1411 movdqu %xmm0, (%rdi) 1412 mov %rdx, 16(%rdi) 1413 mov %cl, 24(%rdi) 1414# ifdef USE_AS_STPCPY 1415 lea 25(%rdi), %rax 1416# endif 1417# ifdef USE_AS_STRCAT 1418 xor %ch, %ch 1419 movb %ch, 25(%rdi) 1420# endif 1421 ret 1422 1423 .p2align 4 1424L(StrncpyExit26): 1425 movdqu (%rsi), %xmm0 1426 mov 16(%rsi), %rdx 1427 mov 24(%rsi), %cx 1428 movdqu %xmm0, (%rdi) 1429 mov %rdx, 16(%rdi) 1430 mov %cx, 24(%rdi) 1431# ifdef USE_AS_STPCPY 1432 lea 26(%rdi), %rax 1433# endif 1434# ifdef USE_AS_STRCAT 1435 xor %ch, %ch 1436 movb %ch, 26(%rdi) 1437# endif 1438 ret 1439 1440 .p2align 4 1441L(StrncpyExit27): 1442 movdqu (%rsi), %xmm0 1443 mov 16(%rsi), %rdx 1444 mov 23(%rsi), %ecx 1445 movdqu %xmm0, (%rdi) 1446 mov %rdx, 16(%rdi) 1447 mov %ecx, 23(%rdi) 1448# ifdef USE_AS_STPCPY 1449 lea 27(%rdi), %rax 1450# endif 1451# ifdef USE_AS_STRCAT 1452 xor %ch, %ch 1453 movb %ch, 27(%rdi) 1454# endif 1455 ret 1456 1457 .p2align 4 1458L(StrncpyExit28): 1459 movdqu (%rsi), %xmm0 1460 mov 16(%rsi), %rdx 1461 mov 24(%rsi), %ecx 1462 movdqu %xmm0, (%rdi) 1463 mov %rdx, 16(%rdi) 1464 mov %ecx, 24(%rdi) 1465# ifdef USE_AS_STPCPY 1466 lea 28(%rdi), %rax 1467# endif 1468# ifdef USE_AS_STRCAT 1469 xor %ch, %ch 1470 movb %ch, 28(%rdi) 1471# endif 1472 ret 1473 1474 .p2align 4 1475L(StrncpyExit29): 1476 movdqu 
(%rsi), %xmm0			/* Operands of an instruction whose mnemonic lies before this excerpt.  */
	movdqu	13(%rsi), %xmm2		/* Source bytes 13..28.  */
	movdqu	%xmm0, (%rdi)		/* dst[0..15].  */
	movdqu	%xmm2, 13(%rdi)		/* dst[13..28]; overlaps the store above.  */
# ifdef USE_AS_STPCPY
	leaq	29(%rdi), %rax		/* Return value = dst + 29.  */
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 29(%rdi)		/* Zero byte right after the copied bytes.  */
# endif
	ret

/* L(StrncpyExitN), N = 30..33: copy exactly N bytes from (%rsi) to
   (%rdi) using two 16-byte unaligned moves whose ranges may overlap
   (plus one extra byte for N = 33).  When USE_AS_STPCPY is defined
   %rax is set to %rdi + N (N = 30..32 only); when USE_AS_STRCAT is
   defined a zero byte is stored at offset N.  %xmm0, %xmm2 and, in the
   USE_AS_STRCAT / N = 33 cases, %rcx are overwritten.  */

	.p2align 4
L(StrncpyExit30):
	movdqu	(%rsi), %xmm0		/* Source bytes 0..15.  */
	movdqu	14(%rsi), %xmm2		/* Source bytes 14..29.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
	leaq	30(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 30(%rdi)
# endif
	ret

	.p2align 4
L(StrncpyExit31):
	movdqu	(%rsi), %xmm0		/* Source bytes 0..15.  */
	movdqu	15(%rsi), %xmm2		/* Source bytes 15..30.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
	leaq	31(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 31(%rdi)
# endif
	ret

	.p2align 4
L(StrncpyExit32):
	movdqu	(%rsi), %xmm0		/* Source bytes 0..15.  */
	movdqu	16(%rsi), %xmm2		/* Source bytes 16..31; no overlap.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
	leaq	32(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 32(%rdi)
# endif
	ret

	.p2align 4
L(StrncpyExit33):
	movdqu	(%rsi), %xmm0		/* Source bytes 0..15.  */
	movdqu	16(%rsi), %xmm2		/* Source bytes 16..31.  */
	movb	32(%rsi), %cl		/* Source byte 32.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	movb	%cl, 32(%rdi)
	/* Unlike the smaller exits there is no USE_AS_STPCPY update of
	   %rax on this path.  */
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 33(%rdi)
# endif
	ret

# ifndef USE_AS_STRCAT

/* L(FillN), N = 0..16: store the low bytes of %rdx (or all of %xmm0)
   so that (%rdi) .. N-1(%rdi) are written, then return.  These are the
   targets of L(FillTable); L(StrncpyFillTailWithZero) clears %rdx and
   %xmm0 before dispatching here, so on that path the bytes written are
   zero.  Some variants use two overlapping stores, and N = 3, 7, 15
   use one wider store that begins at -1(%rdi); presumably that byte
   has already been written with the same value -- TODO confirm.  */

	.p2align 4
L(Fill0):
	ret

	.p2align 4
L(Fill1):
	movb	%dl, (%rdi)
	ret

	.p2align 4
L(Fill2):
	movw	%dx, (%rdi)
	ret

	.p2align 4
L(Fill3):
	movl	%edx, -1(%rdi)		/* Bytes -1..2.  */
	ret

	.p2align 4
L(Fill4):
	movl	%edx, (%rdi)
	ret

	.p2align 4
L(Fill5):
	movl	%edx, (%rdi)		/* Bytes 0..3.  */
	movb	%dl, 4(%rdi)		/* Byte 4.  */
	ret

	.p2align 4
L(Fill6):
	movl	%edx, (%rdi)		/* Bytes 0..3.  */
	movw	%dx, 4(%rdi)		/* Bytes 4..5.  */
	ret

	.p2align 4
L(Fill7):
	movq	%rdx, -1(%rdi)		/* Bytes -1..6.  */
	ret

	.p2align 4
L(Fill8):
	movq	%rdx, (%rdi)
	ret

	.p2align 4
L(Fill9):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movb	%dl, 8(%rdi)		/* Byte 8.  */
	ret

	.p2align 4
L(Fill10):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movw	%dx, 8(%rdi)		/* Bytes 8..9.  */
	ret

	.p2align 4
L(Fill11):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movl	%edx, 7(%rdi)		/* Bytes 7..10.  */
	ret

	.p2align 4
L(Fill12):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movl	%edx, 8(%rdi)		/* Bytes 8..11.  */
	ret

	.p2align 4
L(Fill13):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movq	%rdx, 5(%rdi)		/* Bytes 5..12.  */
	ret

	.p2align 4
L(Fill14):
	movq	%rdx, (%rdi)		/* Bytes 0..7.  */
	movq	%rdx, 6(%rdi)		/* Bytes 6..13.  */
	ret

	.p2align 4
L(Fill15):
	movdqu	%xmm0, -1(%rdi)		/* Bytes -1..14.  */
	ret

	.p2align 4
L(Fill16):
	movdqu	%xmm0, (%rdi)		/* Bytes 0..15.  */
	ret

/* Store the 16 bytes held in %xmm2 at %rdi + %rcx, then fall through
   (across the alignment padding) into L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
	movdqu	%xmm2, (%rdi, %rcx)

/* On entry %rdx is a bit mask; its lowest set bit index is taken as a
   byte offset (presumably the position of the terminating NUL within
   the block just stored -- TODO confirm against the callers).  The
   code advances %rdi to one byte past that offset, adjusts the count
   in %r8 accordingly and falls through into the fill code.  */
	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsfq	%rdx, %rdx		/* %rdx = index of lowest set bit.  */
	addq	$15, %r8
	addq	%rcx, %rdi		/* %rdi = start of the block.  */
# ifdef USE_AS_STPCPY
	leaq	(%rdi, %rdx), %rax	/* Return value = block start + index.  */
# endif
	subq	%rdx, %r8
	leaq	1(%rdi, %rdx), %rdi	/* First byte after that index.  */

/* Write %xmm0 = 0 repeatedly starting at %rdi.  %r8 is the running
   count that decides how many stores are done (it looks like a biased
   number of bytes still to fill -- TODO confirm exact bias with the
   callers).  The final 0..16 bytes are handled by L(FillTable).
   Overwrites %xmm0, %rdx, %rsi, %rdi, %r8 and, through
   BRANCH_TO_JMPTBL_ENTRY, %r11 and %rcx.  */
	.p2align 4
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0		/* Zero source for the vector stores.  */
	xorl	%edx, %edx		/* 32-bit write also clears the upper half of %rdx.  */
	subq	$16, %r8
	jbe	L(StrncpyFillExit)	/* Taken when %r8 was <= 16 (unsigned).  */

	movdqu	%xmm0, (%rdi)		/* One unaligned 16-byte store first.  */
	addq	$16, %rdi

	/* Round %rdi down to a 16-byte boundary so that movdqa can be
	   used, and add the bytes stepped back to the count.  */
	movq	%rdi, %rsi
	andq	$15, %rsi		/* %rsi = %rdi mod 16.  */
	subq	%rsi, %rdi
	addq	%rsi, %r8
	subq	$64, %r8
	jb	L(StrncpyFillLess64)	/* Taken when %r8 was < 64 (unsigned).  */

/* 64 bytes per iteration while the count does not borrow.  */
L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	movdqa	%xmm0, 32(%rdi)
	movdqa	%xmm0, 48(%rdi)
	addq	$64, %rdi
	subq	$64, %r8
	jae	L(StrncpyFillLoopMovdqa)

/* Fewer than 64 left: try 32, then 16, then the table.  */
L(StrncpyFillLess64):
	addq	$32, %r8
	jl	L(StrncpyFillLess32)	/* Signed: result went negative.  */
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	addq	$32, %rdi
	subq	$16, %r8
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%rdi)
	addq	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

L(StrncpyFillLess32):
	addq	$16, %r8
	jl	L(StrncpyFillExit)
/* Remainder of the L(StrncpyFillLess32) path: one more aligned 16-byte
   store of %xmm0, then finish through the fill jump table.  */
	movdqa	%xmm0, (%rdi)
	addq	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* Undo the last 16-byte bias on %r8 and let L(FillTable) write the
   remaining bytes; %r8 is used directly as the table index.  */
L(StrncpyFillExit):
	addq	$16, %r8
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* end of ifndef USE_AS_STRCAT */
# endif

/* Entered with four 16-byte blocks in %xmm4..%xmm7, a mask in %rdx and
   a count in %r8.  A non-zero %rdx selects the "case 2" path below,
   otherwise execution continues with "case 3".  (Presumably %rdx != 0
   means a NUL byte was seen somewhere in the 64 bytes and %r8 is the
   biased remaining length -- TODO confirm against the main loop.)  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	testq	%rdx, %rdx
	jnz	L(Unaligned64LeaveCase2)

/* Case 3: store %xmm4..%xmm7 one block at a time, leaving for
   L(CopyFrom1To16BytesCase3) as soon as the count in %r8 runs out.  */
L(Unaligned64LeaveCase3):
	leaq	64(%r8), %rcx
	andq	$-16, %rcx		/* %rcx = (%r8 + 64) rounded down to 16.  */
	addq	$48, %r8
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%rdi)
	subq	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%rdi)
	/* All 64 bytes stored.  */
# ifdef USE_AS_STPCPY
	leaq	64(%rdi), %rax		/* Return value = dst + 64.  */
# endif
# ifdef USE_AS_STRCAT
	xorb	%ch, %ch
	movb	%ch, 64(%rdi)		/* Zero byte right after the 64 bytes.  */
# endif
	ret

/* Case 2: walk %xmm4..%xmm7 in order.  For each block, compare it
   byte-wise with %xmm0 and collect the result mask in %rdx (this
   presumably tests for NUL bytes, i.e. %xmm0 is expected to be all
   zero on entry -- TODO confirm), and step the count in %r8.  %rcx is
   the running byte offset of the block under test.  Each store of a
   block is issued only after the following block has been compared.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	xorl	%ecx, %ecx		/* Offset 0; 32-bit write clears all of %rcx.  */
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	addq	$48, %r8
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif
	/* Mask was zero, so %xmm0 is all zero again for the next compare.  */
	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm4, (%rdi)
	addq	$16, %rcx
	subq	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm5, 16(%rdi)
	addq	$16, %rcx
	subq	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testq	%rdx, %rdx
# ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
# else
	jnz	L(CopyFrom1To16Bytes)
# endif

	/* Last block: store the third one and advance both pointers past
	   the blocks already handled (%rcx + 16 bytes).  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm6, 32(%rdi)
	leaq	16(%rdi, %rcx), %rdi
	leaq	16(%rsi, %rcx), %rsi
/* Final step of the L(Unaligned64LeaveCase2) path.  %rdx holds the
   compare mask of the last block; its lowest set bit index is compared
   (unsigned) with the count in %r8.  If the index is below the count
   the copy finishes in L(CopyFrom1To16BytesExit); otherwise %r8 is
   used as the index into L(ExitStrncpyTable).  */
	bsfq	%rdx, %rdx		/* %rdx = index of lowest set bit.  */
	cmpq	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Reached from the function entry when the length argument is zero
   (USE_AS_STRNCPY builds): nothing is copied.  Unless built as strcat,
   the destination pointer is returned in %rax.  */
	.p2align 4
L(ExitZero):
# ifndef USE_AS_STRCAT
	movq	%rdi, %rax
# endif
	ret

/* Closes a conditional opened before this excerpt (presumably the
   USE_AS_STRNCPY-only code -- TODO confirm).  */
# endif

# ifndef USE_AS_STRCAT
END (STRCPY)
# else
END (STRCAT)
# endif

/* Jump tables.  Every entry is the 32-bit value JMPTBL (target, table)
   = target - table; BRANCH_TO_JMPTBL_ENTRY sign-extends the selected
   entry, adds the table address back and jumps there.

   The alignment directive has to follow the section switch: when it
   was issued before `.section .rodata' it only padded the end of the
   text section and left L(ExitTable) without any alignment guarantee.
   L(FillTable) below is aligned the same way.  */
	.section .rodata
	.p2align 4

/* Entry i (i = 0..31) branches to L(Exit<i+1>).  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
# ifdef USE_AS_STRNCPY
/* Entry i (i = 0..33) branches to L(StrncpyExit<i>).  The table starts
   128 bytes after L(ExitTable), so it keeps that table's alignment.  */
L(ExitStrncpyTable):
	.int	JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
#  ifndef USE_AS_STRCAT
/* Entry i (i = 0..16) branches to L(Fill<i>).  */
	.p2align 4
L(FillTable):
	.int	JMPTBL(L(Fill0), L(FillTable))
	.int	JMPTBL(L(Fill1), L(FillTable))
	.int	JMPTBL(L(Fill2), L(FillTable))
	.int	JMPTBL(L(Fill3), L(FillTable))
	.int	JMPTBL(L(Fill4), L(FillTable))
	.int	JMPTBL(L(Fill5), L(FillTable))
	.int	JMPTBL(L(Fill6), L(FillTable))
	.int	JMPTBL(L(Fill7), L(FillTable))
	.int	JMPTBL(L(Fill8), L(FillTable))
	.int	JMPTBL(L(Fill9), L(FillTable))
	.int	JMPTBL(L(Fill10), L(FillTable))
	.int	JMPTBL(L(Fill11), L(FillTable))
	.int	JMPTBL(L(Fill12), L(FillTable))
	.int	JMPTBL(L(Fill13), L(FillTable))
	.int	JMPTBL(L(Fill14), L(FillTable))
	.int	JMPTBL(L(Fill15), L(FillTable))
	.int	JMPTBL(L(Fill16), L(FillTable))
#  endif
# endif
#endif