1/* strcpy with SSSE3 2 Copyright (C) 2011-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19 20#if IS_IN (libc) 21 22# ifndef USE_AS_STRCAT 23# include <sysdep.h> 24 /* CFI annotation for a 4-byte push of REG: the CFA offset is adjusted by +4 and REG is recorded with cfi_rel_offset at offset 0. Emits no instructions. */ 25# define CFI_PUSH(REG) \ 26 cfi_adjust_cfa_offset (4); \ 27 cfi_rel_offset (REG, 0) 28 /* CFI annotation for the matching pop: the CFA offset is adjusted by -4 and REG is marked with cfi_restore. Emits no instructions. */ 29# define CFI_POP(REG) \ 30 cfi_adjust_cfa_offset (-4); \ 31 cfi_restore (REG) 32 /* pushl / popl of REG, each followed by the corresponding CFI annotation. */ 33# define PUSH(REG) pushl REG; CFI_PUSH (REG) 34# define POP(REG) popl REG; CFI_POP (REG) 35 /* Name of the entry point; defaults to __strcpy_ssse3 when STRCPY is not already defined. */ 36# ifndef STRCPY 37# define STRCPY __strcpy_ssse3 38# endif 39 /* Entry/exit sequences. With USE_AS_STRNCPY, ENTRANCE pushes %ebx, PARMS is 8 rather than 4, and RETURN/RETURN1 pop %ebx (RETURN1 pops %edi first) before ret; otherwise ENTRANCE is empty and only RETURN1 pops %edi. The CFI_PUSH annotations placed after ret emit no instructions; presumably they re-establish the unwind state for the code that follows the expansion -- confirm against the use sites. */ 40# ifdef USE_AS_STRNCPY 41# define PARMS 8 42# define ENTRANCE PUSH (%ebx) 43# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx); 44# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi) 45# else 46# define PARMS 4 47# define ENTRANCE 48# define RETURN ret 49# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) 50# endif 51 /* Load the return value into %eax. With USE_AS_STPCPY both macros compute %edx + n; otherwise SAVE_RESULT copies %edi, SAVE_RESULT_TAIL copies %edx, and n is unused. */ 52# ifdef USE_AS_STPCPY 53# define SAVE_RESULT(n) lea n(%edx), %eax 54# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax 55# else 56# define SAVE_RESULT(n) movl %edi, %eax 57# define SAVE_RESULT_TAIL(n) movl %edx, %eax 58# endif 59 /* Argument offsets, used as STR1(%esp), STR2(%esp) and LEN(%esp): STR1 is at PARMS and each following argument is 4 bytes further. */ 60# define STR1 PARMS 61# define STR2 STR1+4 62# define LEN STR2+4 63 64/* In this code the following instructions are used for copying: 65 movb - 1 byte 66 movw - 2 byte 67 movl - 4 byte 68 movlpd - 8 byte 
69 movaps - 16 byte - requires 16 byte alignment 70 of source and destination addresses. 71*/ 72 73.text 74ENTRY (STRCPY) 75 ENTRANCE 76 mov STR1(%esp), %edx 77 mov STR2(%esp), %ecx 78# ifdef USE_AS_STRNCPY 79 movl LEN(%esp), %ebx 80 cmp $8, %ebx 81 jbe L(StrncpyExit8Bytes) 82# endif 83 cmpb $0, (%ecx) 84 jz L(ExitTail1) 85 cmpb $0, 1(%ecx) 86 jz L(ExitTail2) 87 cmpb $0, 2(%ecx) 88 jz L(ExitTail3) 89 cmpb $0, 3(%ecx) 90 jz L(ExitTail4) 91 cmpb $0, 4(%ecx) 92 jz L(ExitTail5) 93 cmpb $0, 5(%ecx) 94 jz L(ExitTail6) 95 cmpb $0, 6(%ecx) 96 jz L(ExitTail7) 97 cmpb $0, 7(%ecx) 98 jz L(ExitTail8) 99# ifdef USE_AS_STRNCPY 100 cmp $16, %ebx 101 jb L(StrncpyExit15Bytes) 102# endif 103 cmpb $0, 8(%ecx) 104 jz L(ExitTail9) 105 cmpb $0, 9(%ecx) 106 jz L(ExitTail10) 107 cmpb $0, 10(%ecx) 108 jz L(ExitTail11) 109 cmpb $0, 11(%ecx) 110 jz L(ExitTail12) 111 cmpb $0, 12(%ecx) 112 jz L(ExitTail13) 113 cmpb $0, 13(%ecx) 114 jz L(ExitTail14) 115 cmpb $0, 14(%ecx) 116 jz L(ExitTail15) 117# ifdef USE_AS_STRNCPY 118 cmp $16, %ebx 119 je L(ExitTail16) 120# endif 121 cmpb $0, 15(%ecx) 122 jz L(ExitTail16) 123 124 PUSH (%edi) 125 mov %edx, %edi 126# endif 127 PUSH (%esi) 128# ifdef USE_AS_STRNCPY 129 mov %ecx, %esi 130 sub $16, %ebx 131 and $0xf, %esi 132 133/* add 16 bytes ecx_offset to ebx */ 134 135 add %esi, %ebx 136# endif 137 lea 16(%ecx), %esi 138 and $-16, %esi 139 pxor %xmm0, %xmm0 140 movlpd (%ecx), %xmm1 141 movlpd %xmm1, (%edx) 142 143 pcmpeqb (%esi), %xmm0 144 movlpd 8(%ecx), %xmm1 145 movlpd %xmm1, 8(%edx) 146 147 pmovmskb %xmm0, %eax 148 sub %ecx, %esi 149 150# ifdef USE_AS_STRNCPY 151 sub $16, %ebx 152 jbe L(CopyFrom1To16BytesCase2OrCase3) 153# endif 154 test %eax, %eax 155 jnz L(CopyFrom1To16Bytes) 156 157 mov %edx, %eax 158 lea 16(%edx), %edx 159 and $-16, %edx 160 sub %edx, %eax 161 162# ifdef USE_AS_STRNCPY 163 add %eax, %esi 164 lea -1(%esi), %esi 165 and $1<<31, %esi 166 test %esi, %esi 167 jnz L(ContinueCopy) 168 lea 16(%ebx), %ebx 169 170L(ContinueCopy): 171# endif 172 sub 
%eax, %ecx 173 mov %ecx, %eax 174 and $0xf, %eax 175 mov $0, %esi 176 177/* case: ecx_offset == edx_offset */ 178 179 jz L(Align16Both) 180 181 cmp $8, %eax 182 jae L(ShlHigh8) 183 cmp $1, %eax 184 je L(Shl1) 185 cmp $2, %eax 186 je L(Shl2) 187 cmp $3, %eax 188 je L(Shl3) 189 cmp $4, %eax 190 je L(Shl4) 191 cmp $5, %eax 192 je L(Shl5) 193 cmp $6, %eax 194 je L(Shl6) 195 jmp L(Shl7) 196 197L(ShlHigh8): 198 je L(Shl8) 199 cmp $9, %eax 200 je L(Shl9) 201 cmp $10, %eax 202 je L(Shl10) 203 cmp $11, %eax 204 je L(Shl11) 205 cmp $12, %eax 206 je L(Shl12) 207 cmp $13, %eax 208 je L(Shl13) 209 cmp $14, %eax 210 je L(Shl14) 211 jmp L(Shl15) 212 213L(Align16Both): 214 movaps (%ecx), %xmm1 215 movaps 16(%ecx), %xmm2 216 movaps %xmm1, (%edx) 217 pcmpeqb %xmm2, %xmm0 218 pmovmskb %xmm0, %eax 219 lea 16(%esi), %esi 220# ifdef USE_AS_STRNCPY 221 sub $16, %ebx 222 jbe L(CopyFrom1To16BytesCase2OrCase3) 223# endif 224 test %eax, %eax 225 jnz L(CopyFrom1To16Bytes) 226 227 movaps 16(%ecx, %esi), %xmm3 228 movaps %xmm2, (%edx, %esi) 229 pcmpeqb %xmm3, %xmm0 230 pmovmskb %xmm0, %eax 231 lea 16(%esi), %esi 232# ifdef USE_AS_STRNCPY 233 sub $16, %ebx 234 jbe L(CopyFrom1To16BytesCase2OrCase3) 235# endif 236 test %eax, %eax 237 jnz L(CopyFrom1To16Bytes) 238 239 movaps 16(%ecx, %esi), %xmm4 240 movaps %xmm3, (%edx, %esi) 241 pcmpeqb %xmm4, %xmm0 242 pmovmskb %xmm0, %eax 243 lea 16(%esi), %esi 244# ifdef USE_AS_STRNCPY 245 sub $16, %ebx 246 jbe L(CopyFrom1To16BytesCase2OrCase3) 247# endif 248 test %eax, %eax 249 jnz L(CopyFrom1To16Bytes) 250 251 movaps 16(%ecx, %esi), %xmm1 252 movaps %xmm4, (%edx, %esi) 253 pcmpeqb %xmm1, %xmm0 254 pmovmskb %xmm0, %eax 255 lea 16(%esi), %esi 256# ifdef USE_AS_STRNCPY 257 sub $16, %ebx 258 jbe L(CopyFrom1To16BytesCase2OrCase3) 259# endif 260 test %eax, %eax 261 jnz L(CopyFrom1To16Bytes) 262 263 movaps 16(%ecx, %esi), %xmm2 264 movaps %xmm1, (%edx, %esi) 265 pcmpeqb %xmm2, %xmm0 266 pmovmskb %xmm0, %eax 267 lea 16(%esi), %esi 268# ifdef USE_AS_STRNCPY 269 sub 
$16, %ebx 270 jbe L(CopyFrom1To16BytesCase2OrCase3) 271# endif 272 test %eax, %eax 273 jnz L(CopyFrom1To16Bytes) 274 275 movaps 16(%ecx, %esi), %xmm3 276 movaps %xmm2, (%edx, %esi) 277 pcmpeqb %xmm3, %xmm0 278 pmovmskb %xmm0, %eax 279 lea 16(%esi), %esi 280# ifdef USE_AS_STRNCPY 281 sub $16, %ebx 282 jbe L(CopyFrom1To16BytesCase2OrCase3) 283# endif 284 test %eax, %eax 285 jnz L(CopyFrom1To16Bytes) 286 287 movaps %xmm3, (%edx, %esi) 288 mov %ecx, %eax 289 lea 16(%ecx, %esi), %ecx 290 and $-0x40, %ecx 291 sub %ecx, %eax 292 sub %eax, %edx 293# ifdef USE_AS_STRNCPY 294 lea 112(%ebx, %eax), %ebx 295# endif 296 mov $-0x40, %esi 297 298L(Aligned64Loop): 299 movaps (%ecx), %xmm2 300 movaps 32(%ecx), %xmm3 301 movaps %xmm2, %xmm4 302 movaps 16(%ecx), %xmm5 303 movaps %xmm3, %xmm6 304 movaps 48(%ecx), %xmm7 305 pminub %xmm5, %xmm2 306 pminub %xmm7, %xmm3 307 pminub %xmm2, %xmm3 308 lea 64(%edx), %edx 309 pcmpeqb %xmm0, %xmm3 310 lea 64(%ecx), %ecx 311 pmovmskb %xmm3, %eax 312# ifdef USE_AS_STRNCPY 313 sub $64, %ebx 314 jbe L(StrncpyLeaveCase2OrCase3) 315# endif 316 test %eax, %eax 317 jnz L(Aligned64Leave) 318 movaps %xmm4, -64(%edx) 319 movaps %xmm5, -48(%edx) 320 movaps %xmm6, -32(%edx) 321 movaps %xmm7, -16(%edx) 322 jmp L(Aligned64Loop) 323 324L(Aligned64Leave): 325# ifdef USE_AS_STRNCPY 326 lea 48(%ebx), %ebx 327# endif 328 pcmpeqb %xmm4, %xmm0 329 pmovmskb %xmm0, %eax 330 test %eax, %eax 331 jnz L(CopyFrom1To16Bytes) 332 333 pcmpeqb %xmm5, %xmm0 334# ifdef USE_AS_STRNCPY 335 lea -16(%ebx), %ebx 336# endif 337 pmovmskb %xmm0, %eax 338 movaps %xmm4, -64(%edx) 339 test %eax, %eax 340 lea 16(%esi), %esi 341 jnz L(CopyFrom1To16Bytes) 342 343 pcmpeqb %xmm6, %xmm0 344# ifdef USE_AS_STRNCPY 345 lea -16(%ebx), %ebx 346# endif 347 pmovmskb %xmm0, %eax 348 movaps %xmm5, -48(%edx) 349 test %eax, %eax 350 lea 16(%esi), %esi 351 jnz L(CopyFrom1To16Bytes) 352 353 movaps %xmm6, -32(%edx) 354 pcmpeqb %xmm7, %xmm0 355# ifdef USE_AS_STRNCPY 356 lea -16(%ebx), %ebx 357# endif 358 
pmovmskb %xmm0, %eax 359 lea 16(%esi), %esi 360 jmp L(CopyFrom1To16Bytes) 361 362 .p2align 4 363L(Shl1): 364 movaps -1(%ecx), %xmm1 365 movaps 15(%ecx), %xmm2 366L(Shl1Start): 367 pcmpeqb %xmm2, %xmm0 368 pmovmskb %xmm0, %eax 369 movaps %xmm2, %xmm3 370# ifdef USE_AS_STRNCPY 371 sub $16, %ebx 372 jbe L(StrncpyExit1Case2OrCase3) 373# endif 374 test %eax, %eax 375 jnz L(Shl1LoopExit) 376 377 palignr $1, %xmm1, %xmm2 378 movaps %xmm3, %xmm1 379 movaps %xmm2, (%edx) 380 movaps 31(%ecx), %xmm2 381 382 pcmpeqb %xmm2, %xmm0 383 lea 16(%edx), %edx 384 pmovmskb %xmm0, %eax 385 lea 16(%ecx), %ecx 386 movaps %xmm2, %xmm3 387# ifdef USE_AS_STRNCPY 388 sub $16, %ebx 389 jbe L(StrncpyExit1Case2OrCase3) 390# endif 391 test %eax, %eax 392 jnz L(Shl1LoopExit) 393 394 palignr $1, %xmm1, %xmm2 395 movaps %xmm2, (%edx) 396 movaps 31(%ecx), %xmm2 397 movaps %xmm3, %xmm1 398 399 pcmpeqb %xmm2, %xmm0 400 lea 16(%edx), %edx 401 pmovmskb %xmm0, %eax 402 lea 16(%ecx), %ecx 403 movaps %xmm2, %xmm3 404# ifdef USE_AS_STRNCPY 405 sub $16, %ebx 406 jbe L(StrncpyExit1Case2OrCase3) 407# endif 408 test %eax, %eax 409 jnz L(Shl1LoopExit) 410 411 palignr $1, %xmm1, %xmm2 412 movaps %xmm2, (%edx) 413 movaps 31(%ecx), %xmm2 414 415 pcmpeqb %xmm2, %xmm0 416 lea 16(%edx), %edx 417 pmovmskb %xmm0, %eax 418 lea 16(%ecx), %ecx 419# ifdef USE_AS_STRNCPY 420 sub $16, %ebx 421 jbe L(StrncpyExit1Case2OrCase3) 422# endif 423 test %eax, %eax 424 jnz L(Shl1LoopExit) 425 426 palignr $1, %xmm3, %xmm2 427 movaps %xmm2, (%edx) 428 lea 31(%ecx), %ecx 429 lea 16(%edx), %edx 430 431 mov %ecx, %eax 432 and $-0x40, %ecx 433 sub %ecx, %eax 434 lea -15(%ecx), %ecx 435 sub %eax, %edx 436# ifdef USE_AS_STRNCPY 437 add %eax, %ebx 438# endif 439 movaps -1(%ecx), %xmm1 440 441L(Shl1LoopStart): 442 movaps 15(%ecx), %xmm2 443 movaps 31(%ecx), %xmm3 444 movaps %xmm3, %xmm6 445 movaps 47(%ecx), %xmm4 446 movaps %xmm4, %xmm7 447 movaps 63(%ecx), %xmm5 448 pminub %xmm2, %xmm6 449 pminub %xmm5, %xmm7 450 pminub %xmm6, %xmm7 451 pcmpeqb 
%xmm0, %xmm7 452 pmovmskb %xmm7, %eax 453 movaps %xmm5, %xmm7 454 palignr $1, %xmm4, %xmm5 455 test %eax, %eax 456 palignr $1, %xmm3, %xmm4 457 jnz L(Shl1Start) 458# ifdef USE_AS_STRNCPY 459 sub $64, %ebx 460 jbe L(StrncpyLeave1) 461# endif 462 palignr $1, %xmm2, %xmm3 463 lea 64(%ecx), %ecx 464 palignr $1, %xmm1, %xmm2 465 movaps %xmm7, %xmm1 466 movaps %xmm5, 48(%edx) 467 movaps %xmm4, 32(%edx) 468 movaps %xmm3, 16(%edx) 469 movaps %xmm2, (%edx) 470 lea 64(%edx), %edx 471 jmp L(Shl1LoopStart) 472 473L(Shl1LoopExit): 474 movlpd (%ecx), %xmm0 475 movlpd %xmm0, (%edx) 476 movlpd 7(%ecx), %xmm0 477 movlpd %xmm0, 7(%edx) 478 mov $15, %esi 479 jmp L(CopyFrom1To16Bytes) 480 481 .p2align 4 482L(Shl2): 483 movaps -2(%ecx), %xmm1 484 movaps 14(%ecx), %xmm2 485L(Shl2Start): 486 pcmpeqb %xmm2, %xmm0 487 pmovmskb %xmm0, %eax 488 movaps %xmm2, %xmm3 489# ifdef USE_AS_STRNCPY 490 sub $16, %ebx 491 jbe L(StrncpyExit2Case2OrCase3) 492# endif 493 test %eax, %eax 494 jnz L(Shl2LoopExit) 495 496 palignr $2, %xmm1, %xmm2 497 movaps %xmm3, %xmm1 498 movaps %xmm2, (%edx) 499 movaps 30(%ecx), %xmm2 500 501 pcmpeqb %xmm2, %xmm0 502 lea 16(%edx), %edx 503 pmovmskb %xmm0, %eax 504 lea 16(%ecx), %ecx 505 movaps %xmm2, %xmm3 506# ifdef USE_AS_STRNCPY 507 sub $16, %ebx 508 jbe L(StrncpyExit2Case2OrCase3) 509# endif 510 test %eax, %eax 511 jnz L(Shl2LoopExit) 512 513 palignr $2, %xmm1, %xmm2 514 movaps %xmm2, (%edx) 515 movaps 30(%ecx), %xmm2 516 movaps %xmm3, %xmm1 517 518 pcmpeqb %xmm2, %xmm0 519 lea 16(%edx), %edx 520 pmovmskb %xmm0, %eax 521 lea 16(%ecx), %ecx 522 movaps %xmm2, %xmm3 523# ifdef USE_AS_STRNCPY 524 sub $16, %ebx 525 jbe L(StrncpyExit2Case2OrCase3) 526# endif 527 test %eax, %eax 528 jnz L(Shl2LoopExit) 529 530 palignr $2, %xmm1, %xmm2 531 movaps %xmm2, (%edx) 532 movaps 30(%ecx), %xmm2 533 534 pcmpeqb %xmm2, %xmm0 535 lea 16(%edx), %edx 536 pmovmskb %xmm0, %eax 537 lea 16(%ecx), %ecx 538# ifdef USE_AS_STRNCPY 539 sub $16, %ebx 540 jbe L(StrncpyExit2Case2OrCase3) 541# endif 
542 test %eax, %eax 543 jnz L(Shl2LoopExit) 544 545 palignr $2, %xmm3, %xmm2 546 movaps %xmm2, (%edx) 547 lea 30(%ecx), %ecx 548 lea 16(%edx), %edx 549 550 mov %ecx, %eax 551 and $-0x40, %ecx 552 sub %ecx, %eax 553 lea -14(%ecx), %ecx 554 sub %eax, %edx 555# ifdef USE_AS_STRNCPY 556 add %eax, %ebx 557# endif 558 movaps -2(%ecx), %xmm1 559 560L(Shl2LoopStart): 561 movaps 14(%ecx), %xmm2 562 movaps 30(%ecx), %xmm3 563 movaps %xmm3, %xmm6 564 movaps 46(%ecx), %xmm4 565 movaps %xmm4, %xmm7 566 movaps 62(%ecx), %xmm5 567 pminub %xmm2, %xmm6 568 pminub %xmm5, %xmm7 569 pminub %xmm6, %xmm7 570 pcmpeqb %xmm0, %xmm7 571 pmovmskb %xmm7, %eax 572 movaps %xmm5, %xmm7 573 palignr $2, %xmm4, %xmm5 574 test %eax, %eax 575 palignr $2, %xmm3, %xmm4 576 jnz L(Shl2Start) 577# ifdef USE_AS_STRNCPY 578 sub $64, %ebx 579 jbe L(StrncpyLeave2) 580# endif 581 palignr $2, %xmm2, %xmm3 582 lea 64(%ecx), %ecx 583 palignr $2, %xmm1, %xmm2 584 movaps %xmm7, %xmm1 585 movaps %xmm5, 48(%edx) 586 movaps %xmm4, 32(%edx) 587 movaps %xmm3, 16(%edx) 588 movaps %xmm2, (%edx) 589 lea 64(%edx), %edx 590 jmp L(Shl2LoopStart) 591 592L(Shl2LoopExit): 593 movlpd (%ecx), %xmm0 594 movlpd 6(%ecx), %xmm1 595 movlpd %xmm0, (%edx) 596 movlpd %xmm1, 6(%edx) 597 mov $14, %esi 598 jmp L(CopyFrom1To16Bytes) 599 600 .p2align 4 601L(Shl3): 602 movaps -3(%ecx), %xmm1 603 movaps 13(%ecx), %xmm2 604L(Shl3Start): 605 pcmpeqb %xmm2, %xmm0 606 pmovmskb %xmm0, %eax 607 movaps %xmm2, %xmm3 608# ifdef USE_AS_STRNCPY 609 sub $16, %ebx 610 jbe L(StrncpyExit3Case2OrCase3) 611# endif 612 test %eax, %eax 613 jnz L(Shl3LoopExit) 614 615 palignr $3, %xmm1, %xmm2 616 movaps %xmm3, %xmm1 617 movaps %xmm2, (%edx) 618 movaps 29(%ecx), %xmm2 619 620 pcmpeqb %xmm2, %xmm0 621 lea 16(%edx), %edx 622 pmovmskb %xmm0, %eax 623 lea 16(%ecx), %ecx 624 movaps %xmm2, %xmm3 625# ifdef USE_AS_STRNCPY 626 sub $16, %ebx 627 jbe L(StrncpyExit3Case2OrCase3) 628# endif 629 test %eax, %eax 630 jnz L(Shl3LoopExit) 631 632 palignr $3, %xmm1, %xmm2 633 movaps 
%xmm2, (%edx) 634 movaps 29(%ecx), %xmm2 635 movaps %xmm3, %xmm1 636 637 pcmpeqb %xmm2, %xmm0 638 lea 16(%edx), %edx 639 pmovmskb %xmm0, %eax 640 lea 16(%ecx), %ecx 641 movaps %xmm2, %xmm3 642# ifdef USE_AS_STRNCPY 643 sub $16, %ebx 644 jbe L(StrncpyExit3Case2OrCase3) 645# endif 646 test %eax, %eax 647 jnz L(Shl3LoopExit) 648 649 palignr $3, %xmm1, %xmm2 650 movaps %xmm2, (%edx) 651 movaps 29(%ecx), %xmm2 652 653 pcmpeqb %xmm2, %xmm0 654 lea 16(%edx), %edx 655 pmovmskb %xmm0, %eax 656 lea 16(%ecx), %ecx 657# ifdef USE_AS_STRNCPY 658 sub $16, %ebx 659 jbe L(StrncpyExit3Case2OrCase3) 660# endif 661 test %eax, %eax 662 jnz L(Shl3LoopExit) 663 664 palignr $3, %xmm3, %xmm2 665 movaps %xmm2, (%edx) 666 lea 29(%ecx), %ecx 667 lea 16(%edx), %edx 668 669 mov %ecx, %eax 670 and $-0x40, %ecx 671 sub %ecx, %eax 672 lea -13(%ecx), %ecx 673 sub %eax, %edx 674# ifdef USE_AS_STRNCPY 675 add %eax, %ebx 676# endif 677 movaps -3(%ecx), %xmm1 678 679L(Shl3LoopStart): 680 movaps 13(%ecx), %xmm2 681 movaps 29(%ecx), %xmm3 682 movaps %xmm3, %xmm6 683 movaps 45(%ecx), %xmm4 684 movaps %xmm4, %xmm7 685 movaps 61(%ecx), %xmm5 686 pminub %xmm2, %xmm6 687 pminub %xmm5, %xmm7 688 pminub %xmm6, %xmm7 689 pcmpeqb %xmm0, %xmm7 690 pmovmskb %xmm7, %eax 691 movaps %xmm5, %xmm7 692 palignr $3, %xmm4, %xmm5 693 test %eax, %eax 694 palignr $3, %xmm3, %xmm4 695 jnz L(Shl3Start) 696# ifdef USE_AS_STRNCPY 697 sub $64, %ebx 698 jbe L(StrncpyLeave3) 699# endif 700 palignr $3, %xmm2, %xmm3 701 lea 64(%ecx), %ecx 702 palignr $3, %xmm1, %xmm2 703 movaps %xmm7, %xmm1 704 movaps %xmm5, 48(%edx) 705 movaps %xmm4, 32(%edx) 706 movaps %xmm3, 16(%edx) 707 movaps %xmm2, (%edx) 708 lea 64(%edx), %edx 709 jmp L(Shl3LoopStart) 710 711L(Shl3LoopExit): 712 movlpd (%ecx), %xmm0 713 movlpd 5(%ecx), %xmm1 714 movlpd %xmm0, (%edx) 715 movlpd %xmm1, 5(%edx) 716 mov $13, %esi 717 jmp L(CopyFrom1To16Bytes) 718 719 .p2align 4 720L(Shl4): 721 movaps -4(%ecx), %xmm1 722 movaps 12(%ecx), %xmm2 723L(Shl4Start): 724 pcmpeqb %xmm2, 
%xmm0 725 pmovmskb %xmm0, %eax 726 movaps %xmm2, %xmm3 727# ifdef USE_AS_STRNCPY 728 sub $16, %ebx 729 jbe L(StrncpyExit4Case2OrCase3) 730# endif 731 test %eax, %eax 732 jnz L(Shl4LoopExit) 733 734 palignr $4, %xmm1, %xmm2 735 movaps %xmm3, %xmm1 736 movaps %xmm2, (%edx) 737 movaps 28(%ecx), %xmm2 738 739 pcmpeqb %xmm2, %xmm0 740 lea 16(%edx), %edx 741 pmovmskb %xmm0, %eax 742 lea 16(%ecx), %ecx 743 movaps %xmm2, %xmm3 744# ifdef USE_AS_STRNCPY 745 sub $16, %ebx 746 jbe L(StrncpyExit4Case2OrCase3) 747# endif 748 test %eax, %eax 749 jnz L(Shl4LoopExit) 750 751 palignr $4, %xmm1, %xmm2 752 movaps %xmm2, (%edx) 753 movaps 28(%ecx), %xmm2 754 movaps %xmm3, %xmm1 755 756 pcmpeqb %xmm2, %xmm0 757 lea 16(%edx), %edx 758 pmovmskb %xmm0, %eax 759 lea 16(%ecx), %ecx 760 movaps %xmm2, %xmm3 761# ifdef USE_AS_STRNCPY 762 sub $16, %ebx 763 jbe L(StrncpyExit4Case2OrCase3) 764# endif 765 test %eax, %eax 766 jnz L(Shl4LoopExit) 767 768 palignr $4, %xmm1, %xmm2 769 movaps %xmm2, (%edx) 770 movaps 28(%ecx), %xmm2 771 772 pcmpeqb %xmm2, %xmm0 773 lea 16(%edx), %edx 774 pmovmskb %xmm0, %eax 775 lea 16(%ecx), %ecx 776# ifdef USE_AS_STRNCPY 777 sub $16, %ebx 778 jbe L(StrncpyExit4Case2OrCase3) 779# endif 780 test %eax, %eax 781 jnz L(Shl4LoopExit) 782 783 palignr $4, %xmm3, %xmm2 784 movaps %xmm2, (%edx) 785 lea 28(%ecx), %ecx 786 lea 16(%edx), %edx 787 788 mov %ecx, %eax 789 and $-0x40, %ecx 790 sub %ecx, %eax 791 lea -12(%ecx), %ecx 792 sub %eax, %edx 793# ifdef USE_AS_STRNCPY 794 add %eax, %ebx 795# endif 796 movaps -4(%ecx), %xmm1 797 798L(Shl4LoopStart): 799 movaps 12(%ecx), %xmm2 800 movaps 28(%ecx), %xmm3 801 movaps %xmm3, %xmm6 802 movaps 44(%ecx), %xmm4 803 movaps %xmm4, %xmm7 804 movaps 60(%ecx), %xmm5 805 pminub %xmm2, %xmm6 806 pminub %xmm5, %xmm7 807 pminub %xmm6, %xmm7 808 pcmpeqb %xmm0, %xmm7 809 pmovmskb %xmm7, %eax 810 movaps %xmm5, %xmm7 811 palignr $4, %xmm4, %xmm5 812 test %eax, %eax 813 palignr $4, %xmm3, %xmm4 814 jnz L(Shl4Start) 815# ifdef USE_AS_STRNCPY 816 sub 
$64, %ebx 817 jbe L(StrncpyLeave4) 818# endif 819 palignr $4, %xmm2, %xmm3 820 lea 64(%ecx), %ecx 821 palignr $4, %xmm1, %xmm2 822 movaps %xmm7, %xmm1 823 movaps %xmm5, 48(%edx) 824 movaps %xmm4, 32(%edx) 825 movaps %xmm3, 16(%edx) 826 movaps %xmm2, (%edx) 827 lea 64(%edx), %edx 828 jmp L(Shl4LoopStart) 829 830L(Shl4LoopExit): 831 movlpd (%ecx), %xmm0 832 movl 8(%ecx), %esi 833 movlpd %xmm0, (%edx) 834 movl %esi, 8(%edx) 835 mov $12, %esi 836 jmp L(CopyFrom1To16Bytes) 837 838 .p2align 4 839L(Shl5): 840 movaps -5(%ecx), %xmm1 841 movaps 11(%ecx), %xmm2 842L(Shl5Start): 843 pcmpeqb %xmm2, %xmm0 844 pmovmskb %xmm0, %eax 845 movaps %xmm2, %xmm3 846# ifdef USE_AS_STRNCPY 847 sub $16, %ebx 848 jbe L(StrncpyExit5Case2OrCase3) 849# endif 850 test %eax, %eax 851 jnz L(Shl5LoopExit) 852 853 palignr $5, %xmm1, %xmm2 854 movaps %xmm3, %xmm1 855 movaps %xmm2, (%edx) 856 movaps 27(%ecx), %xmm2 857 858 pcmpeqb %xmm2, %xmm0 859 lea 16(%edx), %edx 860 pmovmskb %xmm0, %eax 861 lea 16(%ecx), %ecx 862 movaps %xmm2, %xmm3 863# ifdef USE_AS_STRNCPY 864 sub $16, %ebx 865 jbe L(StrncpyExit5Case2OrCase3) 866# endif 867 test %eax, %eax 868 jnz L(Shl5LoopExit) 869 870 palignr $5, %xmm1, %xmm2 871 movaps %xmm2, (%edx) 872 movaps 27(%ecx), %xmm2 873 movaps %xmm3, %xmm1 874 875 pcmpeqb %xmm2, %xmm0 876 lea 16(%edx), %edx 877 pmovmskb %xmm0, %eax 878 lea 16(%ecx), %ecx 879 movaps %xmm2, %xmm3 880# ifdef USE_AS_STRNCPY 881 sub $16, %ebx 882 jbe L(StrncpyExit5Case2OrCase3) 883# endif 884 test %eax, %eax 885 jnz L(Shl5LoopExit) 886 887 palignr $5, %xmm1, %xmm2 888 movaps %xmm2, (%edx) 889 movaps 27(%ecx), %xmm2 890 891 pcmpeqb %xmm2, %xmm0 892 lea 16(%edx), %edx 893 pmovmskb %xmm0, %eax 894 lea 16(%ecx), %ecx 895# ifdef USE_AS_STRNCPY 896 sub $16, %ebx 897 jbe L(StrncpyExit5Case2OrCase3) 898# endif 899 test %eax, %eax 900 jnz L(Shl5LoopExit) 901 902 palignr $5, %xmm3, %xmm2 903 movaps %xmm2, (%edx) 904 lea 27(%ecx), %ecx 905 lea 16(%edx), %edx 906 907 mov %ecx, %eax 908 and $-0x40, %ecx 909 sub 
%ecx, %eax 910 lea -11(%ecx), %ecx 911 sub %eax, %edx 912# ifdef USE_AS_STRNCPY 913 add %eax, %ebx 914# endif 915 movaps -5(%ecx), %xmm1 916 917L(Shl5LoopStart): 918 movaps 11(%ecx), %xmm2 919 movaps 27(%ecx), %xmm3 920 movaps %xmm3, %xmm6 921 movaps 43(%ecx), %xmm4 922 movaps %xmm4, %xmm7 923 movaps 59(%ecx), %xmm5 924 pminub %xmm2, %xmm6 925 pminub %xmm5, %xmm7 926 pminub %xmm6, %xmm7 927 pcmpeqb %xmm0, %xmm7 928 pmovmskb %xmm7, %eax 929 movaps %xmm5, %xmm7 930 palignr $5, %xmm4, %xmm5 931 test %eax, %eax 932 palignr $5, %xmm3, %xmm4 933 jnz L(Shl5Start) 934# ifdef USE_AS_STRNCPY 935 sub $64, %ebx 936 jbe L(StrncpyLeave5) 937# endif 938 palignr $5, %xmm2, %xmm3 939 lea 64(%ecx), %ecx 940 palignr $5, %xmm1, %xmm2 941 movaps %xmm7, %xmm1 942 movaps %xmm5, 48(%edx) 943 movaps %xmm4, 32(%edx) 944 movaps %xmm3, 16(%edx) 945 movaps %xmm2, (%edx) 946 lea 64(%edx), %edx 947 jmp L(Shl5LoopStart) 948 949L(Shl5LoopExit): 950 movlpd (%ecx), %xmm0 951 movl 7(%ecx), %esi 952 movlpd %xmm0, (%edx) 953 movl %esi, 7(%edx) 954 mov $11, %esi 955 jmp L(CopyFrom1To16Bytes) 956 957 .p2align 4 958L(Shl6): 959 movaps -6(%ecx), %xmm1 960 movaps 10(%ecx), %xmm2 961L(Shl6Start): 962 pcmpeqb %xmm2, %xmm0 963 pmovmskb %xmm0, %eax 964 movaps %xmm2, %xmm3 965# ifdef USE_AS_STRNCPY 966 sub $16, %ebx 967 jbe L(StrncpyExit6Case2OrCase3) 968# endif 969 test %eax, %eax 970 jnz L(Shl6LoopExit) 971 972 palignr $6, %xmm1, %xmm2 973 movaps %xmm3, %xmm1 974 movaps %xmm2, (%edx) 975 movaps 26(%ecx), %xmm2 976 977 pcmpeqb %xmm2, %xmm0 978 lea 16(%edx), %edx 979 pmovmskb %xmm0, %eax 980 lea 16(%ecx), %ecx 981 movaps %xmm2, %xmm3 982# ifdef USE_AS_STRNCPY 983 sub $16, %ebx 984 jbe L(StrncpyExit6Case2OrCase3) 985# endif 986 test %eax, %eax 987 jnz L(Shl6LoopExit) 988 989 palignr $6, %xmm1, %xmm2 990 movaps %xmm2, (%edx) 991 movaps 26(%ecx), %xmm2 992 movaps %xmm3, %xmm1 993 994 pcmpeqb %xmm2, %xmm0 995 lea 16(%edx), %edx 996 pmovmskb %xmm0, %eax 997 lea 16(%ecx), %ecx 998 movaps %xmm2, %xmm3 999# ifdef 
USE_AS_STRNCPY 1000 sub $16, %ebx 1001 jbe L(StrncpyExit6Case2OrCase3) 1002# endif 1003 test %eax, %eax 1004 jnz L(Shl6LoopExit) 1005 1006 palignr $6, %xmm1, %xmm2 1007 movaps %xmm2, (%edx) 1008 movaps 26(%ecx), %xmm2 1009 1010 pcmpeqb %xmm2, %xmm0 1011 lea 16(%edx), %edx 1012 pmovmskb %xmm0, %eax 1013 lea 16(%ecx), %ecx 1014# ifdef USE_AS_STRNCPY 1015 sub $16, %ebx 1016 jbe L(StrncpyExit6Case2OrCase3) 1017# endif 1018 test %eax, %eax 1019 jnz L(Shl6LoopExit) 1020 1021 palignr $6, %xmm3, %xmm2 1022 movaps %xmm2, (%edx) 1023 lea 26(%ecx), %ecx 1024 lea 16(%edx), %edx 1025 1026 mov %ecx, %eax 1027 and $-0x40, %ecx 1028 sub %ecx, %eax 1029 lea -10(%ecx), %ecx 1030 sub %eax, %edx 1031# ifdef USE_AS_STRNCPY 1032 add %eax, %ebx 1033# endif 1034 movaps -6(%ecx), %xmm1 1035 1036L(Shl6LoopStart): 1037 movaps 10(%ecx), %xmm2 1038 movaps 26(%ecx), %xmm3 1039 movaps %xmm3, %xmm6 1040 movaps 42(%ecx), %xmm4 1041 movaps %xmm4, %xmm7 1042 movaps 58(%ecx), %xmm5 1043 pminub %xmm2, %xmm6 1044 pminub %xmm5, %xmm7 1045 pminub %xmm6, %xmm7 1046 pcmpeqb %xmm0, %xmm7 1047 pmovmskb %xmm7, %eax 1048 movaps %xmm5, %xmm7 1049 palignr $6, %xmm4, %xmm5 1050 test %eax, %eax 1051 palignr $6, %xmm3, %xmm4 1052 jnz L(Shl6Start) 1053# ifdef USE_AS_STRNCPY 1054 sub $64, %ebx 1055 jbe L(StrncpyLeave6) 1056# endif 1057 palignr $6, %xmm2, %xmm3 1058 lea 64(%ecx), %ecx 1059 palignr $6, %xmm1, %xmm2 1060 movaps %xmm7, %xmm1 1061 movaps %xmm5, 48(%edx) 1062 movaps %xmm4, 32(%edx) 1063 movaps %xmm3, 16(%edx) 1064 movaps %xmm2, (%edx) 1065 lea 64(%edx), %edx 1066 jmp L(Shl6LoopStart) 1067 1068L(Shl6LoopExit): 1069 movlpd (%ecx), %xmm0 1070 movl 6(%ecx), %esi 1071 movlpd %xmm0, (%edx) 1072 movl %esi, 6(%edx) 1073 mov $10, %esi 1074 jmp L(CopyFrom1To16Bytes) 1075 1076 .p2align 4 1077L(Shl7): 1078 movaps -7(%ecx), %xmm1 1079 movaps 9(%ecx), %xmm2 1080L(Shl7Start): 1081 pcmpeqb %xmm2, %xmm0 1082 pmovmskb %xmm0, %eax 1083 movaps %xmm2, %xmm3 1084# ifdef USE_AS_STRNCPY 1085 sub $16, %ebx 1086 jbe 
L(StrncpyExit7Case2OrCase3) 1087# endif 1088 test %eax, %eax 1089 jnz L(Shl7LoopExit) 1090 1091 palignr $7, %xmm1, %xmm2 1092 movaps %xmm3, %xmm1 1093 movaps %xmm2, (%edx) 1094 movaps 25(%ecx), %xmm2 1095 1096 pcmpeqb %xmm2, %xmm0 1097 lea 16(%edx), %edx 1098 pmovmskb %xmm0, %eax 1099 lea 16(%ecx), %ecx 1100 movaps %xmm2, %xmm3 1101# ifdef USE_AS_STRNCPY 1102 sub $16, %ebx 1103 jbe L(StrncpyExit7Case2OrCase3) 1104# endif 1105 test %eax, %eax 1106 jnz L(Shl7LoopExit) 1107 1108 palignr $7, %xmm1, %xmm2 1109 movaps %xmm2, (%edx) 1110 movaps 25(%ecx), %xmm2 1111 movaps %xmm3, %xmm1 1112 1113 pcmpeqb %xmm2, %xmm0 1114 lea 16(%edx), %edx 1115 pmovmskb %xmm0, %eax 1116 lea 16(%ecx), %ecx 1117 movaps %xmm2, %xmm3 1118# ifdef USE_AS_STRNCPY 1119 sub $16, %ebx 1120 jbe L(StrncpyExit7Case2OrCase3) 1121# endif 1122 test %eax, %eax 1123 jnz L(Shl7LoopExit) 1124 1125 palignr $7, %xmm1, %xmm2 1126 movaps %xmm2, (%edx) 1127 movaps 25(%ecx), %xmm2 1128 1129 pcmpeqb %xmm2, %xmm0 1130 lea 16(%edx), %edx 1131 pmovmskb %xmm0, %eax 1132 lea 16(%ecx), %ecx 1133# ifdef USE_AS_STRNCPY 1134 sub $16, %ebx 1135 jbe L(StrncpyExit7Case2OrCase3) 1136# endif 1137 test %eax, %eax 1138 jnz L(Shl7LoopExit) 1139 1140 palignr $7, %xmm3, %xmm2 1141 movaps %xmm2, (%edx) 1142 lea 25(%ecx), %ecx 1143 lea 16(%edx), %edx 1144 1145 mov %ecx, %eax 1146 and $-0x40, %ecx 1147 sub %ecx, %eax 1148 lea -9(%ecx), %ecx 1149 sub %eax, %edx 1150# ifdef USE_AS_STRNCPY 1151 add %eax, %ebx 1152# endif 1153 movaps -7(%ecx), %xmm1 1154 1155L(Shl7LoopStart): 1156 movaps 9(%ecx), %xmm2 1157 movaps 25(%ecx), %xmm3 1158 movaps %xmm3, %xmm6 1159 movaps 41(%ecx), %xmm4 1160 movaps %xmm4, %xmm7 1161 movaps 57(%ecx), %xmm5 1162 pminub %xmm2, %xmm6 1163 pminub %xmm5, %xmm7 1164 pminub %xmm6, %xmm7 1165 pcmpeqb %xmm0, %xmm7 1166 pmovmskb %xmm7, %eax 1167 movaps %xmm5, %xmm7 1168 palignr $7, %xmm4, %xmm5 1169 test %eax, %eax 1170 palignr $7, %xmm3, %xmm4 1171 jnz L(Shl7Start) 1172# ifdef USE_AS_STRNCPY 1173 sub $64, %ebx 1174 jbe 
L(StrncpyLeave7) 1175# endif 1176 palignr $7, %xmm2, %xmm3 1177 lea 64(%ecx), %ecx 1178 palignr $7, %xmm1, %xmm2 1179 movaps %xmm7, %xmm1 1180 movaps %xmm5, 48(%edx) 1181 movaps %xmm4, 32(%edx) 1182 movaps %xmm3, 16(%edx) 1183 movaps %xmm2, (%edx) 1184 lea 64(%edx), %edx 1185 jmp L(Shl7LoopStart) 1186 1187L(Shl7LoopExit): 1188 movlpd (%ecx), %xmm0 1189 movl 5(%ecx), %esi 1190 movlpd %xmm0, (%edx) 1191 movl %esi, 5(%edx) 1192 mov $9, %esi 1193 jmp L(CopyFrom1To16Bytes) 1194 1195 .p2align 4 1196L(Shl8): 1197 movaps -8(%ecx), %xmm1 1198 movaps 8(%ecx), %xmm2 1199L(Shl8Start): 1200 pcmpeqb %xmm2, %xmm0 1201 pmovmskb %xmm0, %eax 1202 movaps %xmm2, %xmm3 1203# ifdef USE_AS_STRNCPY 1204 sub $16, %ebx 1205 jbe L(StrncpyExit8Case2OrCase3) 1206# endif 1207 test %eax, %eax 1208 jnz L(Shl8LoopExit) 1209 1210 palignr $8, %xmm1, %xmm2 1211 movaps %xmm3, %xmm1 1212 movaps %xmm2, (%edx) 1213 movaps 24(%ecx), %xmm2 1214 1215 pcmpeqb %xmm2, %xmm0 1216 lea 16(%edx), %edx 1217 pmovmskb %xmm0, %eax 1218 lea 16(%ecx), %ecx 1219 movaps %xmm2, %xmm3 1220# ifdef USE_AS_STRNCPY 1221 sub $16, %ebx 1222 jbe L(StrncpyExit8Case2OrCase3) 1223# endif 1224 test %eax, %eax 1225 jnz L(Shl8LoopExit) 1226 1227 palignr $8, %xmm1, %xmm2 1228 movaps %xmm2, (%edx) 1229 movaps 24(%ecx), %xmm2 1230 movaps %xmm3, %xmm1 1231 1232 pcmpeqb %xmm2, %xmm0 1233 lea 16(%edx), %edx 1234 pmovmskb %xmm0, %eax 1235 lea 16(%ecx), %ecx 1236 movaps %xmm2, %xmm3 1237# ifdef USE_AS_STRNCPY 1238 sub $16, %ebx 1239 jbe L(StrncpyExit8Case2OrCase3) 1240# endif 1241 test %eax, %eax 1242 jnz L(Shl8LoopExit) 1243 1244 palignr $8, %xmm1, %xmm2 1245 movaps %xmm2, (%edx) 1246 movaps 24(%ecx), %xmm2 1247 1248 pcmpeqb %xmm2, %xmm0 1249 lea 16(%edx), %edx 1250 pmovmskb %xmm0, %eax 1251 lea 16(%ecx), %ecx 1252# ifdef USE_AS_STRNCPY 1253 sub $16, %ebx 1254 jbe L(StrncpyExit8Case2OrCase3) 1255# endif 1256 test %eax, %eax 1257 jnz L(Shl8LoopExit) 1258 1259 palignr $8, %xmm3, %xmm2 1260 movaps %xmm2, (%edx) 1261 lea 24(%ecx), %ecx 1262 lea 
16(%edx), %edx 1263 1264 mov %ecx, %eax 1265 and $-0x40, %ecx 1266 sub %ecx, %eax 1267 lea -8(%ecx), %ecx 1268 sub %eax, %edx 1269# ifdef USE_AS_STRNCPY 1270 add %eax, %ebx 1271# endif 1272 movaps -8(%ecx), %xmm1 1273 1274L(Shl8LoopStart): 1275 movaps 8(%ecx), %xmm2 1276 movaps 24(%ecx), %xmm3 1277 movaps %xmm3, %xmm6 1278 movaps 40(%ecx), %xmm4 1279 movaps %xmm4, %xmm7 1280 movaps 56(%ecx), %xmm5 1281 pminub %xmm2, %xmm6 1282 pminub %xmm5, %xmm7 1283 pminub %xmm6, %xmm7 1284 pcmpeqb %xmm0, %xmm7 1285 pmovmskb %xmm7, %eax 1286 movaps %xmm5, %xmm7 1287 palignr $8, %xmm4, %xmm5 1288 test %eax, %eax 1289 palignr $8, %xmm3, %xmm4 1290 jnz L(Shl8Start) 1291# ifdef USE_AS_STRNCPY 1292 sub $64, %ebx 1293 jbe L(StrncpyLeave8) 1294# endif 1295 palignr $8, %xmm2, %xmm3 1296 lea 64(%ecx), %ecx 1297 palignr $8, %xmm1, %xmm2 1298 movaps %xmm7, %xmm1 1299 movaps %xmm5, 48(%edx) 1300 movaps %xmm4, 32(%edx) 1301 movaps %xmm3, 16(%edx) 1302 movaps %xmm2, (%edx) 1303 lea 64(%edx), %edx 1304 jmp L(Shl8LoopStart) 1305 1306L(Shl8LoopExit): 1307 movlpd (%ecx), %xmm0 1308 movlpd %xmm0, (%edx) 1309 mov $8, %esi 1310 jmp L(CopyFrom1To16Bytes) 1311 1312 .p2align 4 1313L(Shl9): 1314 movaps -9(%ecx), %xmm1 1315 movaps 7(%ecx), %xmm2 1316L(Shl9Start): 1317 pcmpeqb %xmm2, %xmm0 1318 pmovmskb %xmm0, %eax 1319 movaps %xmm2, %xmm3 1320# ifdef USE_AS_STRNCPY 1321 sub $16, %ebx 1322 jbe L(StrncpyExit9Case2OrCase3) 1323# endif 1324 test %eax, %eax 1325 jnz L(Shl9LoopExit) 1326 1327 palignr $9, %xmm1, %xmm2 1328 movaps %xmm3, %xmm1 1329 movaps %xmm2, (%edx) 1330 movaps 23(%ecx), %xmm2 1331 1332 pcmpeqb %xmm2, %xmm0 1333 lea 16(%edx), %edx 1334 pmovmskb %xmm0, %eax 1335 lea 16(%ecx), %ecx 1336 movaps %xmm2, %xmm3 1337# ifdef USE_AS_STRNCPY 1338 sub $16, %ebx 1339 jbe L(StrncpyExit9Case2OrCase3) 1340# endif 1341 test %eax, %eax 1342 jnz L(Shl9LoopExit) 1343 1344 palignr $9, %xmm1, %xmm2 1345 movaps %xmm2, (%edx) 1346 movaps 23(%ecx), %xmm2 1347 movaps %xmm3, %xmm1 1348 1349 pcmpeqb %xmm2, %xmm0 1350 lea 
16(%edx), %edx 1351 pmovmskb %xmm0, %eax 1352 lea 16(%ecx), %ecx 1353 movaps %xmm2, %xmm3 1354# ifdef USE_AS_STRNCPY 1355 sub $16, %ebx 1356 jbe L(StrncpyExit9Case2OrCase3) 1357# endif 1358 test %eax, %eax 1359 jnz L(Shl9LoopExit) 1360 1361 palignr $9, %xmm1, %xmm2 1362 movaps %xmm2, (%edx) 1363 movaps 23(%ecx), %xmm2 1364 1365 pcmpeqb %xmm2, %xmm0 1366 lea 16(%edx), %edx 1367 pmovmskb %xmm0, %eax 1368 lea 16(%ecx), %ecx 1369# ifdef USE_AS_STRNCPY 1370 sub $16, %ebx 1371 jbe L(StrncpyExit9Case2OrCase3) 1372# endif 1373 test %eax, %eax 1374 jnz L(Shl9LoopExit) 1375 1376 palignr $9, %xmm3, %xmm2 1377 movaps %xmm2, (%edx) 1378 lea 23(%ecx), %ecx 1379 lea 16(%edx), %edx 1380 1381 mov %ecx, %eax 1382 and $-0x40, %ecx 1383 sub %ecx, %eax 1384 lea -7(%ecx), %ecx 1385 sub %eax, %edx 1386# ifdef USE_AS_STRNCPY 1387 add %eax, %ebx 1388# endif 1389 movaps -9(%ecx), %xmm1 1390 1391L(Shl9LoopStart): 1392 movaps 7(%ecx), %xmm2 1393 movaps 23(%ecx), %xmm3 1394 movaps %xmm3, %xmm6 1395 movaps 39(%ecx), %xmm4 1396 movaps %xmm4, %xmm7 1397 movaps 55(%ecx), %xmm5 1398 pminub %xmm2, %xmm6 1399 pminub %xmm5, %xmm7 1400 pminub %xmm6, %xmm7 1401 pcmpeqb %xmm0, %xmm7 1402 pmovmskb %xmm7, %eax 1403 movaps %xmm5, %xmm7 1404 palignr $9, %xmm4, %xmm5 1405 test %eax, %eax 1406 palignr $9, %xmm3, %xmm4 1407 jnz L(Shl9Start) 1408# ifdef USE_AS_STRNCPY 1409 sub $64, %ebx 1410 jbe L(StrncpyLeave9) 1411# endif 1412 palignr $9, %xmm2, %xmm3 1413 lea 64(%ecx), %ecx 1414 palignr $9, %xmm1, %xmm2 1415 movaps %xmm7, %xmm1 1416 movaps %xmm5, 48(%edx) 1417 movaps %xmm4, 32(%edx) 1418 movaps %xmm3, 16(%edx) 1419 movaps %xmm2, (%edx) 1420 lea 64(%edx), %edx 1421 jmp L(Shl9LoopStart) 1422 1423L(Shl9LoopExit): 1424 movlpd -1(%ecx), %xmm0 1425 movlpd %xmm0, -1(%edx) 1426 mov $7, %esi 1427 jmp L(CopyFrom1To16Bytes) 1428 1429 .p2align 4 1430L(Shl10): 1431 movaps -10(%ecx), %xmm1 1432 movaps 6(%ecx), %xmm2 1433L(Shl10Start): 1434 pcmpeqb %xmm2, %xmm0 1435 pmovmskb %xmm0, %eax 1436 movaps %xmm2, %xmm3 1437# ifdef 
USE_AS_STRNCPY 1438 sub $16, %ebx 1439 jbe L(StrncpyExit10Case2OrCase3) 1440# endif 1441 test %eax, %eax 1442 jnz L(Shl10LoopExit) 1443 1444 palignr $10, %xmm1, %xmm2 1445 movaps %xmm3, %xmm1 1446 movaps %xmm2, (%edx) 1447 movaps 22(%ecx), %xmm2 1448 1449 pcmpeqb %xmm2, %xmm0 1450 lea 16(%edx), %edx 1451 pmovmskb %xmm0, %eax 1452 lea 16(%ecx), %ecx 1453 movaps %xmm2, %xmm3 1454# ifdef USE_AS_STRNCPY 1455 sub $16, %ebx 1456 jbe L(StrncpyExit10Case2OrCase3) 1457# endif 1458 test %eax, %eax 1459 jnz L(Shl10LoopExit) 1460 1461 palignr $10, %xmm1, %xmm2 1462 movaps %xmm2, (%edx) 1463 movaps 22(%ecx), %xmm2 1464 movaps %xmm3, %xmm1 1465 1466 pcmpeqb %xmm2, %xmm0 1467 lea 16(%edx), %edx 1468 pmovmskb %xmm0, %eax 1469 lea 16(%ecx), %ecx 1470 movaps %xmm2, %xmm3 1471# ifdef USE_AS_STRNCPY 1472 sub $16, %ebx 1473 jbe L(StrncpyExit10Case2OrCase3) 1474# endif 1475 test %eax, %eax 1476 jnz L(Shl10LoopExit) 1477 1478 palignr $10, %xmm1, %xmm2 1479 movaps %xmm2, (%edx) 1480 movaps 22(%ecx), %xmm2 1481 1482 pcmpeqb %xmm2, %xmm0 1483 lea 16(%edx), %edx 1484 pmovmskb %xmm0, %eax 1485 lea 16(%ecx), %ecx 1486# ifdef USE_AS_STRNCPY 1487 sub $16, %ebx 1488 jbe L(StrncpyExit10Case2OrCase3) 1489# endif 1490 test %eax, %eax 1491 jnz L(Shl10LoopExit) 1492 1493 palignr $10, %xmm3, %xmm2 1494 movaps %xmm2, (%edx) 1495 lea 22(%ecx), %ecx 1496 lea 16(%edx), %edx 1497 1498 mov %ecx, %eax 1499 and $-0x40, %ecx 1500 sub %ecx, %eax 1501 lea -6(%ecx), %ecx 1502 sub %eax, %edx 1503# ifdef USE_AS_STRNCPY 1504 add %eax, %ebx 1505# endif 1506 movaps -10(%ecx), %xmm1 1507 1508L(Shl10LoopStart): 1509 movaps 6(%ecx), %xmm2 1510 movaps 22(%ecx), %xmm3 1511 movaps %xmm3, %xmm6 1512 movaps 38(%ecx), %xmm4 1513 movaps %xmm4, %xmm7 1514 movaps 54(%ecx), %xmm5 1515 pminub %xmm2, %xmm6 1516 pminub %xmm5, %xmm7 1517 pminub %xmm6, %xmm7 1518 pcmpeqb %xmm0, %xmm7 1519 pmovmskb %xmm7, %eax 1520 movaps %xmm5, %xmm7 1521 palignr $10, %xmm4, %xmm5 1522 test %eax, %eax 1523 palignr $10, %xmm3, %xmm4 1524 jnz 
L(Shl10Start) 1525# ifdef USE_AS_STRNCPY 1526 sub $64, %ebx 1527 jbe L(StrncpyLeave10) 1528# endif 1529 palignr $10, %xmm2, %xmm3 1530 lea 64(%ecx), %ecx 1531 palignr $10, %xmm1, %xmm2 1532 movaps %xmm7, %xmm1 1533 movaps %xmm5, 48(%edx) 1534 movaps %xmm4, 32(%edx) 1535 movaps %xmm3, 16(%edx) 1536 movaps %xmm2, (%edx) 1537 lea 64(%edx), %edx 1538 jmp L(Shl10LoopStart) 1539 1540L(Shl10LoopExit): 1541 movlpd -2(%ecx), %xmm0 1542 movlpd %xmm0, -2(%edx) 1543 mov $6, %esi 1544 jmp L(CopyFrom1To16Bytes) 1545 1546 .p2align 4 1547L(Shl11): 1548 movaps -11(%ecx), %xmm1 1549 movaps 5(%ecx), %xmm2 1550L(Shl11Start): 1551 pcmpeqb %xmm2, %xmm0 1552 pmovmskb %xmm0, %eax 1553 movaps %xmm2, %xmm3 1554# ifdef USE_AS_STRNCPY 1555 sub $16, %ebx 1556 jbe L(StrncpyExit11Case2OrCase3) 1557# endif 1558 test %eax, %eax 1559 jnz L(Shl11LoopExit) 1560 1561 palignr $11, %xmm1, %xmm2 1562 movaps %xmm3, %xmm1 1563 movaps %xmm2, (%edx) 1564 movaps 21(%ecx), %xmm2 1565 1566 pcmpeqb %xmm2, %xmm0 1567 lea 16(%edx), %edx 1568 pmovmskb %xmm0, %eax 1569 lea 16(%ecx), %ecx 1570 movaps %xmm2, %xmm3 1571# ifdef USE_AS_STRNCPY 1572 sub $16, %ebx 1573 jbe L(StrncpyExit11Case2OrCase3) 1574# endif 1575 test %eax, %eax 1576 jnz L(Shl11LoopExit) 1577 1578 palignr $11, %xmm1, %xmm2 1579 movaps %xmm2, (%edx) 1580 movaps 21(%ecx), %xmm2 1581 movaps %xmm3, %xmm1 1582 1583 pcmpeqb %xmm2, %xmm0 1584 lea 16(%edx), %edx 1585 pmovmskb %xmm0, %eax 1586 lea 16(%ecx), %ecx 1587 movaps %xmm2, %xmm3 1588# ifdef USE_AS_STRNCPY 1589 sub $16, %ebx 1590 jbe L(StrncpyExit11Case2OrCase3) 1591# endif 1592 test %eax, %eax 1593 jnz L(Shl11LoopExit) 1594 1595 palignr $11, %xmm1, %xmm2 1596 movaps %xmm2, (%edx) 1597 movaps 21(%ecx), %xmm2 1598 1599 pcmpeqb %xmm2, %xmm0 1600 lea 16(%edx), %edx 1601 pmovmskb %xmm0, %eax 1602 lea 16(%ecx), %ecx 1603# ifdef USE_AS_STRNCPY 1604 sub $16, %ebx 1605 jbe L(StrncpyExit11Case2OrCase3) 1606# endif 1607 test %eax, %eax 1608 jnz L(Shl11LoopExit) 1609 1610 palignr $11, %xmm3, %xmm2 1611 movaps 
%xmm2, (%edx) 1612 lea 21(%ecx), %ecx 1613 lea 16(%edx), %edx 1614 1615 mov %ecx, %eax 1616 and $-0x40, %ecx 1617 sub %ecx, %eax 1618 lea -5(%ecx), %ecx 1619 sub %eax, %edx 1620# ifdef USE_AS_STRNCPY 1621 add %eax, %ebx 1622# endif 1623 movaps -11(%ecx), %xmm1 1624 1625L(Shl11LoopStart): 1626 movaps 5(%ecx), %xmm2 1627 movaps 21(%ecx), %xmm3 1628 movaps %xmm3, %xmm6 1629 movaps 37(%ecx), %xmm4 1630 movaps %xmm4, %xmm7 1631 movaps 53(%ecx), %xmm5 1632 pminub %xmm2, %xmm6 1633 pminub %xmm5, %xmm7 1634 pminub %xmm6, %xmm7 1635 pcmpeqb %xmm0, %xmm7 1636 pmovmskb %xmm7, %eax 1637 movaps %xmm5, %xmm7 1638 palignr $11, %xmm4, %xmm5 1639 test %eax, %eax 1640 palignr $11, %xmm3, %xmm4 1641 jnz L(Shl11Start) 1642# ifdef USE_AS_STRNCPY 1643 sub $64, %ebx 1644 jbe L(StrncpyLeave11) 1645# endif 1646 palignr $11, %xmm2, %xmm3 1647 lea 64(%ecx), %ecx 1648 palignr $11, %xmm1, %xmm2 1649 movaps %xmm7, %xmm1 1650 movaps %xmm5, 48(%edx) 1651 movaps %xmm4, 32(%edx) 1652 movaps %xmm3, 16(%edx) 1653 movaps %xmm2, (%edx) 1654 lea 64(%edx), %edx 1655 jmp L(Shl11LoopStart) 1656 1657L(Shl11LoopExit): 1658 movlpd -3(%ecx), %xmm0 1659 movlpd %xmm0, -3(%edx) 1660 mov $5, %esi 1661 jmp L(CopyFrom1To16Bytes) 1662 1663 .p2align 4 1664L(Shl12): 1665 movaps -12(%ecx), %xmm1 1666 movaps 4(%ecx), %xmm2 1667L(Shl12Start): 1668 pcmpeqb %xmm2, %xmm0 1669 pmovmskb %xmm0, %eax 1670 movaps %xmm2, %xmm3 1671# ifdef USE_AS_STRNCPY 1672 sub $16, %ebx 1673 jbe L(StrncpyExit12Case2OrCase3) 1674# endif 1675 test %eax, %eax 1676 jnz L(Shl12LoopExit) 1677 1678 palignr $12, %xmm1, %xmm2 1679 movaps %xmm3, %xmm1 1680 movaps %xmm2, (%edx) 1681 movaps 20(%ecx), %xmm2 1682 1683 pcmpeqb %xmm2, %xmm0 1684 lea 16(%edx), %edx 1685 pmovmskb %xmm0, %eax 1686 lea 16(%ecx), %ecx 1687 movaps %xmm2, %xmm3 1688# ifdef USE_AS_STRNCPY 1689 sub $16, %ebx 1690 jbe L(StrncpyExit12Case2OrCase3) 1691# endif 1692 test %eax, %eax 1693 jnz L(Shl12LoopExit) 1694 1695 palignr $12, %xmm1, %xmm2 1696 movaps %xmm2, (%edx) 1697 movaps 20(%ecx), 
%xmm2 1698 movaps %xmm3, %xmm1 1699 1700 pcmpeqb %xmm2, %xmm0 1701 lea 16(%edx), %edx 1702 pmovmskb %xmm0, %eax 1703 lea 16(%ecx), %ecx 1704 movaps %xmm2, %xmm3 1705# ifdef USE_AS_STRNCPY 1706 sub $16, %ebx 1707 jbe L(StrncpyExit12Case2OrCase3) 1708# endif 1709 test %eax, %eax 1710 jnz L(Shl12LoopExit) 1711 1712 palignr $12, %xmm1, %xmm2 1713 movaps %xmm2, (%edx) 1714 movaps 20(%ecx), %xmm2 1715 1716 pcmpeqb %xmm2, %xmm0 1717 lea 16(%edx), %edx 1718 pmovmskb %xmm0, %eax 1719 lea 16(%ecx), %ecx 1720# ifdef USE_AS_STRNCPY 1721 sub $16, %ebx 1722 jbe L(StrncpyExit12Case2OrCase3) 1723# endif 1724 test %eax, %eax 1725 jnz L(Shl12LoopExit) 1726 1727 palignr $12, %xmm3, %xmm2 1728 movaps %xmm2, (%edx) 1729 lea 20(%ecx), %ecx 1730 lea 16(%edx), %edx 1731 1732 mov %ecx, %eax 1733 and $-0x40, %ecx 1734 sub %ecx, %eax 1735 lea -4(%ecx), %ecx 1736 sub %eax, %edx 1737# ifdef USE_AS_STRNCPY 1738 add %eax, %ebx 1739# endif 1740 movaps -12(%ecx), %xmm1 1741 1742L(Shl12LoopStart): 1743 movaps 4(%ecx), %xmm2 1744 movaps 20(%ecx), %xmm3 1745 movaps %xmm3, %xmm6 1746 movaps 36(%ecx), %xmm4 1747 movaps %xmm4, %xmm7 1748 movaps 52(%ecx), %xmm5 1749 pminub %xmm2, %xmm6 1750 pminub %xmm5, %xmm7 1751 pminub %xmm6, %xmm7 1752 pcmpeqb %xmm0, %xmm7 1753 pmovmskb %xmm7, %eax 1754 movaps %xmm5, %xmm7 1755 palignr $12, %xmm4, %xmm5 1756 test %eax, %eax 1757 palignr $12, %xmm3, %xmm4 1758 jnz L(Shl12Start) 1759# ifdef USE_AS_STRNCPY 1760 sub $64, %ebx 1761 jbe L(StrncpyLeave12) 1762# endif 1763 palignr $12, %xmm2, %xmm3 1764 lea 64(%ecx), %ecx 1765 palignr $12, %xmm1, %xmm2 1766 movaps %xmm7, %xmm1 1767 movaps %xmm5, 48(%edx) 1768 movaps %xmm4, 32(%edx) 1769 movaps %xmm3, 16(%edx) 1770 movaps %xmm2, (%edx) 1771 lea 64(%edx), %edx 1772 jmp L(Shl12LoopStart) 1773 1774L(Shl12LoopExit): 1775 movl (%ecx), %esi 1776 movl %esi, (%edx) 1777 mov $4, %esi 1778 jmp L(CopyFrom1To16Bytes) 1779 1780 .p2align 4 1781L(Shl13): 1782 movaps -13(%ecx), %xmm1 1783 movaps 3(%ecx), %xmm2 1784L(Shl13Start): 1785 pcmpeqb 
%xmm2, %xmm0 1786 pmovmskb %xmm0, %eax 1787 movaps %xmm2, %xmm3 1788# ifdef USE_AS_STRNCPY 1789 sub $16, %ebx 1790 jbe L(StrncpyExit13Case2OrCase3) 1791# endif 1792 test %eax, %eax 1793 jnz L(Shl13LoopExit) 1794 1795 palignr $13, %xmm1, %xmm2 1796 movaps %xmm3, %xmm1 1797 movaps %xmm2, (%edx) 1798 movaps 19(%ecx), %xmm2 1799 1800 pcmpeqb %xmm2, %xmm0 1801 lea 16(%edx), %edx 1802 pmovmskb %xmm0, %eax 1803 lea 16(%ecx), %ecx 1804 movaps %xmm2, %xmm3 1805# ifdef USE_AS_STRNCPY 1806 sub $16, %ebx 1807 jbe L(StrncpyExit13Case2OrCase3) 1808# endif 1809 test %eax, %eax 1810 jnz L(Shl13LoopExit) 1811 1812 palignr $13, %xmm1, %xmm2 1813 movaps %xmm2, (%edx) 1814 movaps 19(%ecx), %xmm2 1815 movaps %xmm3, %xmm1 1816 1817 pcmpeqb %xmm2, %xmm0 1818 lea 16(%edx), %edx 1819 pmovmskb %xmm0, %eax 1820 lea 16(%ecx), %ecx 1821 movaps %xmm2, %xmm3 1822# ifdef USE_AS_STRNCPY 1823 sub $16, %ebx 1824 jbe L(StrncpyExit13Case2OrCase3) 1825# endif 1826 test %eax, %eax 1827 jnz L(Shl13LoopExit) 1828 1829 palignr $13, %xmm1, %xmm2 1830 movaps %xmm2, (%edx) 1831 movaps 19(%ecx), %xmm2 1832 1833 pcmpeqb %xmm2, %xmm0 1834 lea 16(%edx), %edx 1835 pmovmskb %xmm0, %eax 1836 lea 16(%ecx), %ecx 1837# ifdef USE_AS_STRNCPY 1838 sub $16, %ebx 1839 jbe L(StrncpyExit13Case2OrCase3) 1840# endif 1841 test %eax, %eax 1842 jnz L(Shl13LoopExit) 1843 1844 palignr $13, %xmm3, %xmm2 1845 movaps %xmm2, (%edx) 1846 lea 19(%ecx), %ecx 1847 lea 16(%edx), %edx 1848 1849 mov %ecx, %eax 1850 and $-0x40, %ecx 1851 sub %ecx, %eax 1852 lea -3(%ecx), %ecx 1853 sub %eax, %edx 1854# ifdef USE_AS_STRNCPY 1855 add %eax, %ebx 1856# endif 1857 movaps -13(%ecx), %xmm1 1858 1859L(Shl13LoopStart): 1860 movaps 3(%ecx), %xmm2 1861 movaps 19(%ecx), %xmm3 1862 movaps %xmm3, %xmm6 1863 movaps 35(%ecx), %xmm4 1864 movaps %xmm4, %xmm7 1865 movaps 51(%ecx), %xmm5 1866 pminub %xmm2, %xmm6 1867 pminub %xmm5, %xmm7 1868 pminub %xmm6, %xmm7 1869 pcmpeqb %xmm0, %xmm7 1870 pmovmskb %xmm7, %eax 1871 movaps %xmm5, %xmm7 1872 palignr $13, %xmm4, 
%xmm5 1873 test %eax, %eax 1874 palignr $13, %xmm3, %xmm4 1875 jnz L(Shl13Start) 1876# ifdef USE_AS_STRNCPY 1877 sub $64, %ebx 1878 jbe L(StrncpyLeave13) 1879# endif 1880 palignr $13, %xmm2, %xmm3 1881 lea 64(%ecx), %ecx 1882 palignr $13, %xmm1, %xmm2 1883 movaps %xmm7, %xmm1 1884 movaps %xmm5, 48(%edx) 1885 movaps %xmm4, 32(%edx) 1886 movaps %xmm3, 16(%edx) 1887 movaps %xmm2, (%edx) 1888 lea 64(%edx), %edx 1889 jmp L(Shl13LoopStart) 1890 1891L(Shl13LoopExit): 1892 movl -1(%ecx), %esi 1893 movl %esi, -1(%edx) 1894 mov $3, %esi 1895 jmp L(CopyFrom1To16Bytes) 1896 1897 .p2align 4 1898L(Shl14): 1899 movaps -14(%ecx), %xmm1 1900 movaps 2(%ecx), %xmm2 1901L(Shl14Start): 1902 pcmpeqb %xmm2, %xmm0 1903 pmovmskb %xmm0, %eax 1904 movaps %xmm2, %xmm3 1905# ifdef USE_AS_STRNCPY 1906 sub $16, %ebx 1907 jbe L(StrncpyExit14Case2OrCase3) 1908# endif 1909 test %eax, %eax 1910 jnz L(Shl14LoopExit) 1911 1912 palignr $14, %xmm1, %xmm2 1913 movaps %xmm3, %xmm1 1914 movaps %xmm2, (%edx) 1915 movaps 18(%ecx), %xmm2 1916 1917 pcmpeqb %xmm2, %xmm0 1918 lea 16(%edx), %edx 1919 pmovmskb %xmm0, %eax 1920 lea 16(%ecx), %ecx 1921 movaps %xmm2, %xmm3 1922# ifdef USE_AS_STRNCPY 1923 sub $16, %ebx 1924 jbe L(StrncpyExit14Case2OrCase3) 1925# endif 1926 test %eax, %eax 1927 jnz L(Shl14LoopExit) 1928 1929 palignr $14, %xmm1, %xmm2 1930 movaps %xmm2, (%edx) 1931 movaps 18(%ecx), %xmm2 1932 movaps %xmm3, %xmm1 1933 1934 pcmpeqb %xmm2, %xmm0 1935 lea 16(%edx), %edx 1936 pmovmskb %xmm0, %eax 1937 lea 16(%ecx), %ecx 1938 movaps %xmm2, %xmm3 1939# ifdef USE_AS_STRNCPY 1940 sub $16, %ebx 1941 jbe L(StrncpyExit14Case2OrCase3) 1942# endif 1943 test %eax, %eax 1944 jnz L(Shl14LoopExit) 1945 1946 palignr $14, %xmm1, %xmm2 1947 movaps %xmm2, (%edx) 1948 movaps 18(%ecx), %xmm2 1949 1950 pcmpeqb %xmm2, %xmm0 1951 lea 16(%edx), %edx 1952 pmovmskb %xmm0, %eax 1953 lea 16(%ecx), %ecx 1954# ifdef USE_AS_STRNCPY 1955 sub $16, %ebx 1956 jbe L(StrncpyExit14Case2OrCase3) 1957# endif 1958 test %eax, %eax 1959 jnz 
L(Shl14LoopExit) 1960 1961 palignr $14, %xmm3, %xmm2 1962 movaps %xmm2, (%edx) 1963 lea 18(%ecx), %ecx 1964 lea 16(%edx), %edx 1965 1966 mov %ecx, %eax 1967 and $-0x40, %ecx 1968 sub %ecx, %eax 1969 lea -2(%ecx), %ecx 1970 sub %eax, %edx 1971# ifdef USE_AS_STRNCPY 1972 add %eax, %ebx 1973# endif 1974 movaps -14(%ecx), %xmm1 1975 1976L(Shl14LoopStart): 1977 movaps 2(%ecx), %xmm2 1978 movaps 18(%ecx), %xmm3 1979 movaps %xmm3, %xmm6 1980 movaps 34(%ecx), %xmm4 1981 movaps %xmm4, %xmm7 1982 movaps 50(%ecx), %xmm5 1983 pminub %xmm2, %xmm6 1984 pminub %xmm5, %xmm7 1985 pminub %xmm6, %xmm7 1986 pcmpeqb %xmm0, %xmm7 1987 pmovmskb %xmm7, %eax 1988 movaps %xmm5, %xmm7 1989 palignr $14, %xmm4, %xmm5 1990 test %eax, %eax 1991 palignr $14, %xmm3, %xmm4 1992 jnz L(Shl14Start) 1993# ifdef USE_AS_STRNCPY 1994 sub $64, %ebx 1995 jbe L(StrncpyLeave14) 1996# endif 1997 palignr $14, %xmm2, %xmm3 1998 lea 64(%ecx), %ecx 1999 palignr $14, %xmm1, %xmm2 2000 movaps %xmm7, %xmm1 2001 movaps %xmm5, 48(%edx) 2002 movaps %xmm4, 32(%edx) 2003 movaps %xmm3, 16(%edx) 2004 movaps %xmm2, (%edx) 2005 lea 64(%edx), %edx 2006 jmp L(Shl14LoopStart) 2007 2008L(Shl14LoopExit): 2009 movl -2(%ecx), %esi 2010 movl %esi, -2(%edx) 2011 mov $2, %esi 2012 jmp L(CopyFrom1To16Bytes) 2013 2014 .p2align 4 2015L(Shl15): 2016 movaps -15(%ecx), %xmm1 2017 movaps 1(%ecx), %xmm2 2018L(Shl15Start): 2019 pcmpeqb %xmm2, %xmm0 2020 pmovmskb %xmm0, %eax 2021 movaps %xmm2, %xmm3 2022# ifdef USE_AS_STRNCPY 2023 sub $16, %ebx 2024 jbe L(StrncpyExit15Case2OrCase3) 2025# endif 2026 test %eax, %eax 2027 jnz L(Shl15LoopExit) 2028 2029 palignr $15, %xmm1, %xmm2 2030 movaps %xmm3, %xmm1 2031 movaps %xmm2, (%edx) 2032 movaps 17(%ecx), %xmm2 2033 2034 pcmpeqb %xmm2, %xmm0 2035 lea 16(%edx), %edx 2036 pmovmskb %xmm0, %eax 2037 lea 16(%ecx), %ecx 2038 movaps %xmm2, %xmm3 2039# ifdef USE_AS_STRNCPY 2040 sub $16, %ebx 2041 jbe L(StrncpyExit15Case2OrCase3) 2042# endif 2043 test %eax, %eax 2044 jnz L(Shl15LoopExit) 2045 2046 palignr $15, 
%xmm1, %xmm2 2047 movaps %xmm2, (%edx) 2048 movaps 17(%ecx), %xmm2 2049 movaps %xmm3, %xmm1 2050 2051 pcmpeqb %xmm2, %xmm0 2052 lea 16(%edx), %edx 2053 pmovmskb %xmm0, %eax 2054 lea 16(%ecx), %ecx 2055 movaps %xmm2, %xmm3 2056# ifdef USE_AS_STRNCPY 2057 sub $16, %ebx 2058 jbe L(StrncpyExit15Case2OrCase3) 2059# endif 2060 test %eax, %eax 2061 jnz L(Shl15LoopExit) 2062 2063 palignr $15, %xmm1, %xmm2 2064 movaps %xmm2, (%edx) 2065 movaps 17(%ecx), %xmm2 2066 2067 pcmpeqb %xmm2, %xmm0 2068 lea 16(%edx), %edx 2069 pmovmskb %xmm0, %eax 2070 lea 16(%ecx), %ecx 2071# ifdef USE_AS_STRNCPY 2072 sub $16, %ebx 2073 jbe L(StrncpyExit15Case2OrCase3) 2074# endif 2075 test %eax, %eax 2076 jnz L(Shl15LoopExit) 2077 2078 palignr $15, %xmm3, %xmm2 2079 movaps %xmm2, (%edx) 2080 lea 17(%ecx), %ecx 2081 lea 16(%edx), %edx 2082 2083 mov %ecx, %eax 2084 and $-0x40, %ecx 2085 sub %ecx, %eax 2086 lea -1(%ecx), %ecx 2087 sub %eax, %edx 2088# ifdef USE_AS_STRNCPY 2089 add %eax, %ebx 2090# endif 2091 movaps -15(%ecx), %xmm1 2092 2093L(Shl15LoopStart): 2094 movaps 1(%ecx), %xmm2 2095 movaps 17(%ecx), %xmm3 2096 movaps %xmm3, %xmm6 2097 movaps 33(%ecx), %xmm4 2098 movaps %xmm4, %xmm7 2099 movaps 49(%ecx), %xmm5 2100 pminub %xmm2, %xmm6 2101 pminub %xmm5, %xmm7 2102 pminub %xmm6, %xmm7 2103 pcmpeqb %xmm0, %xmm7 2104 pmovmskb %xmm7, %eax 2105 movaps %xmm5, %xmm7 2106 palignr $15, %xmm4, %xmm5 2107 test %eax, %eax 2108 palignr $15, %xmm3, %xmm4 2109 jnz L(Shl15Start) 2110# ifdef USE_AS_STRNCPY 2111 sub $64, %ebx 2112 jbe L(StrncpyLeave15) 2113# endif 2114 palignr $15, %xmm2, %xmm3 2115 lea 64(%ecx), %ecx 2116 palignr $15, %xmm1, %xmm2 2117 movaps %xmm7, %xmm1 2118 movaps %xmm5, 48(%edx) 2119 movaps %xmm4, 32(%edx) 2120 movaps %xmm3, 16(%edx) 2121 movaps %xmm2, (%edx) 2122 lea 64(%edx), %edx 2123 jmp L(Shl15LoopStart) 2124 2125L(Shl15LoopExit): 2126 movl -3(%ecx), %esi 2127 movl %esi, -3(%edx) 2128 mov $1, %esi 2129# ifdef USE_AS_STRCAT 2130 jmp L(CopyFrom1To16Bytes) 2131# endif 2132 2133 2134# 
ifndef USE_AS_STRCAT 2135 2136 .p2align 4 2137L(CopyFrom1To16Bytes): 2138# ifdef USE_AS_STRNCPY 2139 add $16, %ebx 2140# endif 2141 add %esi, %edx 2142 add %esi, %ecx 2143 2144 POP (%esi) 2145 test %al, %al 2146 jz L(ExitHigh8) 2147 2148L(CopyFrom1To16BytesLess8): 2149 mov %al, %ah 2150 and $15, %ah 2151 jz L(ExitHigh4) 2152 2153 test $0x01, %al 2154 jnz L(Exit1) 2155 test $0x02, %al 2156 jnz L(Exit2) 2157 test $0x04, %al 2158 jnz L(Exit3) 2159 2160 .p2align 4 2161L(Exit4): 2162 movl (%ecx), %eax 2163 movl %eax, (%edx) 2164 SAVE_RESULT (3) 2165# ifdef USE_AS_STRNCPY 2166 sub $4, %ebx 2167 lea 4(%edx), %ecx 2168 jnz L(StrncpyFillTailWithZero1) 2169# ifdef USE_AS_STPCPY 2170 cmpb $1, (%eax) 2171 sbb $-1, %eax 2172# endif 2173# endif 2174 RETURN1 2175 2176 .p2align 4 2177L(ExitHigh4): 2178 test $0x10, %al 2179 jnz L(Exit5) 2180 test $0x20, %al 2181 jnz L(Exit6) 2182 test $0x40, %al 2183 jnz L(Exit7) 2184 2185 .p2align 4 2186L(Exit8): 2187 movlpd (%ecx), %xmm0 2188 movlpd %xmm0, (%edx) 2189 SAVE_RESULT (7) 2190# ifdef USE_AS_STRNCPY 2191 sub $8, %ebx 2192 lea 8(%edx), %ecx 2193 jnz L(StrncpyFillTailWithZero1) 2194# ifdef USE_AS_STPCPY 2195 cmpb $1, (%eax) 2196 sbb $-1, %eax 2197# endif 2198# endif 2199 RETURN1 2200 2201 .p2align 4 2202L(ExitHigh8): 2203 mov %ah, %al 2204 and $15, %al 2205 jz L(ExitHigh12) 2206 2207 test $0x01, %ah 2208 jnz L(Exit9) 2209 test $0x02, %ah 2210 jnz L(Exit10) 2211 test $0x04, %ah 2212 jnz L(Exit11) 2213 2214 .p2align 4 2215L(Exit12): 2216 movlpd (%ecx), %xmm0 2217 movl 8(%ecx), %eax 2218 movlpd %xmm0, (%edx) 2219 movl %eax, 8(%edx) 2220 SAVE_RESULT (11) 2221# ifdef USE_AS_STRNCPY 2222 sub $12, %ebx 2223 lea 12(%edx), %ecx 2224 jnz L(StrncpyFillTailWithZero1) 2225# ifdef USE_AS_STPCPY 2226 cmpb $1, (%eax) 2227 sbb $-1, %eax 2228# endif 2229# endif 2230 RETURN1 2231 2232 .p2align 4 2233L(ExitHigh12): 2234 test $0x10, %ah 2235 jnz L(Exit13) 2236 test $0x20, %ah 2237 jnz L(Exit14) 2238 test $0x40, %ah 2239 jnz L(Exit15) 2240 2241 .p2align 4 
/* Final-copy exits, strncpy length/terminator dispatch and zero padding.

   Register roles in this region, as used by the code below:
     %ecx  source cursor (loads come from (%ecx))
     %edx  destination cursor (stores go to (%edx))
     %eax  pmovmskb mask; bit i set means byte i of the current
	   16 bytes compared equal to zero
     %ebx  remaining length (USE_AS_STRNCPY only)
     %esi  byte offset added to both cursors before %esi is popped
     %edi  consumed by SAVE_RESULT in the non-stpcpy build and popped
	   by RETURN1

   L(ExitN) copies exactly N bytes, sets the return value with
   SAVE_RESULT and returns through RETURN1.  In the strncpy build it
   first subtracts N from %ebx; `lea' does not touch the flags, so the
   following `jnz' still tests that subtraction and sends a non-zero
   remainder to the zero-fill code with %ecx = first byte to clear.

   In the stpncpy build `cmpb $1, (%eax)' sets CF only when the byte at
   the tentative result is zero, and `sbb $-1, %eax' computes
   %eax + 1 - CF: the result is advanced by one when that byte is
   non-zero and left alone otherwise.  */
L(Exit16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	SAVE_RESULT (15)
#  ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	lea	16(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

#  ifdef USE_AS_STRNCPY

	/* Unwind info only: the code below runs with %esi still pushed.  */
	CFI_PUSH(%esi)

/* Case 2: a terminator bit in %eax and the length limit may both fall
   inside the current 16 bytes; whichever comes first picks the exit.
   The 16 is added back to %ebx so it counts bytes from the cursor.  */
	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%esi, %ecx
	add	%esi, %edx

	POP (%esi)

	test	%al, %al
	jz	L(ExitHighCase2)

	/* A terminator lies in bytes 0..7 and more than 8 bytes are
	   allowed, so the terminator alone decides.  */
	cmp	$8, %ebx
	ja	L(CopyFrom1To16BytesLess8)

	/* Otherwise walk the positions: stop at byte i if it is the
	   terminator or if the limit is exactly i + 1.  */
	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$1, %ebx
	je	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$2, %ebx
	je	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$3, %ebx
	je	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$4, %ebx
	je	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$5, %ebx
	je	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$6, %ebx
	je	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	cmp	$7, %ebx
	je	L(Exit7)
	jmp	L(Exit8)

/* No terminator in bytes 0..7.  If the limit is 8 or less only the
   length matters; otherwise repeat the walk for bytes 8..15 using the
   high mask byte in %ah.  */
	.p2align 4
L(ExitHighCase2):
	cmp	$8, %ebx
	jbe	L(CopyFrom1To16BytesLess8Case3)

	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$9, %ebx
	je	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$10, %ebx
	je	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$11, %ebx
	je	L(Exit11)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$12, %ebx
	je	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$13, %ebx
	je	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$14, %ebx
	je	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	cmp	$15, %ebx
	je	L(Exit15)
	jmp	L(Exit16)

	/* Unwind info only: %esi is pushed again on the paths below.  */
	CFI_PUSH(%esi)

/* Entered when the length limit falls inside the current 16 bytes.
   A non-zero mask selects Case 2, an empty mask falls into Case 3.  */
	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)

/* Case 3: the mask is not consulted; the copy size is chosen from
   %ebx alone.  */
	.p2align 4
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%esi, %edx
	add	%esi, %ecx

	POP (%esi)

	cmp	$8, %ebx
	ja	L(ExitHigh8Case3)

L(CopyFrom1To16BytesLess8Case3):
	cmp	$4, %ebx
	ja	L(ExitHigh4Case3)

	cmp	$1, %ebx
	je	L(Exit1)
	cmp	$2, %ebx
	je	L(Exit2)
	cmp	$3, %ebx
	je	L(Exit3)
	/* Fall-through size: copy 4 bytes and return without entering
	   the zero-fill code.  */
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT (4)
	RETURN1

	.p2align 4
L(ExitHigh4Case3):
	cmp	$5, %ebx
	je	L(Exit5)
	cmp	$6, %ebx
	je	L(Exit6)
	cmp	$7, %ebx
	je	L(Exit7)
	/* Fall-through size: copy 8 bytes and return.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	SAVE_RESULT (8)
	RETURN1

	.p2align 4
L(ExitHigh8Case3):
	cmp	$12, %ebx
	ja	L(ExitHigh12Case3)

	cmp	$9, %ebx
	je	L(Exit9)
	cmp	$10, %ebx
	je	L(Exit10)
	cmp	$11, %ebx
	je	L(Exit11)
	/* Fall-through size: copy 12 bytes (8 + 4) and return.  */
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT (12)
	RETURN1

	.p2align 4
L(ExitHigh12Case3):
	cmp	$13, %ebx
	je	L(Exit13)
	cmp	$14, %ebx
	je	L(Exit14)
	cmp	$15, %ebx
	je	L(Exit15)
	/* Fall-through size: copy 16 bytes (8 + 8) and return.  */
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)
	SAVE_RESULT (16)
	RETURN1

#  endif

/* Fixed-size exits.  Sizes that are not a power of two use two
   overlapping moves (for example 7 bytes = 4 bytes at offset 0 plus
   4 bytes at offset 3).  */
	.p2align 4
L(Exit1):
	movb	(%ecx), %al
	movb	%al, (%edx)
	SAVE_RESULT (0)
#  ifdef USE_AS_STRNCPY
	sub	$1, %ebx
	lea	1(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	SAVE_RESULT (1)
#  ifdef USE_AS_STRNCPY
	sub	$2, %ebx
	lea	2(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)
	SAVE_RESULT (2)
#  ifdef USE_AS_STRNCPY
	sub	$3, %ebx
	lea	3(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)
	SAVE_RESULT (4)
#  ifdef USE_AS_STRNCPY
	sub	$5, %ebx
	lea	5(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)
	SAVE_RESULT (5)
#  ifdef USE_AS_STRNCPY
	sub	$6, %ebx
	lea	6(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit7):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)
	SAVE_RESULT (6)
#  ifdef USE_AS_STRNCPY
	sub	$7, %ebx
	lea	7(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit9):
	movlpd	(%ecx), %xmm0
	movb	8(%ecx), %al
	movlpd	%xmm0, (%edx)
	movb	%al, 8(%edx)
	SAVE_RESULT (8)
#  ifdef USE_AS_STRNCPY
	sub	$9, %ebx
	lea	9(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit10):
	movlpd	(%ecx), %xmm0
	movw	8(%ecx), %ax
	movlpd	%xmm0, (%edx)
	movw	%ax, 8(%edx)
	SAVE_RESULT (9)
#  ifdef USE_AS_STRNCPY
	sub	$10, %ebx
	lea	10(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit11):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 7(%edx)
	SAVE_RESULT (10)
#  ifdef USE_AS_STRNCPY
	sub	$11, %ebx
	lea	11(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit13):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	SAVE_RESULT (12)
#  ifdef USE_AS_STRNCPY
	sub	$13, %ebx
	lea	13(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit14):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	SAVE_RESULT (13)
#  ifdef USE_AS_STRNCPY
	sub	$14, %ebx
	lea	14(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

	.p2align 4
L(Exit15):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	SAVE_RESULT (14)
#  ifdef USE_AS_STRNCPY
	sub	$15, %ebx
	lea	15(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero1)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN1

/* Unwind info only: the code from here on runs with %edi not pushed.  */
CFI_POP (%edi)

#  ifdef USE_AS_STRNCPY
/* L(FillN): store N zero bytes at (%ecx) and return.  %edx and %xmm0
   are zero here; both are cleared in L(StrncpyFillTailWithZero).
   Odd sizes again use two overlapping stores.  */
	.p2align 4
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	movb	%dl, (%ecx)
	RETURN

	.p2align 4
L(Fill2):
	movw	%dx, (%ecx)
	RETURN

	.p2align 4
L(Fill3):
	movw	%dx, (%ecx)
	movb	%dl, 2(%ecx)
	RETURN

	.p2align 4
L(Fill4):
	movl	%edx, (%ecx)
	RETURN

	.p2align 4
L(Fill5):
	movl	%edx, (%ecx)
	movb	%dl, 4(%ecx)
	RETURN

	.p2align 4
L(Fill6):
	movl	%edx, (%ecx)
	movw	%dx, 4(%ecx)
	RETURN

	.p2align 4
L(Fill7):
	movl	%edx, (%ecx)
	movl	%edx, 3(%ecx)
	RETURN

	.p2align 4
L(Fill8):
	movlpd	%xmm0, (%ecx)
	RETURN

	.p2align 4
L(Fill9):
	movlpd	%xmm0, (%ecx)
	movb	%dl, 8(%ecx)
	RETURN

	.p2align 4
L(Fill10):
	movlpd	%xmm0, (%ecx)
	movw	%dx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill11):
	movlpd	%xmm0, (%ecx)
	movl	%edx, 7(%ecx)
	RETURN

	.p2align 4
L(Fill12):
	movlpd	%xmm0, (%ecx)
	movl	%edx, 8(%ecx)
	RETURN

	.p2align 4
L(Fill13):
	movlpd	%xmm0, (%ecx)
	movlpd	%xmm0, 5(%ecx)
	RETURN

	.p2align 4
L(Fill14):
	movlpd	%xmm0, (%ecx)
	movlpd	%xmm0, 6(%ecx)
	RETURN

	.p2align 4
L(Fill15):
	movlpd	%xmm0, (%ecx)
	movlpd	%xmm0, 7(%ecx)
	RETURN

	.p2align 4
L(Fill16):
	movlpd	%xmm0, (%ecx)
	movlpd	%xmm0, 8(%ecx)
	RETURN

/* L(StrncpyFillExit1) adds back the 16 that the caller subtracted,
   then L(FillFrom1To16Bytes) dispatches on %ebx to the matching
   L(FillN).
   NOTE(review): the signed jg/jl below assume %ebx is a small
   non-negative count (0..16) at this point -- confirm against every
   caller.  */
	.p2align 4
L(StrncpyFillExit1):
	lea	16(%ebx), %ebx
L(FillFrom1To16Bytes):
	test	%ebx, %ebx
	jz	L(Fill0)
	cmp	$16, %ebx
	je	L(Fill16)
	cmp	$8, %ebx
	je	L(Fill8)
	jg	L(FillMore8)
	cmp	$4, %ebx
	je	L(Fill4)
	jg	L(FillMore4)
	cmp	$2, %ebx
	jl	L(Fill1)
	je	L(Fill2)
	jg	L(Fill3)
L(FillMore8):	/* but less than 16 */
	cmp	$12, %ebx
	je	L(Fill12)
	jl	L(FillLess12)
	cmp	$14, %ebx
	jl	L(Fill13)
	je	L(Fill14)
	jg	L(Fill15)
L(FillMore4):	/* but less than 8 */
	cmp	$6, %ebx
	jl	L(Fill5)
	je	L(Fill6)
	jg	L(Fill7)
L(FillLess12):	/* but more than 8 */
	cmp	$10, %ebx
	jl	L(Fill9)
	je	L(Fill10)
	jmp	L(Fill11)

	/* Unwind info only: the "1" entry below runs with %edi pushed.  */
	CFI_PUSH(%edi)

/* Zero-fill %ebx bytes starting at %ecx.  The "1" entry first pops
   %edi for callers that still have it on the stack.  */
	.p2align 4
L(StrncpyFillTailWithZero1):
	POP (%edi)
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0
	xor	%edx, %edx
	sub	$16, %ebx
	jbe	L(StrncpyFillExit1)

	/* More than 16 bytes to clear: write 16 unaligned zero bytes.  */
	movlpd	%xmm0, (%ecx)
	movlpd	%xmm0, 8(%ecx)

	lea	16(%ecx), %ecx

	/* Round %ecx down to a 16-byte boundary so movdqa can be used.
	   The bytes stepped back over were just zeroed; the count grows
	   by the same amount.  %edx is a scratch register here and is
	   cleared again for the L(FillN) stores.  */
	mov	%ecx, %edx
	and	$0xf, %edx
	sub	%edx, %ecx
	add	%edx, %ebx
	xor	%edx, %edx
	sub	$64, %ebx
	jb	L(StrncpyFillLess64)

	/* Clear 64 aligned bytes per iteration while at least 64 remain.  */
L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%ecx)
	movdqa	%xmm0, 16(%ecx)
	movdqa	%xmm0, 32(%ecx)
	movdqa	%xmm0, 48(%ecx)
	lea	64(%ecx), %ecx
	sub	$64, %ebx
	jae	L(StrncpyFillLoopMovdqa)

	/* %ebx is now (remaining - 64) and negative.  The signed jl
	   tests peel off 32 and then 16 aligned bytes before handing
	   the last 0..16 bytes to the L(FillN) dispatch.  */
L(StrncpyFillLess64):
	add	$32, %ebx
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%ecx)
	movdqa	%xmm0, 16(%ecx)
	lea	32(%ecx), %ecx
	sub	$16, %ebx
	jl	L(StrncpyFillExit1)
	movdqa	%xmm0, (%ecx)
	lea	16(%ecx), %ecx
	jmp	L(FillFrom1To16Bytes)

L(StrncpyFillLess32):
	add	$16, %ebx
	jl	L(StrncpyFillExit1)
	movdqa	%xmm0, (%ecx)
	lea	16(%ecx), %ecx
	jmp	L(FillFrom1To16Bytes)
#  endif

/* L(ExitTailN): same copies as L(ExitN), but they use
   SAVE_RESULT_TAIL and RETURN and jump to the zero-fill entry that
   does not pop %edi.  */
	.p2align 4
L(ExitTail1):
	movb	(%ecx), %al
	movb	%al, (%edx)
	SAVE_RESULT_TAIL (0)
#  ifdef USE_AS_STRNCPY
	sub	$1, %ebx
	lea	1(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	SAVE_RESULT_TAIL (1)
#  ifdef USE_AS_STRNCPY
	sub	$2, %ebx
	lea	2(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)
	SAVE_RESULT_TAIL (2)
#  ifdef USE_AS_STRNCPY
	sub	$3, %ebx
	lea	3(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
/* L(ExitTailN): copy exactly N bytes from (%ecx) to (%edx), set the
   return value with SAVE_RESULT_TAIL and return through RETURN.

   In the strncpy build N is subtracted from the remaining length in
   %ebx.  `lea' leaves the flags alone, so the following `jnz' still
   tests that subtraction and sends a non-zero remainder to
   L(StrncpyFillTailWithZero) with %ecx = first byte to clear.

   In the stpncpy build `cmpb $1, (%eax)' sets CF only when the byte at
   the tentative result is zero, and `sbb $-1, %eax' computes
   %eax + 1 - CF, so the result moves forward by one when that byte is
   non-zero.  L(ExitTail8) and L(ExitTail15) carry no such adjustment.
   NOTE(review): presumably because they are only reached after a
   terminator was seen at offset 7 / 14 -- confirm against the entry
   checks.  */
L(ExitTail4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
#  ifdef USE_AS_STRNCPY
	sub	$4, %ebx
	lea	4(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)
	SAVE_RESULT_TAIL (4)
#  ifdef USE_AS_STRNCPY
	sub	$5, %ebx
	lea	5(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)
	SAVE_RESULT_TAIL (5)
#  ifdef USE_AS_STRNCPY
	sub	$6, %ebx
	lea	6(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	/* 7 bytes = two overlapping 4-byte moves (offsets 0 and 3).  */
	.p2align 4
L(ExitTail7):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)
	SAVE_RESULT_TAIL (6)
#  ifdef USE_AS_STRNCPY
	sub	$7, %ebx
	lea	7(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	SAVE_RESULT_TAIL (7)
#  ifdef USE_AS_STRNCPY
	sub	$8, %ebx
	lea	8(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#  endif
	RETURN

	.p2align 4
L(ExitTail9):
	movlpd	(%ecx), %xmm0
	movb	8(%ecx), %al
	movlpd	%xmm0, (%edx)
	movb	%al, 8(%edx)
	SAVE_RESULT_TAIL (8)
#  ifdef USE_AS_STRNCPY
	sub	$9, %ebx
	lea	9(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail10):
	movlpd	(%ecx), %xmm0
	movw	8(%ecx), %ax
	movlpd	%xmm0, (%edx)
	movw	%ax, 8(%edx)
	SAVE_RESULT_TAIL (9)
#  ifdef USE_AS_STRNCPY
	sub	$10, %ebx
	lea	10(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail11):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 7(%edx)
	SAVE_RESULT_TAIL (10)
#  ifdef USE_AS_STRNCPY
	sub	$11, %ebx
	lea	11(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail12):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
#  ifdef USE_AS_STRNCPY
	sub	$12, %ebx
	lea	12(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail13):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	SAVE_RESULT_TAIL (12)
#  ifdef USE_AS_STRNCPY
	sub	$13, %ebx
	lea	13(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail14):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	SAVE_RESULT_TAIL (13)
#  ifdef USE_AS_STRNCPY
	sub	$14, %ebx
	lea	14(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN

	.p2align 4
L(ExitTail15):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	SAVE_RESULT_TAIL (14)
#  ifdef USE_AS_STRNCPY
	sub	$15, %ebx
	lea	15(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#  endif
	RETURN

	.p2align 4
L(ExitTail16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	SAVE_RESULT_TAIL (15)
#  ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	lea	16(%edx), %ecx
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
# endif

# ifdef USE_AS_STRNCPY
#  ifndef USE_AS_STRCAT
	/* Unwind info only: the code below runs with %esi and %edi
	   pushed.  */
	CFI_PUSH (%esi)
	CFI_PUSH (%edi)
#  endif
/* Leaving a 64-byte step because the strncpy length limit was hit.
   NOTE(review): %xmm4..%xmm7 presumably hold the four 16-byte chunks
   loaded by the 64-byte loop, %edx presumably already points 64 bytes
   past where the first chunk belongs, and %xmm0 is presumably all-zero
   on entry -- that loop is outside this part of the file, confirm
   there.
   %eax != 0 selects the path that also looks for a terminator
   (Case 2); %eax == 0 selects the length-only path (Case 3).  */
	.p2align 4
L(StrncpyLeaveCase2OrCase3):
	test	%eax, %eax
	jnz	L(Aligned64LeaveCase2)

/* Length-only path: store one chunk at a time, advancing the offset
   in %esi by 16 per chunk, until the remaining length in %ebx drops
   to 16 or less; the shared Case 3 code copies the final bytes.  */
L(Aligned64LeaveCase3):
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase3)
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
	jmp	L(CopyFrom1To16BytesCase3)

/* Terminator-aware path: test each chunk for a zero byte before the
   previous chunk is stored.  Execution only continues past a
   `test %eax, %eax / jnz' pair when the mask was empty, so %xmm0 is
   all-zero again for the next pcmpeqb.  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	/* Last chunk: the `lea' adjusts %ebx without touching the flags,
	   and the shared Case 2 code decides between terminator and
	   length limit.  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
	jmp	L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
/* L(StrncpyExitNCase2OrCase3), N = 1..15.

   Each stub moves a short run of bytes from %ecx to %edx, loads %esi
   with 16 - N and then dispatches on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).  Both
   targets, and the sites that jump here, are outside this chunk.

   Only 8-byte (movlpd) and 4-byte (movl) accesses are used.  For
   N = 1..8 and N = 12 they cover exactly bytes 0 .. 15-N.  For
   N = 10, 11, 13, 14, 15 the access starts at a negative offset and so
   also re-copies bytes just below the pointers.  The movl variants use
   %esi as scratch before it receives the count.
   NOTE(review): for N = 9, 10, 11 the 8-byte move also covers the byte
   at index %esi itself; presumably the continuation rewrites that byte
   -- confirm.  */
	.p2align 4
L(StrncpyExit1Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	7(%ecx), %xmm1		/* bytes 7..14 */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	mov	$15, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit2Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	6(%ecx), %xmm1		/* bytes 6..13 */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	mov	$14, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit3Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	5(%ecx), %xmm1		/* bytes 5..12 */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	mov	$13, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit4Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movl	8(%ecx), %esi		/* bytes 8..11 */
	movlpd	%xmm0, (%edx)
	movl	%esi, 8(%edx)
	mov	$12, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit5Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movl	7(%ecx), %esi		/* bytes 7..10 */
	movlpd	%xmm0, (%edx)
	movl	%esi, 7(%edx)
	mov	$11, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit6Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movl	6(%ecx), %esi		/* bytes 6..9 */
	movlpd	%xmm0, (%edx)
	movl	%esi, 6(%edx)
	mov	$10, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit7Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movl	5(%ecx), %esi		/* bytes 5..8 */
	movlpd	%xmm0, (%edx)
	movl	%esi, 5(%edx)
	mov	$9, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit8Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	%xmm0, (%edx)
	mov	$8, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit9Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* bytes 0..7 for a count of 7 */
	movlpd	%xmm0, (%edx)
	mov	$7, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit10Case2OrCase3):
	movlpd	-1(%ecx), %xmm0		/* bytes -1..6 */
	movlpd	%xmm0, -1(%edx)
	mov	$6, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit11Case2OrCase3):
	movlpd	-2(%ecx), %xmm0		/* bytes -2..5 */
	movlpd	%xmm0, -2(%edx)
	mov	$5, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit12Case2OrCase3):
	movl	(%ecx), %esi		/* bytes 0..3 */
	movl	%esi, (%edx)
	mov	$4, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit13Case2OrCase3):
	movl	-1(%ecx), %esi		/* bytes -1..2 */
	movl	%esi, -1(%edx)
	mov	$3, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit14Case2OrCase3):
	movl	-2(%ecx), %esi		/* bytes -2..1 */
	movl	%esi, -2(%edx)
	mov	$2, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit15Case2OrCase3):
	movl	-3(%ecx), %esi		/* bytes -3..0 */
	movl	%esi, -3(%edx)
	mov	$1, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

/* L(StrncpyLeaveN) / L(StrncpyExitN), N = 1..15.

   Leave: %xmm2 is saved in %xmm3, 48 is added back to the length
   counter %ebx, and up to four aligned 16-byte stores are made at
   0/16/32/48(%edx).  The first is %xmm2:%xmm1 shifted right by N bytes
   (palignr).  The second is the block newly loaded from 32-N(%ecx),
   combined with the saved %xmm3 in the same way.  The last two are
   %xmm4 and %xmm5 stored as they are.  %esi grows by 16 and %ebx
   shrinks by 16 per store, and control drops to the Exit label as soon
   as %ebx is used up.  movaps faults on a misaligned address, so %edx
   and %ecx + 32 - N must be 16-byte aligned here.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5, %esi and the bias on %ebx
   are set up by the jump sites, which are outside this chunk.

   Exit: %edx and %ecx are advanced by %esi + 16 - N, the bytes that end
   at the advanced pointers are copied, %esi is cleared, and execution
   continues at L(CopyFrom1To16BytesCase3), also outside this chunk.
   The copy uses %eax, %ah or %esi as scratch, depending on N.  */

/* N = 1.  */
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3		/* keep %xmm2 for the second shift */
	add	$48, %ebx
	jle	L(StrncpyExit1)		/* signed result <= 0 */
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	31(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)		/* borrow or zero */
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* same -16, flags left untouched */
L(StrncpyExit1):
	lea	15(%edx, %esi), %edx
	lea	15(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0	/* 16 bytes ending at the new %ecx */
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 2.  */
L(StrncpyLeave2):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit2)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	30(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit2):
	lea	14(%edx, %esi), %edx
	lea	14(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0	/* 16 bytes ending at the new %ecx */
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 3.  */
L(StrncpyLeave3):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit3)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	29(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit3):
	lea	13(%edx, %esi), %edx
	lea	13(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0	/* 16 bytes ending at the new %ecx */
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 4.  */
L(StrncpyLeave4):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit4)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit4):
	lea	12(%edx, %esi), %edx
	lea	12(%ecx, %esi), %ecx
	movlpd	-12(%ecx), %xmm0	/* 8 + 4 = 12 bytes */
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 5.  */
L(StrncpyLeave5):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit5)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	27(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit5):
	lea	11(%edx, %esi), %edx
	lea	11(%ecx, %esi), %ecx
	movlpd	-11(%ecx), %xmm0	/* 11 bytes, one byte copied twice */
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -11(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 6.  */
L(StrncpyLeave6):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit6)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	26(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit6):
	lea	10(%edx, %esi), %edx
	lea	10(%ecx, %esi), %ecx

	movlpd	-10(%ecx), %xmm0	/* 8 + 2 = 10 bytes */
	movw	-2(%ecx), %ax
	movlpd	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 7.  */
L(StrncpyLeave7):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit7)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	25(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit7):
	lea	9(%edx, %esi), %edx
	lea	9(%ecx, %esi), %ecx

	movlpd	-9(%ecx), %xmm0		/* 8 + 1 = 9 bytes */
	movb	-1(%ecx), %ah
	movlpd	%xmm0, -9(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 8.  */
L(StrncpyLeave8):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit8)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit8):
	lea	8(%edx, %esi), %edx
	lea	8(%ecx, %esi), %ecx
	movlpd	-8(%ecx), %xmm0		/* 8 bytes */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 9.  */
L(StrncpyLeave9):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit9)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	23(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit9):
	lea	7(%edx, %esi), %edx
	lea	7(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* 8 bytes for an advance of 7 */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 10.  */
L(StrncpyLeave10):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit10)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	22(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit10):
	lea	6(%edx, %esi), %edx
	lea	6(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* 8 bytes for an advance of 6 */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 11.  */
L(StrncpyLeave11):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	21(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit11):
	lea	5(%edx, %esi), %edx
	lea	5(%ecx, %esi), %ecx
	movl	-5(%ecx), %esi		/* 4 + 1 = 5 bytes, %esi as scratch */
	movb	-1(%ecx), %ah
	movl	%esi, -5(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 12.  */
L(StrncpyLeave12):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit12):
	lea	4(%edx, %esi), %edx
	lea	4(%ecx, %esi), %ecx
	movl	-4(%ecx), %eax		/* 4 bytes */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 13.  */
L(StrncpyLeave13):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit13):
	lea	3(%edx, %esi), %edx
	lea	3(%ecx, %esi), %ecx

	movl	-4(%ecx), %eax		/* 4 bytes for an advance of 3 */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 14.  */
L(StrncpyLeave14):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit14):
	lea	2(%edx, %esi), %edx
	lea	2(%ecx, %esi), %ecx
	movw	-2(%ecx), %ax		/* 2 bytes */
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* N = 15.  */
L(StrncpyLeave15):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit15):
	lea	1(%edx, %esi), %edx
	lea	1(%ecx, %esi), %ecx
	movb	-1(%ecx), %ah		/* 1 byte */
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
# endif

# ifndef USE_AS_STRCAT
# ifdef USE_AS_STRNCPY
/* CFI_POP expands to cfi_adjust_cfa_offset and cfi_restore only, so no
   instruction is emitted here.  The pair ends the unwind description of
   %esi and %edi as saved on the stack.  The stubs below return through
   RETURN, which in the strncpy builds pops %ebx only.  */
	CFI_POP (%esi)
	CFI_POP (%edi)

/* LEN == 0: nothing is copied and %edx is returned in %eax.  Reached
   from L(StrncpyExit4Bytes).  */
	.p2align 4
L(ExitTail0):
	movl	%edx, %eax
	RETURN

/* Short strncpy cases.  The entry code jumps to L(StrncpyExit15Bytes)
   when LEN is below 16, after LEN > 8 has been established and source
   bytes 0..7 have tested non-zero.  Here %ebx = LEN, %ecx = source and
   %edx = destination.

   LEN <= 12 is handed to L(StrncpyExit12Bytes).  Otherwise source bytes
   8..13 are tested in order, and a "LEN reached" test sits before
   bytes 12 and 13.  A NUL at byte k, or LEN == k + 1, selects
   L(ExitTail(k+1)); ExitTail9 is defined outside this chunk.  If no
   test fires, 15 bytes are copied inline with two overlapping 8-byte
   moves.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	7(%ecx), %xmm1		/* bytes 7..14 */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
	/* %eax = address of byte 14; it is advanced by one unless that
	   byte is NUL (CF from cmpb is set only for a zero byte).  */
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# else
	movl	%edx, %eax
# endif
	RETURN

/* LEN <= 12 on arrival from L(StrncpyExit15Bytes).  A "LEN reached"
   test precedes each of the NUL tests on source bytes 8..10 and selects
   the matching ExitTail stub.  If no test fires, 12 bytes are copied
   inline.  */
	.p2align 4
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movl	8(%ecx), %eax		/* bytes 8..11 */
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN

/* The entry code jumps here when LEN <= 8, before any source byte has
   been read.  LEN <= 4 is handed to L(StrncpyExit4Bytes).  Otherwise
   source bytes 0..3 are tested for NUL without a length test, and
   bytes 4..6 each follow a "LEN reached" test.  ExitTail1..ExitTail7
   are defined outside this chunk.  If no test fires, 8 bytes are
   copied inline.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)

	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	%xmm0, (%edx)
# ifdef USE_AS_STPCPY
	/* %eax = address of byte 7; it is advanced by one unless that
	   byte is NUL.  */
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# else
	movl	%edx, %eax
# endif
	RETURN

/* LEN <= 4.  LEN == 0 returns through L(ExitTail0) without reading the
   source.  Each NUL test on bytes 0..2 follows a "LEN reached" test, so
   no byte at or beyond index LEN is examined.  If no test fires, 4
   bytes are copied inline.  */
	.p2align 4
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax		/* bytes 0..3 */
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN
# endif

END (STRCPY)
# endif
#endif