/* strcat with SSSE3
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#if IS_IN (libc)

# include <sysdep.h>

/* Unwind-info bookkeeping that accompanies a 4-byte push/pop of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

# ifndef STRCAT
#  define STRCAT	__strcat_ssse3
# endif

/* Argument offsets from %esp as used below: STR1 and STR2 are read
   after the entry PUSH of %edi (hence STR1 = PARMS + 4).  */
# define PARMS	4
# define STR1	PARMS+4
# define STR2	STR1+4

/* LEN is read after the additional PUSH of %ebx, so it is the slot
   following STR2 plus 4 more bytes for the saved %ebx.  */
# ifdef USE_AS_STRNCAT
#  define LEN	STR2+8
# endif

# define USE_AS_STRCAT

/* Register roles visible in this file:
     %edi  first argument (destination); copied to %eax before every
	   RETURN1, so it is the function's return value.
     %ecx  source pointer (second argument).
     %edx  current write position in the destination.
     %ebx  (USE_AS_STRNCAT only) byte count taken from LEN.
     %esi  offset/scratch owned by the included strcpy-ssse3.S code;
	   the paths below that are entered with it pushed pop it.  */

.text
ENTRY (STRCAT)
	PUSH	(%edi)
	mov	STR1(%esp), %edi
	mov	%edi, %edx

/* Every RETURN inside the included strlen code becomes a jump to the
   copy phase.  NOTE(review): the code below uses %eax as an offset
   from %edi, so strlen-sse2.S presumably leaves the destination's
   length in %eax -- confirm against that file.  */
# define RETURN	jmp L(StartStrcpyPart)
# include "strlen-sse2.S"

L(StartStrcpyPart):
	mov	STR2(%esp), %ecx
	lea	(%edi, %eax), %edx	/* %edx = dst + %eax.  */
# ifdef USE_AS_STRNCAT
	PUSH	(%ebx)
	mov	LEN(%esp), %ebx
	test	%ebx, %ebx
	jz	L(StrncatExit0)		/* Count of 0: nothing to append.  */
	cmp	$8, %ebx
	jbe	L(StrncatExit8Bytes)	/* Count in 1..8.  */
# endif
	/* Byte-by-byte scan of the first source bytes: a zero byte at
	   index K dispatches to L(Exit<K+1>), which copies K+1 bytes
	   (the zero byte included).  */
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
# ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	jb	L(StrncatExit15Bytes)	/* Count in 9..15.  */
# endif
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	cmpb	$0, 15(%ecx)
	jz	L(Exit16)
# ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	je	L(StrncatExit16)	/* Exactly 16 bytes, none of them zero.  */

/* Epilogue for the strncat build.  The directives after `ret' only
   re-establish the unwind state for the code that follows, which runs
   with both registers still on the stack.  %edi was pushed first (at
   entry) and %ebx second, so the CFI_PUSH directives must appear in
   that same order; listing %ebx first would record each register in
   the other's stack slot.  */
#  define RETURN1	\
	POP	(%ebx);	\
	POP	(%edi);	\
	ret;	\
	CFI_PUSH	(%edi);	\
	CFI_PUSH	(%ebx)
#  define USE_AS_STRNCPY
# else
/* Epilogue for the plain strcat build: only %edi is on the stack.  */
#  define RETURN1	POP (%edi); ret; CFI_PUSH (%edi)
# endif
# include "strcpy-ssse3.S"

	/* Entered with %esi pushed and holding an offset that is applied
	   to both pointers.  %eax is decoded as a bit mask: bit K set
	   selects L(Exit<K+1>).  NOTE(review): presumably the zero-byte
	   mask produced by the included strcpy code -- confirm there.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)
	test	%al, %al
	jz	L(ExitHigh)		/* No bit set in the low byte.  */
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* %al is non-zero with bits 0..6 clear, so bit 7 is set:
	   copy 8 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN1

	/* Same decoding for mask bits 8..14 held in %ah.  */
	.p2align 4
L(ExitHigh):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* None of bits 8..14 set: copy 16 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

	/* Tail copies.  L(Exit<N>) copies exactly N bytes from (%ecx) to
	   (%edx) and returns %edi; sizes that are not a single move use
	   two overlapping moves.  L(StrncatExit<N>) first stores the byte
	   in %bh at offset N and then falls through to L(Exit<N>).  Every
	   jump to L(StrncatExit<N>) in this file follows a
	   `cmp $N, %ebx; je', so %ebx == N <= 16 and %bh is zero there,
	   i.e. the stored byte is the terminating NUL.  */
	.p2align 4
L(StrncatExit1):
	movb	%bh, 1(%edx)
L(Exit1):
	movb	(%ecx), %al
	movb	%al, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit2):
	movb	%bh, 2(%edx)
L(Exit2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit3):
	movb	%bh, 3(%edx)
L(Exit3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit4):
	movb	%bh, 4(%edx)
L(Exit4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit5):
	movb	%bh, 5(%edx)
L(Exit5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit6):
	movb	%bh, 6(%edx)
L(Exit6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit7):
	movb	%bh, 7(%edx)
L(Exit7):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax		/* Overlaps byte 3 of the first move.  */
	movl	%eax, 3(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit8):
	movb	%bh, 8(%edx)
L(Exit8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit9):
	movb	%bh, 9(%edx)
L(Exit9):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit10):
	movb	%bh, 10(%edx)
L(Exit10):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit11):
	movb	%bh, 11(%edx)
L(Exit11):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax		/* Overlaps byte 7 of the first move.  */
	movl	%eax, 7(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit12):
	movb	%bh, 12(%edx)
L(Exit12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit13):
	movb	%bh, 13(%edx)
L(Exit13):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0		/* Overlaps bytes 5..7.  */
	movlpd	%xmm0, 5(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit14):
	movb	%bh, 14(%edx)
L(Exit14):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0		/* Overlaps bytes 6..7.  */
	movlpd	%xmm0, 6(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit15):
	movb	%bh, 15(%edx)
L(Exit15):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0		/* Overlaps byte 7.  */
	movlpd	%xmm0, 7(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit16):
	movb	%bh, 16(%edx)
L(Exit16):
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

# ifdef USE_AS_STRNCPY

	/* Unwind info only: the code below is entered with %esi still on
	   the stack (it is popped a few instructions in).  */
	CFI_PUSH(%esi)

	/* Entered with a non-zero mask in %eax (see
	   L(CopyFrom1To16BytesCase2OrCase3)) and %esi holding an offset.
	   NOTE(review): the `add $16' suggests the caller had already
	   subtracted 16 from the count in %ebx -- confirm in
	   strcpy-ssse3.S.  */
	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%esi, %ecx
	lea	(%esi, %edx), %esi	/* New write position, parked in %esi.  */
	/* Use %edx as scratch: %dh bit 7 becomes bit 15 of (%ebx - 9),
	   which is set when that difference is negative as a 16-bit
	   value; OR in the low mask byte.  %dh is therefore zero only if
	   that bit is clear and %al has no bit set.  */
	lea	-9(%ebx), %edx
	and	$1<<7, %dh
	or	%al, %dh
	test	%dh, %dh
	/* lea and pop leave the flags from `test' intact for the jz.  */
	lea	(%esi), %edx
	POP	(%esi)
	jz	L(ExitHighCase2)

	/* For each index: a mask bit means the source ends there
	   (L(Exit<N>)); otherwise a matching count means the limit is
	   reached there (L(StrncatExit<N>)).  */
	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	test	$0x40, %al
	jnz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	/* Copy 8 bytes, then store a zero byte at dst[7] if that byte is
	   already zero, else at dst[8]: cmpb sets CF only when the byte
	   is 0, and `sbb $-1' computes %eax + 1 - CF.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	xor	%cl, %cl
	movb	%cl, (%eax)
	movl	%edi, %eax
	RETURN1

	/* As above for indexes 9..15, with the mask bits taken from %ah.  */
	.p2align 4
L(ExitHighCase2):
	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrncatExit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	cmp	$15, %ebx
	je	L(StrncatExit15)
	/* Copy 16 bytes; no extra terminator is stored on this path.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

	/* Unwind info only: %esi is on the stack again for the code
	   below.  */
	CFI_PUSH(%esi)

	/* Non-zero mask: Case2.  Zero mask: fall into Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)

	/* No mask bit set: the number of bytes to copy is decided by the
	   count in %ebx alone.  */
	.p2align 4
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)

	cmp	$8, %ebx
	ja	L(ExitHighCase3)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	/* Remaining case (%ebx <= 8, so %bh is zero): copy 8 bytes and
	   terminate at dst[8].  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	%bh, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHighCase3):
	cmp	$9, %ebx
	je	L(StrncatExit9)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	cmp	$15, %ebx
	je	L(StrncatExit15)
	/* Copy 16 bytes and store %bh at dst[16].  NOTE(review): this
	   presumably runs only with %ebx == 16, which makes %bh the NUL
	   terminator -- confirm the callers in strcpy-ssse3.S.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm1, 8(%edx)
	movb	%bh, 16(%edx)
	movl	%edi, %eax
	RETURN1

	/* Count of 0: return the destination without copying.  */
	.p2align 4
L(StrncatExit0):
	movl	%edi, %eax
	RETURN1

	/* Count in 9..15; source bytes 0..8 are already known to be
	   non-zero.  Alternate between "count reached" and "source byte
	   is zero" checks for the remaining indexes.  */
	.p2align 4
L(StrncatExit15Bytes):
	cmp	$9, %ebx
	je	L(StrncatExit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	/* Count is 15: copy 15 bytes, then store a zero byte at dst[14]
	   if that byte is already zero, else at dst[15] (same cmpb/sbb
	   selection as in Case2; %bh is zero since %ebx < 16).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	movb	%bh, (%eax)
	movl	%edi, %eax
	RETURN1

	/* Count in 1..8: check each source byte for zero before checking
	   whether the count is reached at that index.  */
	.p2align 4
L(StrncatExit8Bytes):
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	/* Count is 8: copy 8 bytes, then store a zero byte at dst[7] if
	   that byte is already zero, else at dst[8].  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	movb	%bh, (%eax)
	movl	%edi, %eax
	RETURN1

# endif
END (STRCAT)
#endif