/* wcscmp with SSE2
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind annotations matching a 4-byte push / pop of REG.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* ENTRANCE saves %esi and %edi.  RETURN restores them and returns; the
   trailing CFI_PUSH pair re-establishes the "both registers saved"
   unwind state for whatever code follows the ret in the file.  */
# define ENTRANCE	PUSH(%esi); PUSH(%edi)
# define RETURN		POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
# define PARMS		4
# define STR1		PARMS
# define STR2		STR1+4

/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
   function.  */

/* Compare one 32-bit element: load OFF(S2) into %ecx and compare it
   with OFF(S1).  Branch to NE when the elements differ (flags are those
   of "S1 element - S2 element"), or to EQ when they match and are zero.
   Falls through when they match and are non-zero.  Clobbers %ecx.  */
# define CMP_WCHAR_AT(OFF, S1, S2, NE, EQ) \
	mov	OFF(S2), %ecx; \
	cmp	%ecx, OFF(S1); \
	jne	NE; \
	test	%ecx, %ecx; \
	jz	EQ

/* Element compare before %esi/%edi are saved: pointers still live in
   %edx (STR1) and %eax (STR2), exits do not pop anything.  */
# define HEAD_WCHAR(OFF) \
	CMP_WCHAR_AT (OFF, %edx, %eax, L(neq), L(eq))

/* Element compare inside the main body: %edi walks STR1, %esi walks
   STR2, exits pop the saved registers.  */
# define LOOP_WCHAR(OFF) \
	CMP_WCHAR_AT (OFF, %edi, %esi, L(nequal), L(equal))

/* Element compare without a terminator test; only used after a vector
   test has shown that the 16-byte block contains no zero element.
   Clobbers %eax.  */
# define CMP_WCHAR_NONULL(OFF) \
	mov	OFF(%edi), %eax; \
	cmp	OFF(%esi), %eax; \
	jne	L(nequal)

/* Final element compare of a tail handler: the element at OFF either
   differs (-> L(nequal)) or ends both strings, in which case the value
   already placed in %eax by the caller is returned.  Clobbers %ecx.  */
# define TAIL_WCHAR(OFF) \
	mov	OFF(%esi), %ecx; \
	cmp	%ecx, OFF(%edi); \
	jne	L(nequal); \
	RETURN

/* Common end of every vector compare.  On entry REG holds the per-lane
   equality mask and %xmm0 the per-lane "element is zero" mask.  The byte
   subtraction leaves 0xff only in lanes that are equal and non-zero, so
   pmovmskb yields 0xffff exactly when all four elements match and none
   is a terminator.  Branches to TARGET otherwise, with
   %edx = mask - 0xffff.  On fall-through %xmm0 is all zero again.  */
# define VEC_TAIL(REG, TARGET) \
	psubb	%xmm0, REG; \
	pmovmskb REG, %edx; \
	sub	$0xffff, %edx; \
	jnz	TARGET

/* Vector compare of the four elements at OFF with unaligned loads from
   both strings.  Requires %xmm0 == 0 on entry.  Clobbers REG_A, REG_B,
   %xmm0, %edx.  */
# define VEC_CMP_UU(OFF, REG_A, REG_B, TARGET) \
	movdqu	OFF(%edi), REG_A; \
	movdqu	OFF(%esi), REG_B; \
	pcmpeqd	REG_A, %xmm0; \
	pcmpeqd	REG_B, REG_A; \
	VEC_TAIL (REG_A, TARGET)

/* Vector compare of the four elements at OFF where one side is loaded
   with LOAD from LOAD_BASE and the other is used directly as a memory
   operand of pcmpeqd, so OFF(MEM_BASE) must be 16-byte aligned.
   Requires %xmm0 == 0 on entry.  Clobbers REG, %xmm0, %edx.  */
# define VEC_CMP_MEM(LOAD, OFF, LOAD_BASE, MEM_BASE, REG, TARGET) \
	LOAD	OFF(LOAD_BASE), REG; \
	pcmpeqd	REG, %xmm0; \
	pcmpeqd	OFF(MEM_BASE), REG; \
	VEC_TAIL (REG, TARGET)

/* %edi (STR1) is 16-byte aligned, %esi is not.  */
# define VEC_CMP_S1_ALIGNED(OFF, TARGET) \
	VEC_CMP_MEM (movdqu, OFF, %esi, %edi, %xmm2, TARGET)

/* %esi (STR2) is 16-byte aligned, %edi is not.  */
# define VEC_CMP_S2_ALIGNED(OFF, TARGET) \
	VEC_CMP_MEM (movdqu, OFF, %edi, %esi, %xmm1, TARGET)

/* Both pointers are 16-byte aligned.  */
# define VEC_CMP_BOTH_ALIGNED(OFF, REG, TARGET) \
	VEC_CMP_MEM (movdqa, OFF, %edi, %esi, REG, TARGET)

/* Second-level dispatch on the STR2 pointer.  %ch holds the low byte of
   %esi and %eax its offset within a 64-byte block.  Branches to ALIGNED
   when %esi is 16-byte aligned, else to Q0/Q1/Q2 when the offset is
   below 16/32/48, and falls through for offsets 48..63.  */
# define DISPATCH_S2(ALIGNED, Q0, Q1, Q2) \
	and	$15, %ch; \
	jz	ALIGNED; \
	cmp	$16, %eax; \
	jb	Q0; \
	cmp	$32, %eax; \
	jb	Q1; \
	cmp	$48, %eax; \
	jb	Q2

/* Step both string pointers forward by the immediate IMM.  */
# define ADVANCE(IMM) \
	add	IMM, %esi; \
	add	IMM, %edi

	.text
ENTRY (__wcscmp_sse2)
/*
 * Compares two strings of 32-bit elements, up to 16 bytes at a time
 * with SSE2.  The two pointers are read from the stack slots STR1 and
 * STR2.  Returns (in %eax) 0 when the strings are equal, 1 when the
 * first differing element of STR1 is greater (signed) than that of
 * STR2, and -1 otherwise.
 */
	mov	STR1(%esp), %edx
	mov	STR2(%esp), %eax

	/* The first four elements are compared one by one, before any
	   callee-saved register is touched.  */
	HEAD_WCHAR (0)
	HEAD_WCHAR (4)
	HEAD_WCHAR (8)
	HEAD_WCHAR (12)

	ENTRANCE
	add	$16, %eax
	add	$16, %edx

	mov	%eax, %esi		/* %esi = STR2 cursor */
	mov	%edx, %edi		/* %edi = STR1 cursor */
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* %ch = low byte of %esi */
	mov	%dl, %cl		/* %cl = low byte of %edi */
	and	$63, %eax		/* %esi offset within a 64-byte block */
	and	$63, %edx		/* %edi offset within a 64-byte block */

	/* Dispatch on the position of each pointer.  In the labels "00"
	   means 16-byte aligned, while 0/16/32/48 name the 16-byte quarter
	   of the 64-byte block an unaligned pointer falls in:
	     continue_00_X  - %edi aligned, %esi in quarter X
	     continue_X_00  - %edi in quarter X, %esi aligned
	     continue_X_Y   - neither aligned; quarters listed in ascending
			      order, whichever string they belong to.
	   In every variant the 16-byte SSE loads from an unaligned string
	   sit at offsets where they stay inside one 64-byte block; blocks
	   that would straddle a boundary are compared element by element
	   (presumably so that no load reaches past the terminator into a
	   following page).  */
	and	$15, %cl
	jz	L(continue_00)
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	DISPATCH_S2 (L(continue_48_00), L(continue_0_48), L(continue_16_48), L(continue_32_48))

	.p2align 4
L(continue_48_48):
	LOOP_WCHAR (0)
	LOOP_WCHAR (4)
	LOOP_WCHAR (8)
	LOOP_WCHAR (12)

	VEC_CMP_UU (16, %xmm1, %xmm2, L(less4_double_words_16))
	VEC_CMP_UU (32, %xmm1, %xmm2, L(less4_double_words_32))
	VEC_CMP_UU (48, %xmm1, %xmm2, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_48_48)

L(continue_0):
	DISPATCH_S2 (L(continue_0_00), L(continue_0_0), L(continue_0_16), L(continue_0_32))

	.p2align 4
L(continue_0_48):
	LOOP_WCHAR (0)
	LOOP_WCHAR (4)
	LOOP_WCHAR (8)
	LOOP_WCHAR (12)

	VEC_CMP_UU (16, %xmm1, %xmm2, L(less4_double_words_16))
	VEC_CMP_UU (32, %xmm1, %xmm2, L(less4_double_words_32))

	LOOP_WCHAR (48)
	LOOP_WCHAR (52)
	LOOP_WCHAR (56)
	LOOP_WCHAR (60)

	ADVANCE ($64)
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	DISPATCH_S2 (L(continue_00_00), L(continue_00_0), L(continue_00_16), L(continue_00_32))

	.p2align 4
L(continue_00_48):
	/* %xmm0 is zero here, so this marks zero elements in the aligned
	   16 bytes at (%edi).  */
	pcmpeqd	(%edi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No terminator in this STR1 block: plain equality tests suffice.  */
	cmp	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONULL (4)
	CMP_WCHAR_NONULL (8)
	CMP_WCHAR_NONULL (12)

	VEC_CMP_S1_ALIGNED (16, L(less4_double_words_16))
	VEC_CMP_S1_ALIGNED (32, L(less4_double_words_32))
	VEC_CMP_S1_ALIGNED (48, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	DISPATCH_S2 (L(continue_32_00), L(continue_0_32), L(continue_16_32), L(continue_32_32))

	.p2align 4
L(continue_32_48):
	LOOP_WCHAR (0)
	LOOP_WCHAR (4)
	LOOP_WCHAR (8)
	LOOP_WCHAR (12)

	LOOP_WCHAR (16)
	LOOP_WCHAR (20)
	LOOP_WCHAR (24)
	LOOP_WCHAR (28)

	VEC_CMP_UU (32, %xmm1, %xmm2, L(less4_double_words_32))
	VEC_CMP_UU (48, %xmm1, %xmm2, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	DISPATCH_S2 (L(continue_16_00), L(continue_0_16), L(continue_16_16), L(continue_16_32))

	.p2align 4
L(continue_16_48):
	LOOP_WCHAR (0)
	LOOP_WCHAR (4)
	LOOP_WCHAR (8)
	LOOP_WCHAR (12)

	VEC_CMP_UU (16, %xmm1, %xmm2, L(less4_double_words_16))

	LOOP_WCHAR (32)
	LOOP_WCHAR (36)
	LOOP_WCHAR (40)
	LOOP_WCHAR (44)

	VEC_CMP_UU (48, %xmm1, %xmm2, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_16_48)

	/* Both strings 16-byte aligned: four aligned vector compares per
	   iteration.  */
	.p2align 4
L(continue_00_00):
	VEC_CMP_BOTH_ALIGNED (0, %xmm1, L(less4_double_words))
	VEC_CMP_BOTH_ALIGNED (16, %xmm3, L(less4_double_words_16))
	VEC_CMP_BOTH_ALIGNED (32, %xmm5, L(less4_double_words_32))
	VEC_CMP_BOTH_ALIGNED (48, %xmm1, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_00_00)

	/* The short blocks below compare just enough 16-byte groups to move
	   the unaligned pointer into quarter 48, then join the matching
	   steady-state loop.  */
	.p2align 4
L(continue_00_32):
	VEC_CMP_S1_ALIGNED (0, L(less4_double_words))

	ADVANCE ($16)
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	VEC_CMP_S1_ALIGNED (0, L(less4_double_words))
	VEC_CMP_S1_ALIGNED (16, L(less4_double_words_16))

	ADVANCE ($32)
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	VEC_CMP_S1_ALIGNED (0, L(less4_double_words))
	VEC_CMP_S1_ALIGNED (16, L(less4_double_words_16))
	VEC_CMP_S1_ALIGNED (32, L(less4_double_words_32))

	ADVANCE ($48)
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* %xmm0 is zero here, so this marks zero elements in the aligned
	   16 bytes at (%esi).  */
	pcmpeqd	(%esi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No terminator in this STR2 block: plain equality tests suffice.  */
	cmp	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONULL (4)
	CMP_WCHAR_NONULL (8)
	CMP_WCHAR_NONULL (12)

	VEC_CMP_S2_ALIGNED (16, L(less4_double_words_16))
	VEC_CMP_S2_ALIGNED (32, L(less4_double_words_32))
	VEC_CMP_S2_ALIGNED (48, L(less4_double_words_48))

	ADVANCE ($64)
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_00):
	VEC_CMP_S2_ALIGNED (0, L(less4_double_words))

	ADVANCE ($16)
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	VEC_CMP_S2_ALIGNED (0, L(less4_double_words))
	VEC_CMP_S2_ALIGNED (16, L(less4_double_words_16))

	ADVANCE ($32)
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	VEC_CMP_S2_ALIGNED (0, L(less4_double_words))
	VEC_CMP_S2_ALIGNED (16, L(less4_double_words_16))
	VEC_CMP_S2_ALIGNED (32, L(less4_double_words_32))

	ADVANCE ($48)
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_32):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))

	ADVANCE ($16)
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))
	VEC_CMP_UU (16, %xmm3, %xmm4, L(less4_double_words_16))

	ADVANCE ($32)
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))
	VEC_CMP_UU (16, %xmm3, %xmm4, L(less4_double_words_16))
	VEC_CMP_UU (32, %xmm1, %xmm2, L(less4_double_words_32))

	ADVANCE ($48)
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))
	VEC_CMP_UU (16, %xmm1, %xmm2, L(less4_double_words_16))

	ADVANCE ($32)
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))

	ADVANCE ($16)
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	VEC_CMP_UU (0, %xmm1, %xmm2, L(less4_double_words))

	ADVANCE ($16)
	jmp	L(continue_32_48)

	/* Reached from continue_00_48 / continue_48_00 when the aligned
	   16-byte block holds a terminator.  %eax already contains the
	   first STR1 element.  Elements are checked in order; if the first
	   three match and are non-zero, the fourth is either a difference
	   or the common terminator.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	LOOP_WCHAR (4)
	LOOP_WCHAR (8)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	/* Tail handlers for a failed vector compare at offset 0, 16, 32 or
	   48.  %edx = lane mask - 0xffff.  Each element owns one nibble of
	   the mask, so the lowest non-zero nibble of %dl / %dh identifies
	   the first element that differs or terminates.  %eax is preset
	   to 0, the result if that element turns out to be equal (and
	   therefore the terminator of both strings).  */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	TAIL_WCHAR (0)

	.p2align 4
L(second_double_word):
	TAIL_WCHAR (4)

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	TAIL_WCHAR (8)

	.p2align 4
L(fourth_double_word):
	TAIL_WCHAR (12)

	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	TAIL_WCHAR (16)

	.p2align 4
L(second_double_word_16):
	TAIL_WCHAR (20)

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	TAIL_WCHAR (24)

	.p2align 4
L(fourth_double_word_16):
	TAIL_WCHAR (28)

	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	TAIL_WCHAR (32)

	.p2align 4
L(second_double_word_32):
	TAIL_WCHAR (36)

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	TAIL_WCHAR (40)

	.p2align 4
L(fourth_double_word_32):
	TAIL_WCHAR (44)

	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	TAIL_WCHAR (48)

	.p2align 4
L(second_double_word_48):
	TAIL_WCHAR (52)

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	TAIL_WCHAR (56)

	.p2align 4
L(fourth_double_word_48):
	TAIL_WCHAR (60)

	/* Elements differ.  The flags still come from the "cmp" that found
	   the difference (mov does not modify them): signed greater means
	   the STR1 element is larger, so return 1, otherwise -1.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(return)
	neg	%eax
	RETURN

	.p2align 4
L(return):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

	/* The exits below are only reached before ENTRANCE, so drop the
	   "registers saved" unwind state left behind by RETURN.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)
	neg	%eax

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (__wcscmp_sse2)
#endif