/* wcscmp optimized with SSE2.
   Copyright (C) 2018-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>. */

#include <isa-level.h>

/* ISA level >= 2 because there is no wcscmp-sse4 implementation.  */
#if ISA_SHOULD_BUILD (2)
# include <sysdep.h>

/* Needed to get right name.  */
# define USE_AS_WCSCMP
# define STRCMP_ISA	_sse2
# include "strcmp-naming.h"

/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp
   function.  */

/* Building blocks.  Every macro below expands to a straight-line
   instruction sequence; they exist only to avoid repeating the same
   sequences for every offset.  Throughout the function %xmm0 is all
   zero whenever a macro is entered on the "everything matched so far"
   path (see CHECK_VEC_MASK).  */

/* Scalar step with null check: compare the wide characters at byte
   offset OFF of both strings through %ecx.  Leaves through L(nequal)
   (flags still set by the cmp) on a mismatch and through L(equal)
   when the matching character is the terminating null.  */
# define CMP_WCHAR(off)			\
	mov	off(%rsi), %ecx;	\
	cmp	%ecx, off(%rdi);	\
	jne	L(nequal);		\
	test	%ecx, %ecx;		\
	jz	L(equal)

/* Scalar step without null check: compare the wide characters at byte
   offset OFF through %eax and leave through L(nequal) on a mismatch.
   Used where the caller has already established what a matching
   character at this offset means.  */
# define CMP_WCHAR_NONNULL(off)		\
	mov	off(%rdi), %eax;	\
	cmp	off(%rsi), %eax;	\
	jne	L(nequal)

/* Shared end of a 16-byte step.  On entry V holds the per-dword
   equality mask and %xmm0 the per-dword null mask.  psubb leaves 0xff
   only in bytes that are equal and not null, so pmovmskb yields 0xffff
   exactly when all four wide characters match and none is null.  In
   that case %edx becomes 0 and %xmm0 is still all zero; otherwise
   control goes to TARGET with %edx = mask - 0xffff.  */
# define CHECK_VEC_MASK(v, target)	\
	psubb	%xmm0, v;		\
	pmovmskb v, %edx;		\
	sub	$0xffff, %edx;		\
	jnz	target

/* 16-byte step at byte offset OFF with both strings fetched by
   unaligned loads: VA receives the %rdi data (also used for the null
   check), VB the %rsi data.  */
# define CMP_VEC_UNALIGNED(off, va, vb, target)	\
	movdqu	off(%rdi), va;		\
	movdqu	off(%rsi), vb;		\
	pcmpeqd	va, %xmm0;		\
	pcmpeqd	vb, va;			\
	CHECK_VEC_MASK (va, target)

/* 16-byte step at byte offset OFF where the %rsi data is loaded
   unaligned into V (and null-checked) and the %rdi data is used
   directly as a pcmpeqd memory operand, which requires OFF(%rdi) to
   be 16-byte aligned.  */
# define CMP_VEC_RDI_MEM(off, v, target)	\
	movdqu	off(%rsi), v;		\
	pcmpeqd	v, %xmm0;		\
	pcmpeqd	off(%rdi), v;		\
	CHECK_VEC_MASK (v, target)

/* 16-byte step at byte offset OFF where the %rdi data is loaded into V
   with LOAD (movdqa or movdqu) and null-checked, and the %rsi data is
   used directly as a pcmpeqd memory operand, which requires OFF(%rsi)
   to be 16-byte aligned.  */
# define CMP_VEC_RSI_MEM(load, off, v, target)	\
	load	off(%rdi), v;		\
	pcmpeqd	v, %xmm0;		\
	pcmpeqd	off(%rsi), v;		\
	CHECK_VEC_MASK (v, target)

/* Compare the two null-terminated strings of 32-bit wide characters
   addressed by %rdi and %rsi (the first two integer argument registers
   of the x86-64 System V calling convention).

   Result in %eax: 0 if the strings are equal, 1 if the first differing
   wide character of the %rdi string is greater (signed) than the one
   of the %rsi string, -1 if it is smaller.

   Modifies %rax, %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm5 and the flags.

   This implementation uses SSE2 to compare up to 16 bytes at a time.
   The low six bits of each pointer (its position inside a 64-byte
   line) select one of the L(continue_A_B) loops.  Each loop handles
   64 bytes per iteration and is laid out so that none of its 16-byte
   loads spans a 64-byte boundary; the 16-byte chunks that could do so
   are compared one wide character at a time instead.

   Label naming: the suffixes name the 16-byte slot (0, 16, 32 or 48)
   of the pointers inside their 64-byte line when the label is reached.
   "00" stands for a 16-byte aligned pointer: L(continue_00_B) has
   %rdi aligned and %rsi in slot B, L(continue_A_00) has %rdi in slot A
   and %rsi aligned.  For two unaligned pointers the code is symmetric
   and the smaller slot is written first.  The shorter entry loops
   (e.g. L(continue_0_16)) run once to advance both pointers into the
   slots expected by one of the full 64-byte loops.  */

	.text
ENTRY (STRCMP)
	mov	%esi, %eax
	mov	%edi, %edx
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* %ch: low byte of %rsi */
	mov	%dl, %cl		/* %cl: low byte of %rdi */
	and	$63, %eax		/* rsi alignment in cache line */
	and	$63, %edx		/* rdi alignment in cache line */
	and	$15, %cl
	jz	L(continue_00)		/* %rdi is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

	/* %rdi is in slot 48; dispatch on %rsi.  */
L(continue_48):
	and	$15, %ch
	jz	L(continue_48_00)
	cmp	$16, %eax
	jb	L(continue_0_48)
	cmp	$32, %eax
	jb	L(continue_16_48)
	cmp	$48, %eax
	jb	L(continue_32_48)

	.p2align 4
L(continue_48_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CMP_VEC_UNALIGNED (16, %xmm1, %xmm2, L(less4_double_words_16))
	CMP_VEC_UNALIGNED (32, %xmm1, %xmm2, L(less4_double_words_32))
	CMP_VEC_UNALIGNED (48, %xmm1, %xmm2, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_48_48)

	/* %rdi is in slot 0 (but not aligned); dispatch on %rsi.  */
L(continue_0):
	and	$15, %ch
	jz	L(continue_0_00)
	cmp	$16, %eax
	jb	L(continue_0_0)
	cmp	$32, %eax
	jb	L(continue_0_16)
	cmp	$48, %eax
	jb	L(continue_0_32)

	.p2align 4
L(continue_0_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CMP_VEC_UNALIGNED (16, %xmm1, %xmm2, L(less4_double_words_16))
	CMP_VEC_UNALIGNED (32, %xmm1, %xmm2, L(less4_double_words_32))

	CMP_WCHAR (48)
	CMP_WCHAR (52)
	CMP_WCHAR (56)
	CMP_WCHAR (60)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_0_48)

	/* %rdi is 16-byte aligned; dispatch on %rsi.  */
	.p2align 4
L(continue_00):
	and	$15, %ch
	jz	L(continue_00_00)
	cmp	$16, %eax
	jb	L(continue_00_0)
	cmp	$32, %eax
	jb	L(continue_00_16)
	cmp	$48, %eax
	jb	L(continue_00_32)

	.p2align 4
L(continue_00_48):
	/* Look for a null in the first 16 bytes of the aligned %rdi
	   string before touching the %rsi string.  */
	pcmpeqd	(%rdi), %xmm0
	mov	(%rdi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No null in these four characters of the %rdi string, so a
	   match cannot be a terminator and no null test is needed.  */
	cmp	(%rsi), %eax
	jne	L(nequal)
	CMP_WCHAR_NONNULL (4)
	CMP_WCHAR_NONNULL (8)
	CMP_WCHAR_NONNULL (12)

	CMP_VEC_RDI_MEM (16, %xmm2, L(less4_double_words_16))
	CMP_VEC_RDI_MEM (32, %xmm2, L(less4_double_words_32))
	CMP_VEC_RDI_MEM (48, %xmm2, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_00_48)

	/* %rdi is in slot 32; dispatch on %rsi.  */
	.p2align 4
L(continue_32):
	and	$15, %ch
	jz	L(continue_32_00)
	cmp	$16, %eax
	jb	L(continue_0_32)
	cmp	$32, %eax
	jb	L(continue_16_32)
	cmp	$48, %eax
	jb	L(continue_32_32)

	.p2align 4
L(continue_32_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)
	CMP_WCHAR (16)
	CMP_WCHAR (20)
	CMP_WCHAR (24)
	CMP_WCHAR (28)

	CMP_VEC_UNALIGNED (32, %xmm1, %xmm2, L(less4_double_words_32))
	CMP_VEC_UNALIGNED (48, %xmm1, %xmm2, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_32_48)

	/* %rdi is in slot 16; dispatch on %rsi.  */
	.p2align 4
L(continue_16):
	and	$15, %ch
	jz	L(continue_16_00)
	cmp	$16, %eax
	jb	L(continue_0_16)
	cmp	$32, %eax
	jb	L(continue_16_16)
	cmp	$48, %eax
	jb	L(continue_16_32)

	.p2align 4
L(continue_16_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CMP_VEC_UNALIGNED (16, %xmm1, %xmm2, L(less4_double_words_16))

	CMP_WCHAR (32)
	CMP_WCHAR (36)
	CMP_WCHAR (40)
	CMP_WCHAR (44)

	CMP_VEC_UNALIGNED (48, %xmm1, %xmm2, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_16_48)

	/* Both pointers 16-byte aligned: aligned accesses only.  */
	.p2align 4
L(continue_00_00):
	CMP_VEC_RSI_MEM (movdqa, 0, %xmm1, L(less4_double_words))
	CMP_VEC_RSI_MEM (movdqa, 16, %xmm3, L(less4_double_words_16))
	CMP_VEC_RSI_MEM (movdqa, 32, %xmm5, L(less4_double_words_32))
	CMP_VEC_RSI_MEM (movdqa, 48, %xmm1, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_00_00)

	/* %rdi aligned, %rsi in slot 32/16/0: advance %rsi into slot 48.  */
	.p2align 4
L(continue_00_32):
	CMP_VEC_RDI_MEM (0, %xmm2, L(less4_double_words))

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	CMP_VEC_RDI_MEM (0, %xmm2, L(less4_double_words))
	CMP_VEC_RDI_MEM (16, %xmm2, L(less4_double_words_16))

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	CMP_VEC_RDI_MEM (0, %xmm2, L(less4_double_words))
	CMP_VEC_RDI_MEM (16, %xmm2, L(less4_double_words_16))
	CMP_VEC_RDI_MEM (32, %xmm2, L(less4_double_words_32))

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* Look for a null in the first 16 bytes of the aligned %rsi
	   string; the %rdi string is read one character at a time.  */
	pcmpeqd	(%rsi), %xmm0
	mov	(%rdi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No null in these four characters of the %rsi string, so a
	   match cannot be a terminator and no null test is needed.  */
	cmp	(%rsi), %eax
	jne	L(nequal)
	CMP_WCHAR_NONNULL (4)
	CMP_WCHAR_NONNULL (8)
	CMP_WCHAR_NONNULL (12)

	CMP_VEC_RSI_MEM (movdqu, 16, %xmm1, L(less4_double_words_16))
	CMP_VEC_RSI_MEM (movdqu, 32, %xmm1, L(less4_double_words_32))
	CMP_VEC_RSI_MEM (movdqu, 48, %xmm1, L(less4_double_words_48))

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_48_00)

	/* %rsi aligned, %rdi in slot 32/16/0: advance %rdi into slot 48.  */
	.p2align 4
L(continue_32_00):
	CMP_VEC_RSI_MEM (movdqu, 0, %xmm1, L(less4_double_words))

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	CMP_VEC_RSI_MEM (movdqu, 0, %xmm1, L(less4_double_words))
	CMP_VEC_RSI_MEM (movdqu, 16, %xmm1, L(less4_double_words_16))

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	CMP_VEC_RSI_MEM (movdqu, 0, %xmm1, L(less4_double_words))
	CMP_VEC_RSI_MEM (movdqu, 16, %xmm1, L(less4_double_words_16))
	CMP_VEC_RSI_MEM (movdqu, 32, %xmm1, L(less4_double_words_32))

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_48_00)

	/* Both pointers unaligned and in the same slot: advance both into
	   slot 48.  */
	.p2align 4
L(continue_32_32):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))
	CMP_VEC_UNALIGNED (16, %xmm3, %xmm4, L(less4_double_words_16))

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))
	CMP_VEC_UNALIGNED (16, %xmm3, %xmm4, L(less4_double_words_16))
	CMP_VEC_UNALIGNED (32, %xmm1, %xmm2, L(less4_double_words_32))

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_48_48)

	/* Both pointers unaligned and in different slots: advance until
	   the pointer in the higher slot reaches slot 48.  */
	.p2align 4
L(continue_0_16):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))
	CMP_VEC_UNALIGNED (16, %xmm1, %xmm2, L(less4_double_words_16))

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	CMP_VEC_UNALIGNED (0, %xmm1, %xmm2, L(less4_double_words))

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_32_48)

	/* Reached from L(continue_00_48) / L(continue_48_00) when the
	   aligned string has a null among its first four characters.
	   %eax already holds the first character of the %rdi string.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%rsi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	CMP_WCHAR (4)
	CMP_WCHAR (8)

	/* The first three characters matched and were not null, so the
	   null found by the caller is the fourth character: a match
	   here means the strings are equal.  */
	mov	12(%rsi), %ecx
	cmp	%ecx, 12(%rdi)
	jne	L(nequal)
	xor	%eax, %eax
	ret

	/* Tails of the 16-byte steps.  %edx = mask - 0xffff, where bit i
	   of mask is set when byte i of the chunk belongs to a wide
	   character that matched and was not null.  %dl == 0 means the
	   low mask byte was 0xff (characters 0 and 1 are fine), and a
	   zero low nibble of %dl or %dh means the first character of
	   that pair is fine.  The selected character therefore either
	   differs (-> L(nequal)) or is a matching null, in which case
	   the value left in %eax by the load is the 0 to return.  */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	CMP_WCHAR_NONNULL (0)
	ret

	.p2align 4
L(second_double_word):
	CMP_WCHAR_NONNULL (4)
	ret

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	CMP_WCHAR_NONNULL (8)
	ret

	.p2align 4
L(fourth_double_word):
	CMP_WCHAR_NONNULL (12)
	ret

	/* Same as L(less4_double_words) for the chunk at offset 16.  */
	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	CMP_WCHAR_NONNULL (16)
	ret

	.p2align 4
L(second_double_word_16):
	CMP_WCHAR_NONNULL (20)
	ret

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	CMP_WCHAR_NONNULL (24)
	ret

	.p2align 4
L(fourth_double_word_16):
	CMP_WCHAR_NONNULL (28)
	ret

	/* Same as L(less4_double_words) for the chunk at offset 32.  */
	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	CMP_WCHAR_NONNULL (32)
	ret

	.p2align 4
L(second_double_word_32):
	CMP_WCHAR_NONNULL (36)
	ret

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	CMP_WCHAR_NONNULL (40)
	ret

	.p2align 4
L(fourth_double_word_32):
	CMP_WCHAR_NONNULL (44)
	ret

	/* Same as L(less4_double_words) for the chunk at offset 48.  */
	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	CMP_WCHAR_NONNULL (48)
	ret

	.p2align 4
L(second_double_word_48):
	CMP_WCHAR_NONNULL (52)
	ret

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	CMP_WCHAR_NONNULL (56)
	ret

	.p2align 4
L(fourth_double_word_48):
	CMP_WCHAR_NONNULL (60)
	ret

	/* Entered straight after a cmp that computed (%rdi character) -
	   (%rsi character); mov leaves those flags intact, so jg picks
	   the signed "greater" case.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(nequal_bigger)
	neg	%eax

L(nequal_bigger):
	ret

	.p2align 4
L(equal):
	/* 32-bit xor is enough: writing %eax zero-extends into %rax.  */
	xor	%eax, %eax
	ret

END (STRCMP)
#endif