/* strcmp optimized with SSE4.2.
   Copyright (C) 2017-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (2)

# include <sysdep.h>

# define STRCMP_ISA	_sse42
# include "strcmp-naming.h"

# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#  include "locale-defines.h"
# endif

# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Recompute the remaining-byte counter kept in %r11:
	%r9 = %r11 + %rcx - 16
   Since the counter, %r11, is unsigned, we branch to strcmp_exitz
   if the new counter > the old one (the subtraction wrapped) or is 0.
   Otherwise the new value is stored back into %r11.
   Clobbers %r9 and the flags.  */
#  define UPDATE_STRNCMP_COUNTER				\
	/* calculate the number of bytes left to compare */	\
	lea	-16(%rcx, %r11), %r9;			\
	cmp	%r9, %r11;				\
	jb	LABEL(strcmp_exitz);			\
	test	%r9, %r9;				\
	je	LABEL(strcmp_exitz);			\
	mov	%r9, %r11
# else
/* Without a length limit there is no counter to maintain.  */
#  define UPDATE_STRNCMP_COUNTER
# endif

# define SECTION	sse4.2

/* Turn a name into an assembler-local label (.L prefix).  */
# define LABEL(l)	.L##l

/* We use 0x1a:
	_SIDD_SBYTE_OPS
	| _SIDD_CMP_EQUAL_EACH
	| _SIDD_NEGATIVE_POLARITY
	| _SIDD_LEAST_SIGNIFICANT
   on pcmpistri to find out if two 16byte data elements are the same
   and the offset of the first different byte.  There are 4 cases:

   1. Both 16byte data elements are valid and identical.
   2. Both 16byte data elements have EOS and identical.
   3. Both 16byte data elements are valid and they differ at offset X.
   4. At least one 16byte data element has EOS at offset X.  Two 16byte
      data elements must differ at or before offset X.

   Here is the table of ECX, CFlag, ZFlag and SFlag for 4 cases:

   case		ECX	CFlag	ZFlag	SFlag
    1		16	  0	  0	  0
    2		16	  0	  1	  1
    3		 X	  1	  0	  0
    4	       0 <= X	  1	 0/1	 0/1

   We exit from the loop for cases 2, 3 and 4 with jbe which branches
   when either CFlag or ZFlag is 1.  If CFlag == 0, we return 0 for
   case 2.  */

	/* Put all SSE 4.2 functions together.  */
	.section .text.SECTION,"ax",@progbits
	.align	16
	.type	STRCMP, @function
	.globl	STRCMP
# ifdef USE_AS_STRCASECMP_L
/* Variant without an explicit locale: fetch the thread-local locale
   pointer (__libc_tsd_LOCALE) into %RDX_LP, where the code below
   reads it, and fall through.  */
ENTRY (STRCASECMP)
	movq	__libc_tsd_LOCALE@gottpoff(%rip),%rax
	mov	%fs:(%rax),%RDX_LP

	/* Either 1 or 5 bytes (depending on whether CET is enabled).  */
	.p2align 4
END (STRCASECMP)
	/* FALLTHROUGH to strcasecmp_l.  */
# endif
# ifdef USE_AS_STRNCASECMP_L
/* Same as above for the length-limited variant; here the locale
   pointer goes into %RCX_LP.  */
ENTRY (STRCASECMP)
	movq	__libc_tsd_LOCALE@gottpoff(%rip),%rax
	mov	%fs:(%rax),%RCX_LP

	/* Either 1 or 5 bytes (depending on whether CET is enabled).  */
	.p2align 4
END (STRCASECMP)
	/* FALLTHROUGH to strncasecmp_l.  */
# endif


# define arg arg

/* Main entry point.
   The two strings are read through %rdi and %rsi.  In the
   strncmp/strncasecmp_l builds %RDX_LP is checked for 0 and 1 up front
   and then copied to %R11_LP, which serves as the remaining-byte
   counter (see UPDATE_STRNCMP_COUNTER).  In the case-insensitive
   builds the locale pointer is read from %rdx (strcasecmp_l) or
   %rcx (strncasecmp_l).  */
STRCMP:
	cfi_startproc
	_CET_ENDBR
	CALL_MCOUNT

/*
 * This implementation uses SSE to compare up to 16 bytes at a time.
 */
# ifdef USE_AS_STRCASECMP_L
	/* We have to fall back on the C implementation for locales
	   with encodings not matching ASCII for single bytes.  */
#  if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	mov	LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
#  else
	mov	(%rdx), %RAX_LP
#  endif
	/* Bit 0 of the _NL_CTYPE_NONASCII_CASE value selects the fallback.  */
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
	jne	__strcasecmp_l_nonascii
# endif
# ifdef USE_AS_STRNCASECMP_L
	/* We have to fall back on the C implementation for locales
	   with encodings not matching ASCII for single bytes.  */
#  if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	mov	LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
#  else
	mov	(%rcx), %RAX_LP
#  endif
	/* Bit 0 of the _NL_CTYPE_NONASCII_CASE value selects the fallback.  */
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
	jne	__strncasecmp_l_nonascii
# endif

# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	test	%RDX_LP, %RDX_LP
	je	LABEL(strcmp_exitz)	/* count == 0 */
	cmp	$1, %RDX_LP
	je	LABEL(Byte0)		/* count == 1 */
	mov	%RDX_LP, %R11_LP	/* %r11 = remaining-byte counter */
# endif
	mov	%esi, %ecx
	mov	%edi, %eax
/* Use 64bit AND here to avoid long NOP padding.  */
	and	$0x3f, %rcx		/* rsi alignment in cache line */
	and	$0x3f, %rax		/* rdi alignment in cache line */
# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	/* Per-byte constants used by TOLOWER, each replicated over
	   all 16 bytes.  */
	.section .rodata.cst16,"aM",@progbits,16
	.align 16
LABEL(lcase_min):
	.quad	0x3f3f3f3f3f3f3f3f
	.quad	0x3f3f3f3f3f3f3f3f
LABEL(lcase_max):
	.quad	0x9999999999999999
	.quad	0x9999999999999999
LABEL(case_add):
	.quad	0x2020202020202020
	.quad	0x2020202020202020
	.previous
	/* Keep the three constants in %xmm4-%xmm6 for TOLOWER.  */
	movdqa	LABEL(lcase_min)(%rip), %xmm4
#  define LCASE_MIN_reg %xmm4
	movdqa	LABEL(lcase_max)(%rip), %xmm5
#  define LCASE_MAX_reg %xmm5
	movdqa	LABEL(case_add)(%rip), %xmm6
#  define CASE_ADD_reg %xmm6
# endif
	/* An offset above 0x30 within the 64-byte line means an
	   unaligned 16-byte load would run past the line.  */
	cmp	$0x30, %ecx
	ja	LABEL(crosscache)/* rsi: 16-byte load will cross cache line */
	cmp	$0x30, %eax
	ja	LABEL(crosscache)/* rdi: 16-byte load will cross cache line */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm2
# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Fold the bytes 0x41..0x5a ('A'..'Z') in reg1 and reg2 to lower case,
   leaving every other byte unchanged.  For each byte:
	tmp  = byte + 0x3f		('A'..'Z' land on 0x80..0x99)
	mask = tmp > 0x99 (signed)	(false only for 'A'..'Z')
	byte += ~mask & 0x20
   Clobbers %xmm7 and %xmm8.  */
#  define TOLOWER(reg1, reg2) \
	movdqa	LCASE_MIN_reg, %xmm7;				\
	movdqa	LCASE_MIN_reg, %xmm8;				\
	paddb	reg1, %xmm7;					\
	paddb	reg2, %xmm8;					\
	pcmpgtb	LCASE_MAX_reg, %xmm7;				\
	pcmpgtb	LCASE_MAX_reg, %xmm8;				\
	pandn	CASE_ADD_reg, %xmm7;				\
	pandn	CASE_ADD_reg, %xmm8;				\
	paddb	%xmm7, reg1;					\
	paddb	%xmm8, reg2

	TOLOWER (%xmm1, %xmm2)
# else
/* Case-sensitive builds: TOLOWER expands to nothing.  */
#  define TOLOWER(reg1, reg2)
# endif
192 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ 193 pcmpeqb %xmm1, %xmm0 /* Any null chars? */ 194 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ 195 psubb %xmm0, %xmm1 /* packed sub of comparison results*/ 196 pmovmskb %xmm1, %edx 197 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ 198 jnz LABEL(less16bytes)/* If not, find different value or null char */ 199# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 200 sub $16, %r11 201 jbe LABEL(strcmp_exitz)/* finish comparison */ 202# endif 203 add $16, %rsi /* prepare to search next 16 bytes */ 204 add $16, %rdi /* prepare to search next 16 bytes */ 205 206 /* 207 * Determine source and destination string offsets from 16-byte 208 * alignment. Use relative offset difference between the two to 209 * determine which case below to use. 210 */ 211 .p2align 4 212LABEL(crosscache): 213 and $0xfffffffffffffff0, %rsi /* force %rsi is 16 byte aligned */ 214 and $0xfffffffffffffff0, %rdi /* force %rdi is 16 byte aligned */ 215 mov $0xffff, %edx /* for equivalent offset */ 216 xor %r8d, %r8d 217 and $0xf, %ecx /* offset of rsi */ 218 and $0xf, %eax /* offset of rdi */ 219 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ 220 cmp %eax, %ecx 221 je LABEL(ashr_0) /* rsi and rdi relative offset same */ 222 ja LABEL(bigger) 223 mov %edx, %r8d /* r8d is offset flag for exit tail */ 224 xchg %ecx, %eax 225 xchg %rsi, %rdi 226LABEL(bigger): 227 movdqa (%rdi), %xmm2 228 movdqa (%rsi), %xmm1 229 lea 15(%rax), %r9 230 sub %rcx, %r9 231 lea LABEL(unaligned_table)(%rip), %r10 232 movslq (%r10, %r9,4), %r9 233 pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ 234 lea (%r10, %r9), %r10 235 _CET_NOTRACK jmp *%r10 /* jump to corresponding case */ 236 237/* 238 * The following cases will be handled by ashr_0 239 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 240 * n(0~15) n(0~15) 15(15+ n-n) ashr_0 241 */ 242 .p2align 4 243LABEL(ashr_0): 244 245 movdqa (%rsi), %xmm1 246 pcmpeqb %xmm1, %xmm0 /* Any null chars? */ 247# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 248 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ 249# else 250 movdqa (%rdi), %xmm2 251 TOLOWER (%xmm1, %xmm2) 252 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ 253# endif 254 psubb %xmm0, %xmm1 /* packed sub of comparison results*/ 255 pmovmskb %xmm1, %r9d 256 shr %cl, %edx /* adjust 0xffff for offset */ 257 shr %cl, %r9d /* adjust for 16-byte offset */ 258 sub %r9d, %edx 259 /* 260 * edx must be the same with r9d if in left byte (16-rcx) is equal to 261 * the start from (16-rax) and no null char was seen. 262 */ 263 jne LABEL(less32bytes) /* mismatch or null char */ 264 UPDATE_STRNCMP_COUNTER 265 mov $16, %rcx 266 mov $16, %r9 267 268 /* 269 * Now both strings are aligned at 16-byte boundary. Loop over strings 270 * checking 32-bytes per iteration. 
271 */ 272 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 273 .p2align 4 274LABEL(ashr_0_use): 275 movdqa (%rdi,%rdx), %xmm0 276# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 277 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 278# else 279 movdqa (%rsi,%rdx), %xmm1 280 TOLOWER (%xmm0, %xmm1) 281 pcmpistri $0x1a, %xmm1, %xmm0 282# endif 283 lea 16(%rdx), %rdx 284 jbe LABEL(ashr_0_exit_use) 285# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 286 sub $16, %r11 287 jbe LABEL(strcmp_exitz) 288# endif 289 290 movdqa (%rdi,%rdx), %xmm0 291# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 292 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 293# else 294 movdqa (%rsi,%rdx), %xmm1 295 TOLOWER (%xmm0, %xmm1) 296 pcmpistri $0x1a, %xmm1, %xmm0 297# endif 298 lea 16(%rdx), %rdx 299 jbe LABEL(ashr_0_exit_use) 300# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 301 sub $16, %r11 302 jbe LABEL(strcmp_exitz) 303# endif 304 jmp LABEL(ashr_0_use) 305 306 307 .p2align 4 308LABEL(ashr_0_exit_use): 309 jnc LABEL(strcmp_exitz) 310# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 311 sub %rcx, %r11 312 jbe LABEL(strcmp_exitz) 313# endif 314 lea -16(%rdx, %rcx), %rcx 315 movzbl (%rdi, %rcx), %eax 316 movzbl (%rsi, %rcx), %edx 317# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L 318 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx 319 movl (%rcx,%rax,4), %eax 320 movl (%rcx,%rdx,4), %edx 321# endif 322 sub %edx, %eax 323 ret 324 325 326 327/* 328 * The following cases will be handled by ashr_1 329 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 330 * n(15) n -15 0(15 +(n-15) - n) ashr_1 331 */ 332 .p2align 4 333LABEL(ashr_1): 334 pslldq $15, %xmm2 /* shift first string to align with second */ 335 TOLOWER (%xmm1, %xmm2) 336 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ 337 psubb %xmm0, %xmm2 /* packed sub of comparison results*/ 338 pmovmskb %xmm2, %r9d 339 shr %cl, %edx /* adjust 
0xffff for offset */ 340 shr %cl, %r9d /* adjust for 16-byte offset */ 341 sub %r9d, %edx 342 jnz LABEL(less32bytes) /* mismatch or null char seen */ 343 movdqa (%rdi), %xmm3 344 UPDATE_STRNCMP_COUNTER 345 346 mov $16, %rcx /* index for loads*/ 347 mov $1, %r9d /* byte position left over from less32bytes case */ 348 /* 349 * Setup %r10 value allows us to detect crossing a page boundary. 350 * When %r10 goes positive we have crossed a page boundary and 351 * need to do a nibble. 352 */ 353 lea 1(%rdi), %r10 354 and $0xfff, %r10 /* offset into 4K page */ 355 sub $0x1000, %r10 /* subtract 4K pagesize */ 356 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 357 358 .p2align 4 359LABEL(loop_ashr_1_use): 360 add $16, %r10 361 jg LABEL(nibble_ashr_1_use) 362 363LABEL(nibble_ashr_1_restart_use): 364 movdqa (%rdi, %rdx), %xmm0 365 palignr $1, -16(%rdi, %rdx), %xmm0 366# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 367 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 368# else 369 movdqa (%rsi,%rdx), %xmm1 370 TOLOWER (%xmm0, %xmm1) 371 pcmpistri $0x1a, %xmm1, %xmm0 372# endif 373 jbe LABEL(exit_use) 374# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 375 sub $16, %r11 376 jbe LABEL(strcmp_exitz) 377# endif 378 379 add $16, %rdx 380 add $16, %r10 381 jg LABEL(nibble_ashr_1_use) 382 383 movdqa (%rdi, %rdx), %xmm0 384 palignr $1, -16(%rdi, %rdx), %xmm0 385# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 386 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 387# else 388 movdqa (%rsi,%rdx), %xmm1 389 TOLOWER (%xmm0, %xmm1) 390 pcmpistri $0x1a, %xmm1, %xmm0 391# endif 392 jbe LABEL(exit_use) 393# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 394 sub $16, %r11 395 jbe LABEL(strcmp_exitz) 396# endif 397 add $16, %rdx 398 jmp LABEL(loop_ashr_1_use) 399 400 .p2align 4 401LABEL(nibble_ashr_1_use): 402 sub $0x1000, %r10 403 movdqa -16(%rdi, %rdx), %xmm0 404 psrldq $1, %xmm0 405 pcmpistri $0x3a,%xmm0, %xmm0 406# if defined USE_AS_STRNCMP || 
defined USE_AS_STRNCASECMP_L 407 cmp %r11, %rcx 408 jae LABEL(nibble_ashr_exit_use) 409# endif 410 cmp $14, %ecx 411 ja LABEL(nibble_ashr_1_restart_use) 412 413 jmp LABEL(nibble_ashr_exit_use) 414 415/* 416 * The following cases will be handled by ashr_2 417 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 418 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 419 */ 420 .p2align 4 421LABEL(ashr_2): 422 pslldq $14, %xmm2 423 TOLOWER (%xmm1, %xmm2) 424 pcmpeqb %xmm1, %xmm2 425 psubb %xmm0, %xmm2 426 pmovmskb %xmm2, %r9d 427 shr %cl, %edx 428 shr %cl, %r9d 429 sub %r9d, %edx 430 jnz LABEL(less32bytes) 431 movdqa (%rdi), %xmm3 432 UPDATE_STRNCMP_COUNTER 433 434 mov $16, %rcx /* index for loads */ 435 mov $2, %r9d /* byte position left over from less32bytes case */ 436 /* 437 * Setup %r10 value allows us to detect crossing a page boundary. 438 * When %r10 goes positive we have crossed a page boundary and 439 * need to do a nibble. 440 */ 441 lea 2(%rdi), %r10 442 and $0xfff, %r10 /* offset into 4K page */ 443 sub $0x1000, %r10 /* subtract 4K pagesize */ 444 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 445 446 .p2align 4 447LABEL(loop_ashr_2_use): 448 add $16, %r10 449 jg LABEL(nibble_ashr_2_use) 450 451LABEL(nibble_ashr_2_restart_use): 452 movdqa (%rdi, %rdx), %xmm0 453 palignr $2, -16(%rdi, %rdx), %xmm0 454# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 455 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 456# else 457 movdqa (%rsi,%rdx), %xmm1 458 TOLOWER (%xmm0, %xmm1) 459 pcmpistri $0x1a, %xmm1, %xmm0 460# endif 461 jbe LABEL(exit_use) 462# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 463 sub $16, %r11 464 jbe LABEL(strcmp_exitz) 465# endif 466 467 add $16, %rdx 468 add $16, %r10 469 jg LABEL(nibble_ashr_2_use) 470 471 movdqa (%rdi, %rdx), %xmm0 472 palignr $2, -16(%rdi, %rdx), %xmm0 473# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 474 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 475# else 476 movdqa 
(%rsi,%rdx), %xmm1 477 TOLOWER (%xmm0, %xmm1) 478 pcmpistri $0x1a, %xmm1, %xmm0 479# endif 480 jbe LABEL(exit_use) 481# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 482 sub $16, %r11 483 jbe LABEL(strcmp_exitz) 484# endif 485 add $16, %rdx 486 jmp LABEL(loop_ashr_2_use) 487 488 .p2align 4 489LABEL(nibble_ashr_2_use): 490 sub $0x1000, %r10 491 movdqa -16(%rdi, %rdx), %xmm0 492 psrldq $2, %xmm0 493 pcmpistri $0x3a,%xmm0, %xmm0 494# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 495 cmp %r11, %rcx 496 jae LABEL(nibble_ashr_exit_use) 497# endif 498 cmp $13, %ecx 499 ja LABEL(nibble_ashr_2_restart_use) 500 501 jmp LABEL(nibble_ashr_exit_use) 502 503/* 504 * The following cases will be handled by ashr_3 505 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 506 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 507 */ 508 .p2align 4 509LABEL(ashr_3): 510 pslldq $13, %xmm2 511 TOLOWER (%xmm1, %xmm2) 512 pcmpeqb %xmm1, %xmm2 513 psubb %xmm0, %xmm2 514 pmovmskb %xmm2, %r9d 515 shr %cl, %edx 516 shr %cl, %r9d 517 sub %r9d, %edx 518 jnz LABEL(less32bytes) 519 movdqa (%rdi), %xmm3 520 521 UPDATE_STRNCMP_COUNTER 522 523 mov $16, %rcx /* index for loads */ 524 mov $3, %r9d /* byte position left over from less32bytes case */ 525 /* 526 * Setup %r10 value allows us to detect crossing a page boundary. 527 * When %r10 goes positive we have crossed a page boundary and 528 * need to do a nibble. 
529 */ 530 lea 3(%rdi), %r10 531 and $0xfff, %r10 /* offset into 4K page */ 532 sub $0x1000, %r10 /* subtract 4K pagesize */ 533 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 534 535LABEL(loop_ashr_3_use): 536 add $16, %r10 537 jg LABEL(nibble_ashr_3_use) 538 539LABEL(nibble_ashr_3_restart_use): 540 movdqa (%rdi, %rdx), %xmm0 541 palignr $3, -16(%rdi, %rdx), %xmm0 542# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 543 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 544# else 545 movdqa (%rsi,%rdx), %xmm1 546 TOLOWER (%xmm0, %xmm1) 547 pcmpistri $0x1a, %xmm1, %xmm0 548# endif 549 jbe LABEL(exit_use) 550# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 551 sub $16, %r11 552 jbe LABEL(strcmp_exitz) 553# endif 554 555 add $16, %rdx 556 add $16, %r10 557 jg LABEL(nibble_ashr_3_use) 558 559 movdqa (%rdi, %rdx), %xmm0 560 palignr $3, -16(%rdi, %rdx), %xmm0 561# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 562 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 563# else 564 movdqa (%rsi,%rdx), %xmm1 565 TOLOWER (%xmm0, %xmm1) 566 pcmpistri $0x1a, %xmm1, %xmm0 567# endif 568 jbe LABEL(exit_use) 569# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 570 sub $16, %r11 571 jbe LABEL(strcmp_exitz) 572# endif 573 add $16, %rdx 574 jmp LABEL(loop_ashr_3_use) 575 576 .p2align 4 577LABEL(nibble_ashr_3_use): 578 sub $0x1000, %r10 579 movdqa -16(%rdi, %rdx), %xmm0 580 psrldq $3, %xmm0 581 pcmpistri $0x3a,%xmm0, %xmm0 582# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 583 cmp %r11, %rcx 584 jae LABEL(nibble_ashr_exit_use) 585# endif 586 cmp $12, %ecx 587 ja LABEL(nibble_ashr_3_restart_use) 588 589 jmp LABEL(nibble_ashr_exit_use) 590 591/* 592 * The following cases will be handled by ashr_4 593 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 594 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 595 */ 596 .p2align 4 597LABEL(ashr_4): 598 pslldq $12, %xmm2 599 TOLOWER (%xmm1, %xmm2) 600 pcmpeqb %xmm1, %xmm2 601 
psubb %xmm0, %xmm2 602 pmovmskb %xmm2, %r9d 603 shr %cl, %edx 604 shr %cl, %r9d 605 sub %r9d, %edx 606 jnz LABEL(less32bytes) 607 movdqa (%rdi), %xmm3 608 609 UPDATE_STRNCMP_COUNTER 610 611 mov $16, %rcx /* index for loads */ 612 mov $4, %r9d /* byte position left over from less32bytes case */ 613 /* 614 * Setup %r10 value allows us to detect crossing a page boundary. 615 * When %r10 goes positive we have crossed a page boundary and 616 * need to do a nibble. 617 */ 618 lea 4(%rdi), %r10 619 and $0xfff, %r10 /* offset into 4K page */ 620 sub $0x1000, %r10 /* subtract 4K pagesize */ 621 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 622 623 .p2align 4 624LABEL(loop_ashr_4_use): 625 add $16, %r10 626 jg LABEL(nibble_ashr_4_use) 627 628LABEL(nibble_ashr_4_restart_use): 629 movdqa (%rdi, %rdx), %xmm0 630 palignr $4, -16(%rdi, %rdx), %xmm0 631# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 632 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 633# else 634 movdqa (%rsi,%rdx), %xmm1 635 TOLOWER (%xmm0, %xmm1) 636 pcmpistri $0x1a, %xmm1, %xmm0 637# endif 638 jbe LABEL(exit_use) 639# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 640 sub $16, %r11 641 jbe LABEL(strcmp_exitz) 642# endif 643 644 add $16, %rdx 645 add $16, %r10 646 jg LABEL(nibble_ashr_4_use) 647 648 movdqa (%rdi, %rdx), %xmm0 649 palignr $4, -16(%rdi, %rdx), %xmm0 650# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 651 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 652# else 653 movdqa (%rsi,%rdx), %xmm1 654 TOLOWER (%xmm0, %xmm1) 655 pcmpistri $0x1a, %xmm1, %xmm0 656# endif 657 jbe LABEL(exit_use) 658# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 659 sub $16, %r11 660 jbe LABEL(strcmp_exitz) 661# endif 662 add $16, %rdx 663 jmp LABEL(loop_ashr_4_use) 664 665 .p2align 4 666LABEL(nibble_ashr_4_use): 667 sub $0x1000, %r10 668 movdqa -16(%rdi, %rdx), %xmm0 669 psrldq $4, %xmm0 670 pcmpistri $0x3a,%xmm0, %xmm0 671# if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L 672 cmp %r11, %rcx 673 jae LABEL(nibble_ashr_exit_use) 674# endif 675 cmp $11, %ecx 676 ja LABEL(nibble_ashr_4_restart_use) 677 678 jmp LABEL(nibble_ashr_exit_use) 679 680/* 681 * The following cases will be handled by ashr_5 682 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 683 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5 684 */ 685 .p2align 4 686LABEL(ashr_5): 687 pslldq $11, %xmm2 688 TOLOWER (%xmm1, %xmm2) 689 pcmpeqb %xmm1, %xmm2 690 psubb %xmm0, %xmm2 691 pmovmskb %xmm2, %r9d 692 shr %cl, %edx 693 shr %cl, %r9d 694 sub %r9d, %edx 695 jnz LABEL(less32bytes) 696 movdqa (%rdi), %xmm3 697 698 UPDATE_STRNCMP_COUNTER 699 700 mov $16, %rcx /* index for loads */ 701 mov $5, %r9d /* byte position left over from less32bytes case */ 702 /* 703 * Setup %r10 value allows us to detect crossing a page boundary. 704 * When %r10 goes positive we have crossed a page boundary and 705 * need to do a nibble. 706 */ 707 lea 5(%rdi), %r10 708 and $0xfff, %r10 /* offset into 4K page */ 709 sub $0x1000, %r10 /* subtract 4K pagesize */ 710 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 711 712 .p2align 4 713LABEL(loop_ashr_5_use): 714 add $16, %r10 715 jg LABEL(nibble_ashr_5_use) 716 717LABEL(nibble_ashr_5_restart_use): 718 movdqa (%rdi, %rdx), %xmm0 719 palignr $5, -16(%rdi, %rdx), %xmm0 720# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 721 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 722# else 723 movdqa (%rsi,%rdx), %xmm1 724 TOLOWER (%xmm0, %xmm1) 725 pcmpistri $0x1a, %xmm1, %xmm0 726# endif 727 jbe LABEL(exit_use) 728# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 729 sub $16, %r11 730 jbe LABEL(strcmp_exitz) 731# endif 732 733 add $16, %rdx 734 add $16, %r10 735 jg LABEL(nibble_ashr_5_use) 736 737 movdqa (%rdi, %rdx), %xmm0 738 739 palignr $5, -16(%rdi, %rdx), %xmm0 740# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 741 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 742# else 743 movdqa 
(%rsi,%rdx), %xmm1 744 TOLOWER (%xmm0, %xmm1) 745 pcmpistri $0x1a, %xmm1, %xmm0 746# endif 747 jbe LABEL(exit_use) 748# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 749 sub $16, %r11 750 jbe LABEL(strcmp_exitz) 751# endif 752 add $16, %rdx 753 jmp LABEL(loop_ashr_5_use) 754 755 .p2align 4 756LABEL(nibble_ashr_5_use): 757 sub $0x1000, %r10 758 movdqa -16(%rdi, %rdx), %xmm0 759 psrldq $5, %xmm0 760 pcmpistri $0x3a,%xmm0, %xmm0 761# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 762 cmp %r11, %rcx 763 jae LABEL(nibble_ashr_exit_use) 764# endif 765 cmp $10, %ecx 766 ja LABEL(nibble_ashr_5_restart_use) 767 768 jmp LABEL(nibble_ashr_exit_use) 769 770/* 771 * The following cases will be handled by ashr_6 772 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 773 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6 774 */ 775 .p2align 4 776LABEL(ashr_6): 777 pslldq $10, %xmm2 778 TOLOWER (%xmm1, %xmm2) 779 pcmpeqb %xmm1, %xmm2 780 psubb %xmm0, %xmm2 781 pmovmskb %xmm2, %r9d 782 shr %cl, %edx 783 shr %cl, %r9d 784 sub %r9d, %edx 785 jnz LABEL(less32bytes) 786 movdqa (%rdi), %xmm3 787 788 UPDATE_STRNCMP_COUNTER 789 790 mov $16, %rcx /* index for loads */ 791 mov $6, %r9d /* byte position left over from less32bytes case */ 792 /* 793 * Setup %r10 value allows us to detect crossing a page boundary. 794 * When %r10 goes positive we have crossed a page boundary and 795 * need to do a nibble. 
796 */ 797 lea 6(%rdi), %r10 798 and $0xfff, %r10 /* offset into 4K page */ 799 sub $0x1000, %r10 /* subtract 4K pagesize */ 800 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 801 802 .p2align 4 803LABEL(loop_ashr_6_use): 804 add $16, %r10 805 jg LABEL(nibble_ashr_6_use) 806 807LABEL(nibble_ashr_6_restart_use): 808 movdqa (%rdi, %rdx), %xmm0 809 palignr $6, -16(%rdi, %rdx), %xmm0 810# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 811 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 812# else 813 movdqa (%rsi,%rdx), %xmm1 814 TOLOWER (%xmm0, %xmm1) 815 pcmpistri $0x1a, %xmm1, %xmm0 816# endif 817 jbe LABEL(exit_use) 818# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 819 sub $16, %r11 820 jbe LABEL(strcmp_exitz) 821# endif 822 823 add $16, %rdx 824 add $16, %r10 825 jg LABEL(nibble_ashr_6_use) 826 827 movdqa (%rdi, %rdx), %xmm0 828 palignr $6, -16(%rdi, %rdx), %xmm0 829# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 830 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 831# else 832 movdqa (%rsi,%rdx), %xmm1 833 TOLOWER (%xmm0, %xmm1) 834 pcmpistri $0x1a, %xmm1, %xmm0 835# endif 836 jbe LABEL(exit_use) 837# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 838 sub $16, %r11 839 jbe LABEL(strcmp_exitz) 840# endif 841 add $16, %rdx 842 jmp LABEL(loop_ashr_6_use) 843 844 .p2align 4 845LABEL(nibble_ashr_6_use): 846 sub $0x1000, %r10 847 movdqa -16(%rdi, %rdx), %xmm0 848 psrldq $6, %xmm0 849 pcmpistri $0x3a,%xmm0, %xmm0 850# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 851 cmp %r11, %rcx 852 jae LABEL(nibble_ashr_exit_use) 853# endif 854 cmp $9, %ecx 855 ja LABEL(nibble_ashr_6_restart_use) 856 857 jmp LABEL(nibble_ashr_exit_use) 858 859/* 860 * The following cases will be handled by ashr_7 861 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 862 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7 863 */ 864 .p2align 4 865LABEL(ashr_7): 866 pslldq $9, %xmm2 867 TOLOWER (%xmm1, %xmm2) 868 pcmpeqb %xmm1, 
%xmm2 869 psubb %xmm0, %xmm2 870 pmovmskb %xmm2, %r9d 871 shr %cl, %edx 872 shr %cl, %r9d 873 sub %r9d, %edx 874 jnz LABEL(less32bytes) 875 movdqa (%rdi), %xmm3 876 877 UPDATE_STRNCMP_COUNTER 878 879 mov $16, %rcx /* index for loads */ 880 mov $7, %r9d /* byte position left over from less32bytes case */ 881 /* 882 * Setup %r10 value allows us to detect crossing a page boundary. 883 * When %r10 goes positive we have crossed a page boundary and 884 * need to do a nibble. 885 */ 886 lea 7(%rdi), %r10 887 and $0xfff, %r10 /* offset into 4K page */ 888 sub $0x1000, %r10 /* subtract 4K pagesize */ 889 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 890 891 .p2align 4 892LABEL(loop_ashr_7_use): 893 add $16, %r10 894 jg LABEL(nibble_ashr_7_use) 895 896LABEL(nibble_ashr_7_restart_use): 897 movdqa (%rdi, %rdx), %xmm0 898 palignr $7, -16(%rdi, %rdx), %xmm0 899# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 900 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 901# else 902 movdqa (%rsi,%rdx), %xmm1 903 TOLOWER (%xmm0, %xmm1) 904 pcmpistri $0x1a, %xmm1, %xmm0 905# endif 906 jbe LABEL(exit_use) 907# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 908 sub $16, %r11 909 jbe LABEL(strcmp_exitz) 910# endif 911 912 add $16, %rdx 913 add $16, %r10 914 jg LABEL(nibble_ashr_7_use) 915 916 movdqa (%rdi, %rdx), %xmm0 917 palignr $7, -16(%rdi, %rdx), %xmm0 918# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 919 pcmpistri $0x1a,(%rsi,%rdx), %xmm0 920# else 921 movdqa (%rsi,%rdx), %xmm1 922 TOLOWER (%xmm0, %xmm1) 923 pcmpistri $0x1a, %xmm1, %xmm0 924# endif 925 jbe LABEL(exit_use) 926# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 927 sub $16, %r11 928 jbe LABEL(strcmp_exitz) 929# endif 930 add $16, %rdx 931 jmp LABEL(loop_ashr_7_use) 932 933 .p2align 4 934LABEL(nibble_ashr_7_use): 935 sub $0x1000, %r10 936 movdqa -16(%rdi, %rdx), %xmm0 937 psrldq $7, %xmm0 938 pcmpistri $0x3a,%xmm0, %xmm0 939# if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L 940 cmp %r11, %rcx 941 jae LABEL(nibble_ashr_exit_use) 942# endif 943 cmp $8, %ecx 944 ja LABEL(nibble_ashr_7_restart_use) 945 946 jmp LABEL(nibble_ashr_exit_use) 947 948/* 949 * The following cases will be handled by ashr_8 950 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 951 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8 952 */ 953 .p2align 4 954LABEL(ashr_8): 955 pslldq $8, %xmm2 956 TOLOWER (%xmm1, %xmm2) 957 pcmpeqb %xmm1, %xmm2 958 psubb %xmm0, %xmm2 959 pmovmskb %xmm2, %r9d 960 shr %cl, %edx 961 shr %cl, %r9d 962 sub %r9d, %edx 963 jnz LABEL(less32bytes) 964 movdqa (%rdi), %xmm3 965 966 UPDATE_STRNCMP_COUNTER 967 968 mov $16, %rcx /* index for loads */ 969 mov $8, %r9d /* byte position left over from less32bytes case */ 970 /* 971 * Setup %r10 value allows us to detect crossing a page boundary. 972 * When %r10 goes positive we have crossed a page boundary and 973 * need to do a nibble. 974 */ 975 lea 8(%rdi), %r10 976 and $0xfff, %r10 /* offset into 4K page */ 977 sub $0x1000, %r10 /* subtract 4K pagesize */ 978 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 979 980 .p2align 4 981LABEL(loop_ashr_8_use): 982 add $16, %r10 983 jg LABEL(nibble_ashr_8_use) 984 985LABEL(nibble_ashr_8_restart_use): 986 movdqa (%rdi, %rdx), %xmm0 987 palignr $8, -16(%rdi, %rdx), %xmm0 988# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 989 pcmpistri $0x1a, (%rsi,%rdx), %xmm0 990# else 991 movdqa (%rsi,%rdx), %xmm1 992 TOLOWER (%xmm0, %xmm1) 993 pcmpistri $0x1a, %xmm1, %xmm0 994# endif 995 jbe LABEL(exit_use) 996# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 997 sub $16, %r11 998 jbe LABEL(strcmp_exitz) 999# endif 1000 1001 add $16, %rdx 1002 add $16, %r10 1003 jg LABEL(nibble_ashr_8_use) 1004 1005 movdqa (%rdi, %rdx), %xmm0 1006 palignr $8, -16(%rdi, %rdx), %xmm0 1007# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 1008 pcmpistri $0x1a, (%rsi,%rdx), %xmm0 1009# else 1010 
movdqa (%rsi,%rdx), %xmm1 1011 TOLOWER (%xmm0, %xmm1) 1012 pcmpistri $0x1a, %xmm1, %xmm0 1013# endif 1014 jbe LABEL(exit_use) 1015# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 1016 sub $16, %r11 1017 jbe LABEL(strcmp_exitz) 1018# endif 1019 add $16, %rdx 1020 jmp LABEL(loop_ashr_8_use) 1021 1022 .p2align 4 1023LABEL(nibble_ashr_8_use): 1024 sub $0x1000, %r10 1025 movdqa -16(%rdi, %rdx), %xmm0 1026 psrldq $8, %xmm0 1027 pcmpistri $0x3a,%xmm0, %xmm0 1028# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 1029 cmp %r11, %rcx 1030 jae LABEL(nibble_ashr_exit_use) 1031# endif 1032 cmp $7, %ecx 1033 ja LABEL(nibble_ashr_8_restart_use) 1034 1035 jmp LABEL(nibble_ashr_exit_use) 1036 1037/* 1038 * The following cases will be handled by ashr_9 1039 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 1040 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9 1041 */ 1042 .p2align 4 1043LABEL(ashr_9): 1044 pslldq $7, %xmm2 1045 TOLOWER (%xmm1, %xmm2) 1046 pcmpeqb %xmm1, %xmm2 1047 psubb %xmm0, %xmm2 1048 pmovmskb %xmm2, %r9d 1049 shr %cl, %edx 1050 shr %cl, %r9d 1051 sub %r9d, %edx 1052 jnz LABEL(less32bytes) 1053 movdqa (%rdi), %xmm3 1054 1055 UPDATE_STRNCMP_COUNTER 1056 1057 mov $16, %rcx /* index for loads */ 1058 mov $9, %r9d /* byte position left over from less32bytes case */ 1059 /* 1060 * Setup %r10 value allows us to detect crossing a page boundary. 1061 * When %r10 goes positive we have crossed a page boundary and 1062 * need to do a nibble. 
1063 */ 1064 lea 9(%rdi), %r10 1065 and $0xfff, %r10 /* offset into 4K page */ 1066 sub $0x1000, %r10 /* subtract 4K pagesize */ 1067 mov %rcx, %rdx /* only for offset of sse4 instruction loop*/ 1068 1069 .p2align 4 1070LABEL(loop_ashr_9_use): 1071 add $16, %r10 1072 jg LABEL(nibble_ashr_9_use) 1073 1074LABEL(nibble_ashr_9_restart_use): 1075 movdqa (%rdi, %rdx), %xmm0 1076 1077 palignr $9, -16(%rdi, %rdx), %xmm0 1078# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 1079 pcmpistri $0x1a, (%rsi,%rdx), %xmm0 1080# else 1081 movdqa (%rsi,%rdx), %xmm1 1082 TOLOWER (%xmm0, %xmm1) 1083 pcmpistri $0x1a, %xmm1, %xmm0 1084# endif 1085 jbe LABEL(exit_use) 1086# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 1087 sub $16, %r11 1088 jbe LABEL(strcmp_exitz) 1089# endif 1090 1091 add $16, %rdx 1092 add $16, %r10 1093 jg LABEL(nibble_ashr_9_use) 1094 1095 movdqa (%rdi, %rdx), %xmm0 1096 palignr $9, -16(%rdi, %rdx), %xmm0 1097# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L 1098 pcmpistri $0x1a, (%rsi,%rdx), %xmm0 1099# else 1100 movdqa (%rsi,%rdx), %xmm1 1101 TOLOWER (%xmm0, %xmm1) 1102 pcmpistri $0x1a, %xmm1, %xmm0 1103# endif 1104 jbe LABEL(exit_use) 1105# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 1106 sub $16, %r11 1107 jbe LABEL(strcmp_exitz) 1108# endif 1109 add $16, %rdx 1110 jmp LABEL(loop_ashr_9_use) 1111 1112 .p2align 4 1113LABEL(nibble_ashr_9_use): 1114 sub $0x1000, %r10 1115 movdqa -16(%rdi, %rdx), %xmm0 1116 psrldq $9, %xmm0 1117 pcmpistri $0x3a,%xmm0, %xmm0 1118# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L 1119 cmp %r11, %rcx 1120 jae LABEL(nibble_ashr_exit_use) 1121# endif 1122 cmp $6, %ecx 1123 ja LABEL(nibble_ashr_9_restart_use) 1124 1125 jmp LABEL(nibble_ashr_exit_use) 1126 1127/* 1128 * The following cases will be handled by ashr_10 1129 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case 1130 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10 1131 */ 1132 .p2align 4 
LABEL(ashr_10):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 6 bytes, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$6, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)
	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$10, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 10) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	10(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_10_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_10_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_10_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $10 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$10, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_10_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$10, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_10_use)

	/* Aligns the nibble_ashr_10_use label that follows.  */
	.p2align 4
LABEL(nibble_ashr_10_use):
	/* Page-crossing path of loop_ashr_10_use.  Take one page back off
	   %r10, then examine the bytes of the previous %rdi chunk from
	   offset 10 on before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$10, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	cmp	$5, %ecx
	ja	LABEL(nibble_ashr_10_restart_use)	/* %ecx > 5: resume the loop */

	jmp	LABEL(nibble_ashr_exit_use)

/*
 * The following cases will be handled by ashr_11
 *  rcx(offset of rsi)  rax(offset of rdi)  relative offset       corresponding case
 *        n(5~15)            n - 5          10(15 +(n - 5) - n)   ashr_11
 */
	.p2align 4
LABEL(ashr_11):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 5 bytes, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$5, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)
	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$11, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 11) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	11(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_11_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_11_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_11_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $11 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$11, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_11_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$11, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_11_use)

	.p2align 4
LABEL(nibble_ashr_11_use):
	/* Page-crossing path of loop_ashr_11_use.  Take one page back off
	   %r10, then examine the bytes of the previous %rdi chunk from
	   offset 11 on before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$11, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	cmp	$4, %ecx
	ja	LABEL(nibble_ashr_11_restart_use)	/* %ecx > 4: resume the loop */

	jmp	LABEL(nibble_ashr_exit_use)

/*
 * The following cases will be handled by ashr_12
 *  rcx(offset of rsi)  rax(offset of rdi)  relative offset       corresponding case
 *        n(4~15)            n - 4          11(15 +(n - 4) - n)   ashr_12
 */
	.p2align 4
LABEL(ashr_12):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 4 bytes, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$4, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)
	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$12, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 12) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	12(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_12_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_12_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_12_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $12 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$12, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_12_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$12, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_12_use)

	/* Aligns the nibble_ashr_12_use label that follows.  */
	.p2align 4
LABEL(nibble_ashr_12_use):
	/* Page-crossing path of loop_ashr_12_use.  Take one page back off
	   %r10, then examine the bytes of the previous %rdi chunk from
	   offset 12 on before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$12, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	cmp	$3, %ecx
	ja	LABEL(nibble_ashr_12_restart_use)	/* %ecx > 3: resume the loop */

	jmp	LABEL(nibble_ashr_exit_use)

/*
 * The following cases will be handled by ashr_13
 *  rcx(offset of rsi)  rax(offset of rdi)  relative offset       corresponding case
 *        n(3~15)            n - 3          12(15 +(n - 3) - n)   ashr_13
 */
	.p2align 4
LABEL(ashr_13):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 3 bytes, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$3, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)
	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$13, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 13) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	13(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_13_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_13_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_13_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $13 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$13, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_13_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$13, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_13_use)

	.p2align 4
LABEL(nibble_ashr_13_use):
	/* Page-crossing path of loop_ashr_13_use.  Take one page back off
	   %r10, then examine the bytes of the previous %rdi chunk from
	   offset 13 on before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$13, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	cmp	$2, %ecx
	ja	LABEL(nibble_ashr_13_restart_use)	/* %ecx > 2: resume the loop */

	jmp	LABEL(nibble_ashr_exit_use)

/*
 * The following cases will be handled by ashr_14
 *  rcx(offset of rsi)  rax(offset of rdi)  relative offset       corresponding case
 *        n(2~15)            n - 2          13(15 +(n - 2) - n)   ashr_14
 */
/* Entry point and compare loop for ashr_14.  */
	.p2align 4
LABEL(ashr_14):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 2 bytes, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$2, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)
	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$14, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 14) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	14(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_14_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_14_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_14_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $14 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$14, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_14_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$14, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_14_use)

	/* Aligns the nibble_ashr_14_use label that follows.  */
	.p2align 4
LABEL(nibble_ashr_14_use):
	/* Page-crossing path of loop_ashr_14_use.  Take one page back off
	   %r10, then examine the bytes of the previous %rdi chunk from
	   offset 14 on before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$14, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	cmp	$1, %ecx
	ja	LABEL(nibble_ashr_14_restart_use)	/* %ecx > 1: resume the loop */

	jmp	LABEL(nibble_ashr_exit_use)

/*
 * The following cases will be handled by ashr_15
 *  rcx(offset of rsi)  rax(offset of rdi)  relative offset       corresponding case
 *        n(1~15)            n - 1          14(15 +(n - 1) - n)   ashr_15
 */
	.p2align 4
LABEL(ashr_15):
	/* Inputs set up by earlier code: %xmm0, %xmm1, %xmm2, %edx, %cl.
	   Shift %xmm2 left by 1 byte, compare it byte-wise with %xmm1,
	   subtract %xmm0 and gather the byte sign bits into %r9d.  */
	pslldq	$1, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %r9d
	/* Shift %r9d and %edx right by %cl; if they then differ,
	   less32bytes produces the result.  */
	shr	%cl, %r9d
	shr	%cl, %edx
	sub	%r9d, %edx
	jnz	LABEL(less32bytes)

	/* NOTE(review): %xmm3 is not read by any code visible here --
	   confirm whether this load is still needed.  */
	movdqa	(%rdi), %xmm3

	UPDATE_STRNCMP_COUNTER

	mov	$15, %r9d	/* byte position left over from less32bytes case */
	mov	$16, %rcx	/* index for loads */
	mov	%rcx, %rdx	/* offset used by the pcmpistri loop below */
	/*
	 * Page-crossing detector: %r10 = ((%rdi + 15) & 0xfff) - 0x1000.
	 * The loop adds 16 before every compare; once %r10 goes positive
	 * a page boundary is being crossed and the nibble code runs.
	 */
	lea	15(%rdi), %r10
	and	$0xfff, %r10	/* offset into 4K page */
	sub	$0x1000, %r10	/* subtract 4K pagesize */

	.p2align 4
LABEL(loop_ashr_15_use):
	add	$16, %r10
	jg	LABEL(nibble_ashr_15_use)	/* %r10 > 0: page crossing */

LABEL(nibble_ashr_15_restart_use):
	/* Build 16 bytes from two adjacent 16-byte %rdi chunks with
	   palignr $15 and compare them with the chunk at (%rsi,%rdx).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$15, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)			/* CF or ZF set: leave the loop */
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11			/* 16 more bytes taken off the count */
	jbe	LABEL(strcmp_exitz)
# endif

	add	$16, %rdx
	add	$16, %r10
	jg	LABEL(nibble_ashr_15_use)

	/* Second copy of the compare step (the loop body is unrolled twice).  */
	movdqa	(%rdi, %rdx), %xmm0
	palignr	$15, -16(%rdi, %rdx), %xmm0
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	jbe	LABEL(exit_use)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	$16, %r11
	jbe	LABEL(strcmp_exitz)
# endif
	add	$16, %rdx
	jmp	LABEL(loop_ashr_15_use)

	.p2align 4
LABEL(nibble_ashr_15_use):
	/* Page-crossing path of loop_ashr_15_use.  Take one page back off
	   %r10, then examine the last byte (offset 15) of the previous
	   %rdi chunk before the loop resumes.
	   NOTE(review): pcmpistri $0x3a with identical operands presumably
	   leaves the index of the first NUL byte in %ecx (16 if none) --
	   confirm against the instruction reference.  */
	sub	$0x1000, %r10
	movdqa	-16(%rdi, %rdx), %xmm0
	psrldq	$15, %xmm0
	pcmpistri $0x3a,%xmm0, %xmm0
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	%r11, %rcx
	jae	LABEL(nibble_ashr_exit_use)	/* %rcx >= remaining count %r11 */
# endif
	/* Written as cmp $0 to parallel the cmp $K checks in the other
	   nibble_ashr_N_use blocks.  */
	cmp	$0, %ecx
	ja	LABEL(nibble_ashr_15_restart_use)	/* %ecx > 0: resume the loop */

	/* Otherwise fall through into the shared exit.  */
LABEL(nibble_ashr_exit_use):
	/* Shared exit of the nibble_ashr_N_use blocks: compare what is left
	   in %xmm0 with the chunk at (%rsi,%rdx) and let exit_use evaluate
	   the flags.  */
# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
# else
	movdqa	(%rsi,%rdx), %xmm1
	TOLOWER (%xmm0, %xmm1)
	pcmpistri $0x1a, %xmm1, %xmm0
# endif
	.p2align 4
LABEL(exit_use):
	/* The flags and %ecx still come from the last pcmpistri $0x1a.
	   CF clear means no differing byte was found: return 0.  */
	jnc	LABEL(strcmp_exitz)
# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	%rcx, %r11
	jbe	LABEL(strcmp_exitz)	/* difference is not within the count */
# endif
	/* Rebase %rdi by the shift amount kept in %r9 so that (%rdi,%rdx)
	   and (%rsi,%rdx) address corresponding bytes, then step %rdx to
	   the differing byte.  */
	lea	-16(%rdi, %r9), %rdi
	add	%rcx, %rdx
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %edx
	test	%r8d, %r8d
	jz	LABEL(ret_use)
	xchg	%eax, %edx		/* non-zero %r8d: swap the two bytes back */
LABEL(ret_use):
# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	/* Map both bytes through the _nl_C_LC_CTYPE_tolower table
	   (32-bit entries, base biased by 128 entries).  */
	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
	movl	(%rcx,%rax,4), %eax
	movl	(%rcx,%rdx,4), %edx
# endif

	sub	%edx, %eax		/* return value: byte difference */
	ret

LABEL(less32bytes):
	lea	(%rsi, %rcx), %rsi	/* locate the exact address for second operand(rsi) */
	lea	(%rdi, %rax), %rdi	/* locate the exact address for first operand(rdi) */
	test	%r8d, %r8d
	jz	LABEL(ret)
	xchg	%rsi, %rdi		/* recover original order according to flag(%r8d) */

	.p2align 4
LABEL(ret):
LABEL(less16bytes):
	bsf	%rdx, %rdx		/* %rdx = index of its lowest set bit */

# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	sub	%rdx, %r11
	jbe	LABEL(strcmp_exitz)	/* that index is not within the count */
# endif
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %ecx

# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
	movl	(%rdx,%rax,4), %eax
	movl	(%rdx,%rcx,4), %ecx
# endif

	sub	%ecx, %eax		/* return value: byte difference */
	ret

LABEL(strcmp_exitz):
	xor	%eax, %eax		/* return 0 */
	ret

	.p2align 4
	/* XXX Same load / case-fold / subtract sequence as the end of
	   less16bytes, applied to the first byte of each operand.  */
LABEL(Byte0):
	movzbl	(%rdi), %eax
	movzbl	(%rsi), %ecx

# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	leaq	_nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
	movl	(%rdx,%rax,4), %eax
	movl	(%rdx,%rcx,4), %ecx
# endif

	sub	%ecx, %eax
	ret
	cfi_endproc
	.size	STRCMP, .-STRCMP

# undef UCLOW_reg
# undef UCHIGH_reg
# undef LCQWORD_reg
# undef TOLOWER

	/* Table of the ashr_N entry points, stored as 32-bit offsets
	   relative to the table itself and kept in the read-only data
	   section that matches the code section.  ashr_1 .. ashr_15 come
	   first, ashr_0 is the last entry.  */
	.section .rodata.SECTION,"a",@progbits
	.p2align 3
LABEL(unaligned_table):
	.int	LABEL(ashr_1) - LABEL(unaligned_table)
	.int	LABEL(ashr_2) - LABEL(unaligned_table)
	.int	LABEL(ashr_3) - LABEL(unaligned_table)
	.int	LABEL(ashr_4) - LABEL(unaligned_table)
	.int	LABEL(ashr_5) - LABEL(unaligned_table)
	.int	LABEL(ashr_6) - LABEL(unaligned_table)
	.int	LABEL(ashr_7) - LABEL(unaligned_table)
	.int	LABEL(ashr_8) - LABEL(unaligned_table)
	.int	LABEL(ashr_9) - LABEL(unaligned_table)
	.int	LABEL(ashr_10) - LABEL(unaligned_table)
	.int	LABEL(ashr_11) - LABEL(unaligned_table)
	.int	LABEL(ashr_12) - LABEL(unaligned_table)
	.int	LABEL(ashr_13) - LABEL(unaligned_table)
	.int	LABEL(ashr_14) - LABEL(unaligned_table)
	.int	LABEL(ashr_15) - LABEL(unaligned_table)
	.int	LABEL(ashr_0) - LABEL(unaligned_table)

# undef LABEL
# undef SECTION
# undef movdqa
# undef movdqu
# undef pmovmskb
# undef pcmpistri
# undef psubb
# undef pcmpeqb
# undef psrldq
# undef pslldq
# undef palignr
# undef pxor
# undef D
#endif