/* strlen with SSE2 and BSF
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if defined SHARED && IS_IN (libc)

#include <sysdep.h>

/* Unwind annotations that accompany a 4-byte push or pop of REG, so
   that the CFA offset and the saved-register location stay accurate
   while %esp moves.  */
#define CFI_PUSH(REG)						\
	cfi_adjust_cfa_offset (4);				\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
	cfi_adjust_cfa_offset (-4);				\
	cfi_restore (REG)

/* Push/pop a 32-bit register together with its unwind annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp to the first stack argument once ENTRANCE has run:
   4 bytes of return address plus 8 bytes for the two saved registers.
   The macro is used as a displacement, i.e. STR(%esp) is assembled as
   "4 + 8(%esp)".  */
#define PARMS		4 + 8	/* Preserve ESI and EDI.  */
#define STR		PARMS

/* ENTRANCE saves the two callee-saved registers used below and records
   the resulting unwind state.  RETURN undoes the pushes and returns;
   the CFI directives placed after the `ret' re-establish the recorded
   state for whatever code follows that exit in the file.  */
#define ENTRANCE	PUSH (%esi); PUSH (%edi); cfi_remember_state
#define RETURN		POP (%edi); POP (%esi); ret;		\
	cfi_restore_state; cfi_remember_state

/* __strlen_sse2_bsf

   Input:   one stack argument, the string pointer, read from
	    STR(%esp).
   Output:  %eax = number of bytes that precede the first zero byte.
   Saved:   %esi and %edi (pushed on entry, popped on every exit).
   Altered: %eax, %ecx, %edx, %xmm0-%xmm3, EFLAGS.

   Register roles:
     %edi	 the string pointer S, unchanged after it is loaded
     %eax	 0 on the short path; otherwise the base address of
		 the 16-byte aligned chunk group being examined
     %ecx	 S & 0x3f, then the shift count for the head mask
     %esi	 mask that removes bytes located before S
     %edx	 pmovmskb result: bit I is set when byte I of the
		 chunk just compared is zero
     %xmm0-3	 zero on entry to each compare, match mask afterwards  */

	.text
ENTRY ( __strlen_sse2_bsf)
	ENTRANCE
	mov	STR(%esp), %edi
	xor	%eax, %eax

	/* %ecx = offset of S inside its 64-byte aligned block.  When it
	   is at most 0x30 (48), the 16 bytes starting at S end inside
	   that same block, so they can be fetched with one unaligned
	   load.  */
	mov	%edi, %ecx
	and	$0x3f, %ecx
	pxor	%xmm0, %xmm0
	cmp	$0x30, %ecx
	ja	L(next)

	/* Examine the first 16 bytes exactly as they sit at S.  %eax is
	   still 0, so on a hit the bit index is the length itself.  */
	movdqu	(%edi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit_less16)

	/* No zero byte in S[0..15].  Continue from the aligned chunk
	   that contains S; the loop begins at 16(%eax), which is never
	   beyond S + 16, so no byte is skipped.  */
	mov	%edi, %eax
	and	$-16, %eax
	jmp	L(align16_start)

L(next):
	/* An unaligned load from S would leave the 64-byte block.  Read
	   the aligned 16-byte chunk containing S instead and ignore the
	   bytes in front of S.  */
	mov	%edi, %eax
	and	$-16, %eax
	pcmpeqb	(%eax), %xmm0

	/* %esi = -1 << (S & 15).  shl only honours the low five bits of
	   %cl, and the low five bits of (S & 0x3f) - (S & -16) are
	   exactly S & 15.  */
	mov	$-1, %esi
	sub	%eax, %ecx
	shl	%cl, %esi
	pmovmskb %xmm0, %edx
	and	%esi, %edx
	jnz	L(exit)

L(align16_start):
	/* Each compare below needs its destination register to hold
	   zero.  A compare that finds no zero byte leaves the register
	   all-zero again, so clearing once before the loop suffices.  */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3

	.p2align 4
L(align16_loop):
	/* Four aligned 16-byte chunks per iteration, at displacements
	   16, 32, 48 and 64 from %eax.  */
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	/* %eax is advanced before the branch, so when control falls
	   through to L(exit) it already addresses the chunk that holds
	   the zero byte.  */
	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(align16_loop)

L(exit):
	/* %eax = address of the chunk with the hit.
	   Length = (%eax - S) + index of the lowest set bit in %edx.  */
	sub	%edi, %eax
L(exit_less16):
	bsf	%edx, %edx
	add	%edx, %eax
	RETURN

	/* The three exits below are taken before %eax has been advanced
	   for the current iteration, so the displacement of the matching
	   chunk is added explicitly.  */
L(exit16):
	sub	%edi, %eax
	bsf	%edx, %edx
	add	%edx, %eax
	add	$16, %eax
	RETURN

L(exit32):
	sub	%edi, %eax
	bsf	%edx, %edx
	add	%edx, %eax
	add	$32, %eax
	RETURN

L(exit48):
	sub	%edi, %eax
	bsf	%edx, %edx
	add	%edx, %eax
	add	$48, %eax
	/* Final exit: written out by hand instead of using RETURN, so the
	   CFI state save/restore that RETURN appends after `ret' is not
	   emitted here, where no further code follows.  */
	POP (%edi)
	POP (%esi)
	ret

END ( __strlen_sse2_bsf)

#endif