/* strchr with SSE2 with bsf
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind annotations matching a 4-byte push / pop of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes pushed for %edi.  */
# define PARMS		8
# define ENTRANCE	PUSH(%edi)
/* Restore %edi and return.  The CFI_PUSH after the ret puts the unwind
   state back to "%edi is saved" for the code that follows in the file.  */
# define RETURN		POP(%edi); ret; CFI_PUSH(%edi);

# define STR1		PARMS
# define STR2		STR1+4

/* __strchr_sse2_bsf

   In:   STR1(%esp)  address of the NUL-terminated string to search
         STR2(%esp)  character to look for; only its low byte is used
   Out:  %eax        address of the first byte equal to the character,
                     or 0 when the terminating NUL is reached first.
                     Searching for 0 yields the address of the NUL.
   Uses: %ecx, %edx, %xmm0, %xmm1, %xmm2, flags.  %edi is saved by
         ENTRANCE and restored by RETURN.

   Register roles:
     %edi   address of the 16-byte block being examined (in the main
            loop it is already advanced past the block just loaded)
     %xmm1  the search byte replicated into all 16 lanes
     %xmm2  all-zero on entry to every compare step; receives the
            "byte == 0" compare result
     %eax   bit mask of lanes equal to the search byte
     %edx   bit mask of lanes equal to 0

   The string is always read as whole 16-byte aligned blocks with
   movdqa, so bytes in front of the string start and behind the NUL
   that share a block with the string are loaded as well; their mask
   bits are shifted out or rejected by the index comparison.  */

	.text
ENTRY (__strchr_sse2_bsf)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Two byte-unpacks followed by the dword shuffle below spread the
	   low byte of XMM1 across the whole register.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX = distance of the start address from the previous 16-byte
	   boundary.  The je below branches on the flags of this and; the
	   shuffle in between is an SSE instruction and leaves them alone.  */
	and	$15, %ecx
	pshufd	$0, %xmm1, %xmm1
	je	L(aligned_loop)

/* The string starts inside a block: examine that block from its
   aligned base and discard the lanes in front of the string.  */
	and	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* EDX = NUL lanes.  */
	pmovmskb %xmm2, %edx
	/* EAX = lanes holding the search byte.  */
	pmovmskb %xmm0, %eax
	/* Shift out the ECX lanes that precede the string, so bit 0 of
	   each mask corresponds to the first byte of the string.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	je	L(head_miss)
	/* EAX = index of the first matching byte, counted from the string
	   start.  */
	bsf	%eax, %eax
	/* No NUL in this block: the match is valid.  */
	test	%edx, %edx
	je	L(head_found)
	bsf	%edx, %edx
	cmpl	%edx, %eax
	/* A match located behind the NUL is outside the string.  */
	ja	L(not_found)
L(head_found):
	/* Result = aligned base + start offset + index.  */
	add	%edi, %eax
	add	%ecx, %eax
	RETURN

	.p2align 4
L(head_miss):
	/* No match in the head block; a NUL there ends the search.  */
	test	%edx, %edx
	jne	L(not_found)
	/* XMM2 still carries the compare result for the complete block,
	   including the lanes in front of the string, so zero it again
	   before it is reused.  */
	pxor	%xmm2, %xmm2

	add	$16, %edi

	.p2align 4
/* Main loop over aligned blocks, four compare steps per iteration.
   Each step loads one block, advances EDI past it and leaves the loop
   as soon as the block contains either the search byte or a NUL.  When
   a step falls through, neither mask had a bit set, hence XMM2 is
   all-zero again for the next step.  */
L(aligned_loop):
	.rept	4
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(hit)
	.endr
	jmp	L(aligned_loop)

/* The block just examined holds the search byte, a NUL, or both.  */
L(hit):
	/* EDX was merged with EAX in the loop; fetch the plain NUL mask
	   once more.  */
	pmovmskb %xmm2, %edx
	/* No matching lane means the exit was caused by a NUL alone.  */
	test	%eax, %eax
	jz	L(not_found)
	bsf	%eax, %eax
	/* Without a NUL in the block the match stands as it is.  */
	test	%edx, %edx
	je	L(found)
	bsf	%edx, %ecx
	/* Reject a match that lies behind the NUL.  */
	cmpl	%ecx, %eax
	ja	L(not_found)
L(found):
	/* EDI already points past the block; step back to its base and
	   add the lane index.  */
	sub	$16, %edi
	add	%edi, %eax
	RETURN

/* The character does not occur in the string: return 0.  */
	.p2align 4
L(not_found):
	xor	%eax, %eax
	RETURN

END (__strchr_sse2_bsf)
#endif