/* strchr SSE2 without bsf
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind annotations that accompany a 4-byte push or pop of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp of the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes pushed for %edi.  */
# define PARMS		8
# define ENTRANCE	PUSH (%edi)
/* Restore %edi and return.  The CFI_PUSH placed after the ret puts the
   unwind annotations back into the "%edi is on the stack" state for
   whatever code follows this return in the file.  */
# define RETURN		POP (%edi); ret; CFI_PUSH (%edi);

# define STR1		PARMS
# define STR2		STR1+4

/* One 16-byte step of the aligned scan.

   On entry %edi is 16-byte aligned, %xmm1 holds the wanted byte in all
   sixteen lanes and %xmm2 is all-zero.  Afterwards bit k of %eax is set
   when byte k of the chunk equals the wanted byte, and bit k of %edx is
   set when byte k is NUL.  A match leaves through L(found); a chunk with
   a NUL and no match leaves through L(return_null).  Otherwise %edi moves
   on by 16.  In that last case %edx was zero, so the compare left %xmm2
   all-zero and the next step can reuse it as the NUL comparand.  */
# define SCAN_CHUNK	\
	movdqa	(%edi), %xmm0;	\
	pcmpeqb	%xmm0, %xmm2;	\
	pcmpeqb	%xmm1, %xmm0;	\
	pmovmskb %xmm2, %edx;	\
	pmovmskb %xmm0, %eax;	\
	test	%eax, %eax;	\
	jnz	L(found);	\
	test	%edx, %edx;	\
	jnz	L(return_null);	\
	add	$16, %edi

/* Branch to HIT when BIT is set in the match-mask byte CREG.  */
# define HIT_IF(BIT, CREG, HIT)	\
	test	BIT, CREG;	\
	jnz	HIT

/* Like HIT_IF, but when the byte is not a match and BIT is set in the
   NUL-mask byte ZREG, the string ended before any remaining match, so
   leave through L(return_null).  The match is tested first, which is
   what makes a search for the NUL byte itself find the terminator.  */
# define HIT_OR_NUL(BIT, CREG, ZREG, HIT)	\
	HIT_IF (BIT, CREG, HIT);	\
	test	BIT, ZREG;	\
	jnz	L(return_null)

/* __strchr_sse2: strchr for i386 using SSE2, locating the matching byte
   with a chain of single-bit tests instead of bsf.

   In:	 STR1(%esp) = address of the string
	 STR2(%esp) = byte to look for (only the low byte is used)
   Out:	 %eax = address of the first matching byte, or 0 when the NUL
	 terminator is reached first
   Uses: %eax, %ecx, %edx, %xmm0-%xmm2 and the flags; %edi is saved by
	 ENTRANCE and restored by RETURN.  */
	atom_text_section
ENTRY (__strchr_sse2)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	/* %xmm2 = 0, the comparand used to spot NUL bytes.  */
	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Two byte-interleaves copy the low byte into the low four bytes;
	   the pshufd below then copies that dword into every lane.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* %ecx = misalignment of the string within its 16-byte chunk.  The
	   flags set here are the ones consumed by the je; the shuffle in
	   between does not modify them.  */
	and	$15, %ecx
	pshufd	$0, %xmm1, %xmm1
	je	L(loop)

/* Unaligned start: examine the aligned chunk that contains the first
   byte and discard the mask bits of the bytes in front of it.  */
	and	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* %edx = NUL mask, %eax = match mask.  */
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	/* Shift out the leading bytes so that bit 0 describes the first
	   byte of the string.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(head_no_match)
	/* There is a match.  Make %edi the string start again so that it
	   agrees with the shifted masks, then pick the decoder depending
	   on whether a NUL was seen as well.  */
	add	%ecx, %edi
	test	%edx, %edx
	jz	L(match_no_nul)
	jmp	L(match_nul)

	.p2align 4
L(head_no_match):
	test	%edx, %edx
	jne	L(return_null)

	/* %xmm2 still carries compare results for the bytes in front of
	   the string, which may include NULs; clear it before the aligned
	   loop relies on it being zero.  */
	pxor	%xmm2, %xmm2
	add	$16, %edi

	.p2align 4
/* Aligned scan, four chunks per iteration.  */
L(loop):
	SCAN_CHUNK
	SCAN_CHUNK
	SCAN_CHUNK
	SCAN_CHUNK
	jmp	L(loop)

L(found):
	/* At least one byte matched.  Was a NUL seen in the same chunk?  */
	test	%edx, %edx
	jz	L(match_no_nul)

	.p2align 4
/* Match and NUL in the same chunk: walk the bytes in order and stop at
   whichever comes first.  This entry handles bytes 0..7.  */
L(match_nul):
	test	%al, %al
	jz	L(match_nul_hi)

	/* Any match among bytes 0..3?  (%cl is only scratch here.)  */
	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_nul_0_3)

	/* No match in bytes 0..3, so a NUL there ends the search.  */
	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)

	HIT_OR_NUL ($0x10, %al, %dl, L(hit_4))
	HIT_OR_NUL ($0x20, %al, %dl, L(hit_5))
	HIT_OR_NUL ($0x40, %al, %dl, L(hit_6))
	/* %al is non-zero and bits 0..6 are clear: the match is byte 7.  */
	lea	7(%edi), %eax
	RETURN

	.p2align 4
/* A match exists among bytes 0..3.  */
L(match_nul_0_3):
	HIT_OR_NUL ($0x01, %al, %dl, L(hit_0))
	HIT_OR_NUL ($0x02, %al, %dl, L(hit_1))
	HIT_OR_NUL ($0x04, %al, %dl, L(hit_2))
	lea	3(%edi), %eax
	RETURN

	.p2align 4
/* No match in bytes 0..7; the same walk over bytes 8..15.  */
L(match_nul_hi):
	/* A NUL anywhere in bytes 0..7 precedes every remaining match.  */
	test	%dl, %dl
	jnz	L(return_null)

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_nul_8_11)

	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)

	HIT_OR_NUL ($0x10, %ah, %dh, L(hit_12))
	HIT_OR_NUL ($0x20, %ah, %dh, L(hit_13))
	HIT_OR_NUL ($0x40, %ah, %dh, L(hit_14))
	lea	15(%edi), %eax
	RETURN

	.p2align 4
/* A match exists among bytes 8..11.  */
L(match_nul_8_11):
	HIT_OR_NUL ($0x01, %ah, %dh, L(hit_8))
	HIT_OR_NUL ($0x02, %ah, %dh, L(hit_9))
	HIT_OR_NUL ($0x04, %ah, %dh, L(hit_10))
	lea	11(%edi), %eax
	RETURN

	.p2align 4
/* Match without any NUL in the chunk: return the lowest set bit of the
   match mask.  This entry handles bytes 0..7.  */
L(match_no_nul):
	test	%al, %al
	jz	L(match_no_nul_hi)

	HIT_IF ($0x01, %al, L(hit_0))
	HIT_IF ($0x02, %al, L(hit_1))
	HIT_IF ($0x04, %al, L(hit_2))
	HIT_IF ($0x08, %al, L(hit_3))
	HIT_IF ($0x10, %al, L(hit_4))
	HIT_IF ($0x20, %al, L(hit_5))
	HIT_IF ($0x40, %al, L(hit_6))
	lea	7(%edi), %eax
	RETURN

	.p2align 4
/* Same as above for bytes 8..15.  */
L(match_no_nul_hi):
	HIT_IF ($0x01, %ah, L(hit_8))
	HIT_IF ($0x02, %ah, L(hit_9))
	HIT_IF ($0x04, %ah, L(hit_10))
	HIT_IF ($0x08, %ah, L(hit_11))
	HIT_IF ($0x10, %ah, L(hit_12))
	HIT_IF ($0x20, %ah, L(hit_13))
	HIT_IF ($0x40, %ah, L(hit_14))
	lea	15(%edi), %eax
	RETURN

/* Exit stubs.  L(hit_K) returns the address of byte K of the chunk at
   %edi.  Offsets 3, 7, 11 and 15 are returned inline by the decoders
   above wherever they are the fall-through case.  */
	.p2align 4
L(hit_0):
	lea	(%edi), %eax
	RETURN

	.p2align 4
L(hit_1):
	lea	1(%edi), %eax
	RETURN

	.p2align 4
L(hit_2):
	lea	2(%edi), %eax
	RETURN

	.p2align 4
L(hit_3):
	lea	3(%edi), %eax
	RETURN

	.p2align 4
L(hit_4):
	lea	4(%edi), %eax
	RETURN

	.p2align 4
L(hit_5):
	lea	5(%edi), %eax
	RETURN

	.p2align 4
L(hit_6):
	lea	6(%edi), %eax
	RETURN

	.p2align 4
L(hit_8):
	lea	8(%edi), %eax
	RETURN

	.p2align 4
L(hit_9):
	lea	9(%edi), %eax
	RETURN

	.p2align 4
L(hit_10):
	lea	10(%edi), %eax
	RETURN

	.p2align 4
L(hit_11):
	lea	11(%edi), %eax
	RETURN

	.p2align 4
L(hit_12):
	lea	12(%edi), %eax
	RETURN

	.p2align 4
L(hit_13):
	lea	13(%edi), %eax
	RETURN

	.p2align 4
L(hit_14):
	lea	14(%edi), %eax
	RETURN

/* The terminator came before any match: return a null pointer.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

END (__strchr_sse2)
#endif