/* strchr with SSE2 without bsf
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <isa-level.h>

/* NB: atom builds with ISA level == 1 so no reason to hold onto this
   at ISA level >= 2.  */
#if ISA_SHOULD_BUILD (1)

# include <sysdep.h>
# include "asm-syntax.h"

/* strchr variant that locates the result with bit tests instead of bsf.

   Syntax:	AT&T (GNU as), run through the C preprocessor.
   In:		%rdi = start of the string
		%esi = byte to look for; only the low 8 bits are used
   Out:		%rax = address of the first byte equal to the low byte
		       of %esi, or 0 when the zero terminator comes first.
		       When the byte looked for is itself zero, the
		       address of the terminator is returned.
   Clobbers:	%rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.
   Stack:	not used.

   The string is scanned 16 bytes at a time with aligned loads.  For
   each chunk two 16-bit masks are built: %eax has bit N set when
   byte N equals the byte looked for, %edx has bit N set when byte N
   is zero.  The first chunk is read from the start address rounded
   down to 16 bytes, so the mask bits belonging to bytes in front of
   the string are cleared before they are examined.  */

	atom_text_section
ENTRY (__strchr_sse2_no_bsf)
	/* Broadcast the low byte of %esi to all 16 bytes of %xmm1 (two
	   punpcklbw steps followed by pshufd) while setting up the first
	   aligned load.  */
	movd	%esi, %xmm1
	movq	%rdi, %rcx		/* Keep the unaligned start.  */
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* Round down to 16 bytes.  */
	pxor	%xmm2, %xmm2		/* %xmm2 = 0, used to find the terminator.  */
	punpcklbw %xmm1, %xmm1
	orl	$0xffffffff, %esi	/* All ones; becomes the lead-in mask.  */
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1
	subq	%rdi, %rcx		/* %cl = offset of the start in the chunk.  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi		/* %rdi always points past the chunk in use.  */
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shl	%cl, %esi		/* Clear one mask bit per byte before the start.  */
	pmovmskb %xmm0, %eax		/* Match mask.  */
	pmovmskb %xmm3, %edx		/* Terminator mask.  */
	andl	%esi, %eax
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

	/* Main loop: one aligned 16-byte chunk per iteration until either
	   mask is non-zero.  */
L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx
	jz	L(loop)

	/* The or above overwrote %edx; recompute the terminator mask.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)
	/* No match in this chunk, so the loop ended on the terminator:
	   fall through to L(return_null).  */

/* Return NULL.  */
	.p2align 4
L(return_null):
	/* The 32-bit xor zero-extends into %rax and needs no REX prefix.  */
	xor	%eax, %eax
	ret

L(matches):
	/* There is a match in the current chunk.  Point %rdi back at the
	   start of that chunk, then check whether the chunk also holds
	   the terminator.  */
	leaq	-16(%rdi), %rdi
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the chunk holds both a match and a terminator.  The
	   masks are walked from bit 0 upwards, testing the match bit
	   before the terminator bit at each position, so whichever comes
	   first decides the result and a match on the terminator itself
	   still counts as a match.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)	/* No match in bytes 0-7.  */

	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_case2_4)	/* Match somewhere in bytes 0-3.  */

	/* First match is in bytes 4-7; a terminator in bytes 0-3
	   precedes it.  */
	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* Only bit 7 of %al can still be set: the match is byte 7.  */
	lea	7(%rdi), %rax
	ret

	/* Case 2, first match within bytes 0-3.  */
	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Only bit 3 of %al can still be set: the match is byte 3.  */
	lea	3(%rdi), %rax
	ret

	/* Case 2, no match in bytes 0-7: repeat the walk on the high mask
	   bytes (%ah, %dh), which describe bytes 8-15.  */
	.p2align 4
L(match_high_case2):
	test	%dl, %dl
	jnz	L(return_null)		/* Terminator in bytes 0-7 comes first.  */

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_case2_12)	/* Match somewhere in bytes 8-11.  */

	/* First match is in bytes 12-15; a terminator in bytes 8-11
	   precedes it.  */
	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	/* Only bit 7 of %ah can still be set: the match is byte 15.  */
	lea	15(%rdi), %rax
	ret

	/* Case 2, first match within bytes 8-11.  */
	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	/* Only bit 3 of %ah can still be set: the match is byte 11.  */
	lea	11(%rdi), %rax
	ret

	/* Case 1: the chunk holds a match and no terminator, so the
	   lowest set bit of the match mask is the answer.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* Only bit 7 of %al can still be set.  */
	lea	7(%rdi), %rax
	ret

	/* Case 1, match within bytes 8-15.  */
	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* Only bit 7 of %ah can still be set.  */
	lea	15(%rdi), %rax
	ret

	/* Return stubs: L(ExitN) returns the address of byte N - 1 of the
	   current chunk, whose start is in %rdi.  Bytes 7 and 15 are
	   returned inline by the fall-through paths above, so no Exit8
	   or Exit16 stub exists.  */
	.p2align 4
L(Exit1):
	lea	(%rdi), %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
#endif