/* strrchr with SSE2 with bsf and bsr
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* Stack layout on entry (IA-32, cdecl): return address at 0(%esp),
   then the two arguments.  */
# define PARMS 4
# define STR1 PARMS		/* const char *s  */
# define STR2 STR1+4		/* int c (only low byte used)  */

/* char *__strrchr_sse2_bsf (const char *s, int c)

   Register roles inside the function:
     %edi - current 16-byte search position in the string.
     %xmm1 - the search byte C broadcast to all 16 lanes.
     %xmm2 - zero vector, used to detect the terminating NUL.
     %ebx - pcmpeqb match mask of the most recent block that
	    contained a match (0 if none seen yet).
     %esi - pointer one block past the block %ebx refers to.
   Result (pointer to last occurrence, or NULL) is returned in %eax.  */

	.text
ENTRY (__strrchr_sse2_bsf)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	PUSH	(%edi)
	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Broadcast the low byte of C to all 16 bytes of %xmm1.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET. */
	and	$63, %ecx
	cmp	$48, %ecx
	pshufd	$0, %xmm1, %xmm1
	/* A 16-byte unaligned load at offset > 48 would cross a
	   64-byte cache line; use the aligned-load path instead.  */
	ja	L(crosscache)

/* unaligned string. */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax

	test	%eax, %eax
	jnz	L(unaligned_match1)

	/* No match in the first block; if it held the NUL, C never
	   occurs in the string.  */
	test	%edx, %edx
	jnz	L(return_null)

	/* Round EDI up to the next 16-byte boundary.  */
	and	$-16, %edi
	add	$16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_return_value1):
	/* NUL and match in the same first block: keep only match bits
	   at or below the first NUL bit, then take the highest one.  */
	bsf	%edx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	bsr	%eax, %eax
	add	%edi, %eax
	POP	(%edi)
	ret
	CFI_PUSH	(%edi)

	.p2align 4
L(unaligned_match1):
	test	%edx, %edx
	jnz	L(unaligned_return_value1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember this block's match mask and its end pointer, then
	   continue scanning from the next aligned block.  */
	mov	%eax, %ebx
	lea	16(%edi), %esi
	and	$-16, %edi
	add	$16, %edi
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	add	$16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_return_value):
	/* Undo the head-alignment shift before computing the address.  */
	add	%ecx, %edi
	bsf	%edx, %ecx
	/* Mask match bits at or below the first NUL bit.  */
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_null)
	bsr	%eax, %eax
	add	%edi, %eax
	POP	(%edi)
	ret
	CFI_PUSH	(%edi)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(unaligned_return_value)

	PUSH	(%esi)
	PUSH	(%ebx)

	mov	%eax, %ebx
	add	$16, %edi
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  */
	.p2align 4
L(loop):
	/* Four-way unrolled: each iteration checks one aligned 16-byte
	   block for either the NUL terminator or a match of C.  */
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	test	%eax, %eax
	jnz	L(match)
L(return_value):
	/* NUL reached with no match in this block: fall back to the
	   last recorded match (EBX/ESI), if any.  */
	test	%ebx, %ebx
	jz	L(return_null_1)
	bsr	%ebx, %eax
	add	%esi, %eax

	POP	(%ebx)
	POP	(%esi)

	/* ESI points one block past the match block.  */
	sub	$16, %eax
	POP	(%edi)
	ret

	CFI_PUSH	(%edi)
	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(match):
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(return_value_1)
	/* Match but no NUL yet: record it and keep scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

	.p2align 4
L(return_value_1):
	/* Match and NUL in the same block: keep only match bits at or
	   below the first NUL bit.  */
	bsf	%ecx, %ecx
	mov	$2, %edx
	shl	%cl, %edx
	sub	$1, %edx
	and	%edx, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)

	bsr	%eax, %eax
	add	%edi, %eax
	/* EDI was advanced past the block before the check.  */
	sub	$16, %eax
	POP	(%edi)
	ret

	CFI_PUSH	(%edi)
/* Return NULL. */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	POP	(%edi)
	ret

	CFI_PUSH	(%edi)
	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)
/* Return NULL. */
	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)
	POP	(%edi)
	xor	%eax, %eax
	ret

END (__strrchr_sse2_bsf)
#endif