/* wcschr with SSE2, without using bsf instructions
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)
# include <sysdep.h>

/* Unwind annotations matching a 4-byte push / pop of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Offsets from %esp, at function entry, of the two stack arguments.  */
# define PARMS	4
# define STR1	PARMS
# define STR2	STR1+4

/* Examine the 16 bytes at (BASE), fetched with the load instruction LOAD.

   On entry %xmm2 must be all zero and %xmm1 must hold the wanted
   character in each of its four 32-bit elements.  On exit:
     %edx = byte mask of the elements equal to zero (the terminator),
     %eax = byte mask of the elements equal to the wanted character,
     %xmm2 = element-wise "is zero" result (all zero again when the
	     chunk holds no terminator), %xmm0 = element-wise match result.
   Every 32-bit element owns four adjacent mask bits, so %al / %dl
   describe elements 0-1 and %ah / %dh describe elements 2-3.  */
# define SCAN16(LOAD, BASE)	\
	LOAD	(BASE), %xmm0;	\
	pcmpeqd	%xmm0, %xmm2;	\
	pcmpeqd	%xmm1, %xmm0;	\
	pmovmskb %xmm2, %edx;	\
	pmovmskb %xmm0, %eax

/* __wcschr_sse2: find the first 32-bit element equal to the second
   argument in the zero-terminated array given by the first argument.

   In:	STR1(%esp) = start of the array, STR2(%esp) = wanted element.
   Out:	%eax = address of the first matching element, or 0 when the
	terminator is met first.  When the wanted element is itself
	zero, the address of the terminator is returned.
   Uses: %eax, %ecx, %edx, %xmm0-%xmm2, flags; %edi is pushed and popped
	 on the cache-line-crossing path only.

   Register roles throughout:
     %ecx  = base address of the 16-byte chunk being examined,
     %xmm1 = wanted element replicated four times,
     %xmm2 = zero before each SCAN16.  */

	atom_text_section
ENTRY (__wcschr_sse2)

	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	mov	%ecx, %eax
	/* Two unpacks replicate the low dword of %xmm1 into all four
	   elements; the zeroing of %xmm2 is interleaved between them.  */
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1

	/* An offset above 48 inside a 64-byte block means a 16-byte load
	   from the start address would run past the end of that block.  */
	and	$63, %eax
	cmp	$48, %eax
	ja	L(cross_cache_line)

	/* Head: one unaligned load straight from the start address.  */
	SCAN16 (movdqu, %ecx)
	or	%eax, %edx
	jnz	L(found)
	/* Nothing in the head: round down and continue with aligned
	   chunks (the first of them may overlap the head).  */
	and	$-16, %ecx
	jmp	L(aligned_loop)

	.p2align 4
L(cross_cache_line):
	/* Head when the unaligned load is not allowed: load the enclosing
	   aligned chunk and shift away the mask bits of the bytes that
	   precede the start address.
	   %edi = aligned chunk base, %ecx = start address modulo 16.  */
	PUSH	(%edi)
	mov	%ecx, %edi
	mov	%eax, %ecx
	and	$-16, %edi
	and	$15, %ecx
	SCAN16 (movdqa, %edi)

	/* pmovmskb leaves only the low 16 bits possibly set, so the
	   arithmetic shift cannot bring in any one bits.
	   NOTE(review): the dispatch below tests fixed nibble positions,
	   which assumes the shift count (start address modulo 16) is a
	   multiple of 4 -- confirm callers pass 4-byte aligned pointers.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(cross_no_match)

	/* A match exists at or after the start address.  Rebuild the
	   start address in %ecx so bit 0 of both masks refers to it.  */
	add	%edi, %ecx
	POP	(%edi)

	test	%edx, %edx
	jz	L(match_only)
	/* Same tests as at L(match_and_nul), kept inline on this path.  */
	test	%al, %al
	jz	L(match_and_nul_high)
	test	$15, %al
	jnz	L(both_ret_elem0)
	test	$15, %dl
	jnz	L(return_null)
	lea	4(%ecx), %eax
	ret

	/* The code below is entered with %edi still pushed; re-establish
	   that unwind state after the return above.  */
	CFI_PUSH (%edi)

	.p2align 4
L(cross_no_match):
	/* No match in the head.  Continue from the aligned chunk base
	   unless the terminator was seen at or after the start.  */
	mov	%edi, %ecx
	POP	(%edi)

	test	%edx, %edx
	jnz	L(return_null)

	/* %xmm2 may still flag zero elements located before the start
	   address; clear it before entering the loop.  */
	pxor	%xmm2, %xmm2

/* Main loop over aligned chunks, unrolled four times.  %xmm2 stays zero
   from one chunk to the next because a chunk without a terminator
   leaves it all zero.  */
	.p2align 4
L(aligned_loop):
	add	$16, %ecx
	SCAN16 (movdqa, %ecx)
	or	%eax, %edx
	jnz	L(found)
	add	$16, %ecx

	SCAN16 (movdqa, %ecx)
	or	%eax, %edx
	jnz	L(found)
	add	$16, %ecx

	SCAN16 (movdqa, %ecx)
	or	%eax, %edx
	jnz	L(found)
	add	$16, %ecx

	SCAN16 (movdqa, %ecx)
	or	%eax, %edx
	jz	L(aligned_loop)

	.p2align 4
L(found):
	/* The chunk at %ecx holds a match, a terminator, or both.  %edx
	   was merged with %eax by the caller, so fetch the terminator
	   mask again.  */
	pmovmskb %xmm2, %edx
	test	%eax, %eax
	jz	L(return_null)
	test	%edx, %edx
	jz	L(match_only)

	.p2align 4
L(match_and_nul):
	/* Both a match and a terminator are present: return the match
	   only when no terminator lies in an earlier element.  */
	test	%al, %al
	jz	L(match_and_nul_high)
	test	$15, %al
	jnz	L(both_ret_elem0)
	/* First match is element 1; element 0 must not be zero.  */
	test	$15, %dl
	jnz	L(return_null)
	lea	4(%ecx), %eax
	ret

	.p2align 4
L(both_ret_elem0):
	/* Element 0 matches; nothing can precede it.  */
	mov	%ecx, %eax
	ret

	.p2align 4
L(match_and_nul_high):
	/* No match in elements 0-1, so a terminator there wins.  */
	test	%dl, %dl
	jnz	L(return_null)
	test	$15, %ah
	jnz	L(both_ret_elem2)
	/* First match is element 3; element 2 must not be zero.  */
	test	$15, %dh
	jnz	L(return_null)
	lea	12(%ecx), %eax
	ret

	.p2align 4
L(both_ret_elem2):
	lea	8(%ecx), %eax
	ret

	.p2align 4
L(match_only):
	/* A match and no terminator in this chunk: return the lowest
	   matching element.  */
	test	%al, %al
	jz	L(match_only_high)

	test	$0x01, %al
	jnz	L(only_ret_elem0)
	lea	4(%ecx), %eax
	ret

	.p2align 4
L(match_only_high):
	test	$0x01, %ah
	jnz	L(only_ret_elem2)
	lea	12(%ecx), %eax
	ret

	.p2align 4
L(only_ret_elem0):
	mov	%ecx, %eax
	ret

	.p2align 4
L(only_ret_elem2):
	lea	8(%ecx), %eax
	ret

	.p2align 4
L(return_null):
	/* Terminator reached before any match.  */
	xor	%eax, %eax
	ret

END (__wcschr_sse2)
#endif