/* strchr with SSE2 without bsf
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <isa-level.h>

/* NB: atom builds with ISA level == 1 so no reason to hold onto this
   at ISA level >= 2.  */
#if ISA_SHOULD_BUILD (1)

# include <sysdep.h>
# include "asm-syntax.h"

/* __strchr_sse2_no_bsf

   Find the first byte equal to the low byte of %esi in the
   zero-terminated byte string that starts at %rdi.

   In:   %rdi = start of the string
	 %esi = byte to look for (only the low 8 bits are used)
   Out:  %rax = address of the first matching byte, or 0 when the
	 terminating zero byte is reached first.  When the byte looked
	 for is itself zero the address of the terminator is returned,
	 because at equal positions the match bit is always tested
	 before the terminator bit.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   The string is read in 16-byte aligned chunks (movdqa), so the
   first load may cover bytes that precede the string; their compare
   results are masked off.  The position of the first hit inside a
   chunk is resolved with chains of test instructions instead of bsf.

   Register roles:
     %xmm1  the byte looked for, replicated into all 16 lanes
     %xmm2  all zero, used to detect the terminator
     %eax   bit i is set when byte i of the chunk equals the byte
	    looked for
     %edx   bit i is set when byte i of the chunk is zero
     %rdi   while scanning: address of the next chunk to load;
	    from L(matches) on: address of the chunk being resolved

   L(ExitN) returns the chunk address plus N - 1.  Offsets 7 and 15
   (and 3 / 11 in the mixed case) have no label; they are the
   fall-through result once every lower bit has been ruled out.  */

	atom_text_section
ENTRY (__strchr_sse2_no_bsf)
	movd	%esi, %xmm1
	movq	%rdi, %rcx
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* Round down to a 16-byte boundary.  */
	pxor	%xmm2, %xmm2
	punpcklbw %xmm1, %xmm1
	orl	$0xffffffff, %esi	/* All ones; becomes the lead-in mask.  */
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1	/* Low byte now fills all 16 lanes.  */
	subq	%rdi, %rcx		/* %rcx = misalignment of the start, 0..15.  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shl	%cl, %esi		/* Clear the bits of bytes before the start.  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	andl	%esi, %eax
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx
	jz	L(loop)

	/* The or above merged the match bits into %edx; reload the pure
	   terminator mask before resolving.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)

/* Return NULL.  */
	.p2align 4
L(return_null):
	/* A 32-bit xor zeroes the whole of %rax.  */
	xor	%eax, %eax
	ret

L(matches):
	/* There is a match.  First find where NULL is.  */
	leaq	-16(%rdi), %rdi		/* Back to the chunk that was tested.  */
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the chunk holds both a match and a terminator.  Walk
	   the bits from low to high, testing the match bit before the
	   terminator bit at each position.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)

	test	$0x0f, %al		/* Match among bytes 0..3?  */
	jnz	L(match_case2_4)

	test	$0x0f, %dl		/* Terminator among bytes 0..3?  */
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* %al is non-zero and bits 0..6 are clear, so bit 7 matched.  */
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* The low nibble of %al is non-zero and bits 0..2 are clear.  */
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(match_high_case2):
	/* No match in bytes 0..7; any terminator there ends the search.  */
	test	%dl, %dl
	jnz	L(return_null)

	test	$0x0f, %ah		/* Match among bytes 8..11?  */
	jnz	L(match_case2_12)

	test	$0x0f, %dh		/* Terminator among bytes 8..11?  */
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	lea	15(%rdi), %rax
	ret

	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	lea	11(%rdi), %rax
	ret

	/* Case 1: the chunk holds a match and no terminator, so only
	   the lowest set match bit has to be found.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	lea	15(%rdi), %rax
	ret

	.p2align 4
L(Exit1):
	lea	(%rdi), %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
#endif