/* strchr with SSE2 with bsf
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

/* Unwind (CFI) annotations that accompany a 4-byte push of REG.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind (CFI) annotations that accompany a 4-byte pop of REG.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* Offset from %esp to the first stack argument once ENTRANCE has
   pushed %edi: 4 bytes of return address plus 4 bytes of saved %edi.  */
# define PARMS  8
# define ENTRANCE PUSH(%edi)
/* RETURN is used in the middle of the function, so after the `ret' the
   trailing CFI_PUSH re-describes %edi as saved for the code that
   follows it.  */
# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);

# define STR1  PARMS
# define STR2  STR1+4

	.text
/* char *__strchr_sse2_bsf (const char *s, int c)

   In:	STR1(%esp) = s, STR2(%esp) = c.  Only the low byte of c is
	used.
   Out:	%eax = address of the first byte equal to (char) c that is not
	after the terminating NUL, or 0 if there is none.  When the low
	byte of c is 0 the address of the terminator is returned.
   Clobbers: %ecx, %edx, %xmm0, %xmm1, %xmm2, flags.  %edi is saved by
	ENTRANCE and restored by RETURN.

   Register roles:
	%xmm1	the search byte replicated into all 16 lanes
	%xmm2	zero comparand on entry to each 16-byte step; NUL mask
		afterwards
	%edi	16-byte aligned address of the block being scanned
	%ecx	misalignment of s (s & 15) in the unaligned prologue

   All loads are aligned 16-byte loads (movdqa), so the first and last
   block can contain bytes outside the string.  Leading bytes are
   shifted out of the masks; trailing bytes are never selected because
   the earlier NUL wins.  */
ENTRY (__strchr_sse2_bsf)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	/* XMM2 = 0, used to detect the terminating NUL.  */
	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of C into the low dword of XMM1; the
	   pshufd below then copies that dword to all four lanes.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET. */
	and	$15, %ecx
	/* pshufd does not modify EFLAGS, so the je below still tests the
	   result of the `and': an already aligned string goes straight
	   to the main loop.  */
	pshufd	$0, %xmm1, %xmm1
	je	L(loop)

/* Handle unaligned string.  */
	/* Round EDI down to the enclosing aligned 16-byte block.  */
	and	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  Both masks are 16 bits wide with a
	   clear sign bit, so the arithmetic shift behaves like a logical
	   one.  After the shift bit 0 corresponds to the first byte of
	   the string.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)
	/* Check which byte is a match.  EAX is known to be non-zero.  */
	bsf	%eax, %eax
	/* Is there a NULL? */
	test	%edx, %edx
	je	L(unaligned_match)
	bsf	%edx, %edx
	cmpl	%edx, %eax
	/* Return NULL if NULL comes first.  Equal indices mean the
	   search byte is itself NUL, which counts as a match.  */
	ja	L(return_null)
L(unaligned_match):
	/* Result = aligned base + misalignment + index within string.  */
	add	%edi, %eax
	add	%ecx, %eax
	RETURN

	.p2align 4
L(unaligned_no_match):
	/* No match in the first block: a NUL here ends the search.  */
	test	%edx, %edx
	jne	L(return_null)
	/* XMM2 may still flag NUL bytes located before the start of the
	   string; clear it so the loop starts with a zero comparand.  */
	pxor	%xmm2, %xmm2

	add	$16, %edi

	.p2align 4
/* Loop start on aligned string.  The body is unrolled four times.
   Invariant at the top of every step: XMM2 is all zero, because a step
   only falls through when it found neither a NUL nor a match.  EDI is
   advanced before the masks are tested, so on exit to L(matches) it
   points 16 bytes past the block that was examined.  */
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	/* Non-zero if the block holds a NUL or a match.  */
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	jmp	L(loop)

L(matches):
	/* EDX was overwritten by the `or' in the loop; reload the NUL
	   mask.  EAX still holds the match mask.  */
	pmovmskb %xmm2, %edx
	/* No match bit set means the block was left because of a NUL.  */
	test	%eax, %eax
	jz	L(return_null)
	bsf	%eax, %eax
	/* There is a match.  First find where NULL is.  */
	test	%edx, %edx
	je	L(match)
	bsf	%edx, %ecx
	/* Check if NULL comes first.  */
	cmpl	%ecx, %eax
	ja	L(return_null)
L(match):
	/* Undo the advance done in the loop, then add the byte index.  */
	sub	$16, %edi
	add	%edi, %eax
	RETURN

/* Return NULL.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

END (__strchr_sse2_bsf)
#endif
