1/* wcslen with SSE2
2   Copyright (C) 2011-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#if IS_IN (libc)
20# include <sysdep.h>
21# define STR	4
22
23	.text
/* size_t __wcslen_sse2 (const wchar_t *s)

   Return the number of 32-bit wide characters in S that precede the
   terminating null wide character.

   In:   STR(%esp) = s (first stack argument).
   Out:  %eax = length, counted in wide characters.
   Uses: %eax, %ecx, %edx, %xmm0-%xmm3, %xmm6, flags.

   Outline:
     1. Test s[0] .. s[7] one character at a time.
     2. Test four consecutive 16-byte vectors, starting at s + 32
	rounded down to a 16-byte boundary.
     3. Round down to a 64-byte boundary and scan 64 bytes per
	iteration.

   Invariant for the vector code: on entry to L(exit),
   (%eax - %ecx) / 4 is the index of the first character of the vector
   whose compare mask is in %edx.
   NOTE(review): the aligned vector compares only line up with
   character boundaries if S is 4-byte aligned -- confirm that callers
   guarantee this.  */
ENTRY (__wcslen_sse2)
	mov	STR(%esp), %edx		/* %edx = s  */

	/* Check the first eight characters individually.  Each load is
	   reached only after the previous character was non-zero.  The
	   explicit `l' suffix pins the 32-bit operand size, since there is
	   no register operand from which it could be inferred.  */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	lea	32(%edx), %eax
	lea	16(%edx), %ecx		/* %ecx = s + 16, bias for L(exit)  */
	and	$-16, %eax		/* %eax = (s + 32) & -16  */

	/* Four single-vector probes.  Each one compares 16 bytes with
	   zero, then advances %eax by 16 before branching (lea leaves the
	   flags alone), so at L(exit) %eax is the probed address + 16,
	   which the s + 16 bias in %ecx cancels.  The zeroing of the next
	   compare register is interleaved with the current probe.  */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Round down to a 64-byte boundary; this may re-examine up to 48
	   bytes that were just checked.  %xmm3 is all-zero here because
	   the compare above matched nothing.  */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/* Byte-wise minimum of the four vectors.  A zero character in any
	   of them yields a zero dword in the minimum.  The converse does
	   not hold (zero bytes may come from different vectors), so a hit
	   is only a candidate and every vector is re-checked below.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%eax), %eax		/* flags preserved for the jz  */
	jz	L(aligned_64_loop)

	/* Candidate block is at -64(%eax).  %xmm0 and %xmm2 were
	   overwritten by pminub, so vectors 0 and 2 are compared from
	   memory; vectors 1 and 3 are still held in %xmm1 and %xmm6.
	   %ecx is biased by +48, +32, +16, +0 for vectors 0..3, so the
	   net adjustment is zero when none of them matches.  %xmm3
	   receives each compare result and therefore remains all-zero
	   for as long as nothing matches.  */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* False positive from the byte-wise minimum: keep scanning.  */
	jmp	L(aligned_64_loop)

	/* %edx is a pmovmskb mask of a pcmpeqd result, so every 4-bit
	   group is either 0 or 0xf: %dl covers characters 0 and 1 of the
	   vector, %dh covers characters 2 and 3.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax		/* index of the vector's first char  */
	test	%dl, %dl
	jz	L(exit_high)

	/* Only ZF is needed, so test the low nibble in place instead of
	   copying it into a scratch byte register.  */
	test	$15, %dl
	jz	L(exit_1)
	ret				/* terminator is character 0  */

	.p2align 4
L(exit_high):
	test	$15, %dh
	jz	L(exit_3)
	add	$2, %eax		/* terminator is character 2  */
	ret

	.p2align 4
L(exit_1):
	add	$1, %eax
	ret

	.p2align 4
L(exit_3):
	add	$3, %eax
	ret

	/* Results for a terminator found among the first eight
	   characters.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen_sse2)
192#endif
193