1/* strlen with SSE2 and BSF
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#if defined SHARED && IS_IN (libc)
20
21#include <sysdep.h>
22
/* Offset of the first stack argument from %esp once ENTRANCE has run:
   4 bytes of return address plus 8 bytes for the two saved registers.
   PARMS is deliberately left unparenthesised because it is pasted
   directly in front of "(%esp)" as an AT&T displacement.  */
#define PARMS		4 + 8	/* Preserve ESI and EDI.  */
#define STR		PARMS

/* Unwind annotations that accompany a 4-byte push of REG: the CFA
   offset grows by 4 and REG is recorded as saved at the new top of
   stack.  */
#define CFI_PUSH(REG)		\
  cfi_adjust_cfa_offset (4);	\
  cfi_rel_offset (REG, 0)

/* Unwind annotations that accompany a 4-byte pop of REG: the CFA
   offset shrinks by 4 and REG is marked as restored.  */
#define CFI_POP(REG)		\
  cfi_adjust_cfa_offset (-4);	\
  cfi_restore (REG)

/* Push/pop a 32-bit register together with its unwind annotations.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Prologue: save %esi and %edi, then snapshot the CFI state so that
   every RETURN can reinstate it for the code that follows the ret.  */
#define ENTRANCE		\
  PUSH (%esi);			\
  PUSH (%edi);			\
  cfi_remember_state

/* Epilogue: restore the saved registers in reverse order and return.
   The trailing restore/remember pair re-establishes the post-prologue
   CFI state for whatever code is laid out after this return.  */
#define RETURN			\
  POP (%edi);			\
  POP (%esi);			\
  ret;				\
  cfi_restore_state;		\
  cfi_remember_state
38
39	.text
/* __strlen_sse2_bsf: length of the NUL-terminated string whose address
   is the first stack argument (STR(%esp) after ENTRANCE).

   Out:   %eax = number of bytes before the first zero byte.
   Saved: %esi, %edi (pushed by ENTRANCE, popped on every return path).
   Used:  %ecx, %edx, %xmm0-%xmm3, flags.

   Register roles:
     %edi  original string pointer, never modified
     %eax  0 on the fast head exit, otherwise the 16-byte aligned
	   cursor into the string; holds the result at return
     %ecx  low six bits of the pointer, then the head-mask shift count
     %esi  bit mask that discards bytes located before the string start
     %edx  pmovmskb byte mask, then the index of its lowest set bit

   Strategy: examine the first (possibly unaligned) 16 bytes, then scan
   four aligned 16-byte blocks per loop iteration, comparing each block
   against zero with pcmpeqb and locating the first zero with bsf.  */
ENTRY (__strlen_sse2_bsf)
	ENTRANCE
	movl	STR(%esp), %edi
	pxor	%xmm0, %xmm0		/* xmm0 = all-zero comparand.  */
	xorl	%eax, %eax		/* Base offset 0 for the fast exit.  */
	movl	%edi, %ecx
	andl	$0x3f, %ecx		/* ecx = offset within a 64-byte block.  */
	cmpl	$0x30, %ecx
	/* With an offset above 0x30 a 16-byte load starting at %edi would
	   run past the end of the 64-byte block, so take the aligned path
	   instead.  */
	ja	L(head_aligned_load)

	/* Offset <= 0x30: the unaligned 16-byte load stays inside the
	   current 64-byte block.  */
	movdqu	(%edi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx
	testl	%edx, %edx
	jnz	L(found_in_head)
	/* No terminator in the first 16 bytes.  xmm0 is still zero here.
	   Round the cursor down; the loop starts at cursor + 16, which may
	   re-examine some of the bytes just checked.  */
	movl	%edi, %eax
	andl	$-16, %eax
	jmp	L(scan_setup)

L(head_aligned_load):
	/* Compare the whole aligned block that contains the string start
	   and mask away the match bits of bytes in front of the string.
	   Only the low five bits of %cl count for the shift, and those
	   equal the pointer's offset within its 16-byte block.  */
	movl	$-1, %esi
	movl	%edi, %eax
	andl	$-16, %eax		/* eax = aligned block holding s[0].  */
	subl	%eax, %ecx
	shll	%cl, %esi		/* esi = ones from bit (s & 15) upward.  */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	andl	%esi, %edx
	jnz	L(found_at_cursor)

L(scan_setup):
	/* All four comparands must be zero on loop entry.  A pcmpeqb that
	   finds no zero byte leaves its register all-zero again, so the
	   loop itself never has to clear them.  */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	.p2align 4
L(scan_loop):
	/* Invariant: %eax is 16-byte aligned and no terminator lies in
	   the string bytes below %eax + 16.  */
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %edx
	testl	%edx, %edx
	jnz	L(found_at_16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %edx
	testl	%edx, %edx
	jnz	L(found_at_32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %edx
	testl	%edx, %edx
	jnz	L(found_at_48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	leal	64(%eax), %eax		/* Advance the cursor; flags untouched.  */
	testl	%edx, %edx
	jz	L(scan_loop)
	/* Fall through: the terminator is in the block at the (already
	   advanced) cursor.  */

L(found_at_cursor):
	/* Result = (cursor - s) + index of the first zero byte.  */
	subl	%edi, %eax
L(found_in_head):
	/* Reached directly from the fast path with %eax == 0.  */
	bsfl	%edx, %edx
	addl	%edx, %eax
	RETURN

L(found_at_16):
	/* Result = (cursor - s) + 16 + index of the first zero byte.  */
	bsfl	%edx, %edx
	subl	%edi, %eax
	addl	$16, %eax
	addl	%edx, %eax
	RETURN

L(found_at_32):
	/* Result = (cursor - s) + 32 + index of the first zero byte.  */
	bsfl	%edx, %edx
	subl	%edi, %eax
	addl	$32, %eax
	addl	%edx, %eax
	RETURN

L(found_at_48):
	/* Result = (cursor - s) + 48 + index of the first zero byte.
	   This is the last return in the function, so the epilogue is
	   spelled out without the CFI restore/remember pair that RETURN
	   emits for following code.  */
	bsfl	%edx, %edx
	subl	%edi, %eax
	addl	$48, %eax
	addl	%edx, %eax
	POP (%edi)
	POP (%esi)
	ret

END (__strlen_sse2_bsf)
123
124#endif
125