1/* strchr - find a character in a string
2
3   Copyright (C) 2014-2022 Free Software Foundation, Inc.
4
5   This file is part of the GNU C Library.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library.  If not, see
19   <https://www.gnu.org/licenses/>.  */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
27 */
28
/* Register roles.  Several names share one physical register; the sharing
   is noted next to each alias.  */
29#define srcin		x0	/* in: start of the string to search */
30#define chrin		w1	/* in: character to find; read only by the first dup */
31#define result		x0	/* out: return value; same register as srcin */
32
33#define src		x2	/* 16-byte-aligned address of the chunk being examined */
34#define tmp1		x1	/* syndrome scratch; overlaps chrin (w1), first written after the dup */
35#define wtmp2		w3	/* scratch used to build the two mask constants */
36#define tmp3		x3	/* shift count for the first chunk; same register as wtmp2 */
37
38#define vrepchr		v0	/* chrin replicated into all 16 byte lanes */
39#define vdata		v1	/* the 16-byte chunk currently being examined */
40#define qdata		q1	/* same register as vdata, named as a 128-bit load target */
41#define vhas_nul	v2	/* per-byte NUL flags, then reused in place for the syndrome bytes */
42#define vhas_chr	v3	/* per-byte "equals chrin" flags */
43#define vrepmask	v4	/* 0x3003 in every halfword */
44#define vrepmask2	v5	/* 0xf00f in every halfword */
45#define vend		v6	/* pairwise-reduced flags / syndrome */
46#define dend		d6	/* low 64 bits of vend, moved to tmp1 with fmov */
47
48/* Core algorithm.
49
50   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
51   per byte. For even bytes, bits 0-1 are set if the relevant byte matched the
52   requested character, bits 2-3 are set if the byte is NUL (or matched), and
53   bits 4-7 are not used and must be zero. Odd bytes use bits 4-7 in the same
54   way, with bits 0-3 zero, so that adjacent bytes can be merged. Since the bits
55   in the syndrome reflect the order in which things occur in the original
56   string, counting trailing zeros identifies exactly which byte matched.  */
57
/* char *strchr (const char *s, int c)

   In:   srcin (x0) = s, chrin (w1) = c (only its low byte is used).
   Out:  result (x0) = address of the first byte equal to c, or 0 when the
	 terminating NUL comes first.  A search for NUL itself returns the
	 address of the terminator: that byte raises both the "matched" and
	 the "NUL" bits, and the bit-1 test below only fails when "NUL" is
	 raised without "matched".

   The string is read in 16-byte chunks starting from srcin rounded down to
   a 16-byte boundary, so the first chunk may contain bytes that precede
   srcin; their syndrome bits are shifted out before the test.  Apart from
   whatever PTR_ARG expands to (not visible here), only x0-x3, v0-v6 and
   the condition flags are written, and the stack is not used.  */
58ENTRY (strchr)
59	PTR_ARG (0)				/* macro from sysdep.h; expansion not visible here */
60	bic	src, srcin, 15			/* src = srcin rounded down to 16 bytes */
61	dup	vrepchr.16b, chrin		/* low byte of chrin in every lane */
62	ld1	{vdata.16b}, [src]		/* first chunk; may begin before srcin */
63	mov	wtmp2, 0x3003
64	dup	vrepmask.8h, wtmp2		/* even bytes 0x03, odd bytes 0x30 */
65	cmeq	vhas_nul.16b, vdata.16b, 0	/* 0xff in lanes holding NUL */
66	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b	/* 0xff in matching lanes */
67	mov	wtmp2, 0xf00f
68	dup	vrepmask2.8h, wtmp2		/* even bytes 0x0f, odd bytes 0xf0 */
69
	/* Build the syndrome: insert the "matched" flags into the bits
	   selected by vrepmask and keep the NUL flags in the others, then
	   confine even bytes to their low nibble and odd bytes to their high
	   nibble.  The nibbles of a byte pair are then disjoint, so the
	   pairwise add cannot carry and simply packs two bytes into one.  */
70	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
71	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
72	lsl	tmp3, srcin, 2			/* 4 bits per byte; lsr takes the count mod 64 */
73	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
74
75	fmov	tmp1, dend
76	lsr	tmp1, tmp1, tmp3		/* discard nibbles of bytes before srcin */
77	cbz	tmp1, L(loop)			/* nothing of interest in the first chunk */
78
79	rbit	tmp1, tmp1
80	clz	tmp1, tmp1			/* rbit + clz = count of trailing zeros */
81	/* tmp1 is a multiple of 4 (bit 1 clear) if the target character was
82	   found first. Otherwise bit 1 is set and we've found the end of string.  */
83	tst	tmp1, 2
84	add	result, srcin, tmp1, lsr 2	/* 4 syndrome bits per byte, relative to srcin */
85	csel	result, result, xzr, eq		/* 0 when the NUL came first */
86	ret
87
88	.p2align 4
89L(loop):
	/* Cheap per-chunk test.  cmhs is an unsigned "vhas_chr >= vdata":
	   true where the byte matched (0xff >= anything) and where the byte
	   is NUL (0 >= 0), false everywhere else.  umaxp folds the 16 flags
	   into the low 64 bits, which are nonzero iff any lane was flagged.  */
90	ldr	qdata, [src, 16]!		/* pre-index: src += 16, then load from src */
91	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
92	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
93	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
94	fmov	tmp1, dend
95	cbz	tmp1, L(loop)
96
	/* The chunk at src holds a match or the terminator.  Compute the
	   4-bit-per-byte syndrome as for the first chunk.  The big-endian
	   variant uses bif instead of bit, which swaps the positions of the
	   "matched" and "NUL" bit pairs inside each nibble, and omits rbit so
	   that clz counts from the most significant end.
	   NOTE(review): presumably this is because ldr of a q register puts
	   the lowest-addressed byte in the most significant lane on
	   big-endian, whereas the ld1 {.16b} used for the first chunk does
	   not depend on endianness -- confirm against the architecture
	   manual.  */
97#ifdef __AARCH64EB__
98	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
99	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
100	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
101	fmov	tmp1, dend
102#else
103	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
104	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
105	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
106	fmov	tmp1, dend
107	rbit	tmp1, tmp1
108#endif
109	clz	tmp1, tmp1
110	/* tmp1 is a multiple of 4 (bit 1 clear) if the target character was
111	   found first. Otherwise bit 1 is set and we've found the end of string.  */
112	tst	tmp1, 2
113	add	result, src, tmp1, lsr 2	/* byte index is relative to the aligned chunk */
114	csel	result, result, xzr, eq		/* 0 when the NUL came first */
115	ret
116
117END (strchr)
/* The two macros below are not defined in the visible source.  Going by
   their names, the second presumably makes index a weak alias of strchr
   -- confirm against their definitions.  */
118libc_hidden_builtin_def (strchr)
119weak_alias (strchr, index)
120