/* memchr - find a character in a memory zone

   Copyright (C) 2015-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* MEMCHR is the symbol name given to the routine defined below.  It
   defaults to __memchr; if MEMCHR is already defined when this point is
   reached, that definition is used instead.  */
#ifndef MEMCHR
# define MEMCHR __memchr
#endif

/* Arguments and results.  */
#define srcin		x0	/* In: start of the buffer.  */
#define chrin		w1	/* In: byte value to search for.  */
#define cntin		x2	/* In: number of bytes to search.  */
#define result		x0	/* Out: match address or 0; aliases srcin.  */

/* General-purpose temporaries.  */
#define src		x3	/* 16-byte-aligned address of the current chunk.  */
#define cntrem		x4	/* Remaining-length counter.  */
#define synd		x5	/* 64-bit syndrome (match mask) for one chunk.  */
#define shift		x6	/* 4 * srcin, used to drop bytes before srcin.  */
#define tmp		x7	/* Scratch.  */

/* SIMD registers.  qdata/vdata and vend/dend are different views of the
   same register (q1/v1 and v3/d3).  */
#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define qdata		q1	/* Current chunk, as the 128-bit ldr target.  */
#define vdata		v1	/* Current chunk, as 16 byte lanes.  */
#define vhas_chr	v2	/* Per-byte cmeq result for the chunk.  */
#define vend		v3	/* vhas_chr reduced to 64 bits.  */
#define dend		d3	/* Low 64 bits of vend, moved into synd.  */

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four
   bits per byte.  We take 4 bits of every comparison byte with a shift right
   and narrow by 4 instruction (shrn).  Since the bits in the nibble mask
   reflect the order in which things occur in the original string, the
   position of the first set bit identifies exactly which byte matched: it
   is found with rbit followed by clz (L(end) omits the rbit when
   __AARCH64EB__ is defined), and that bit index divided by 4 is the byte
   offset within the chunk.  */

/* void *memchr (const void *s, int c, size_t n)

   In:    srcin (x0)   start of the buffer
	  chrin (w1)   byte to look for; dup replicates its low 8 bits
	  cntin (x2)   number of bytes to search
   Out:   result (x0)  address of the first byte equal to chrin among the
		       first cntin bytes, or 0 if there is none
   Uses:  x3-x7, v0-v3 and the condition flags; no stack, no calls.

   Every load is a 16-byte load from a 16-byte-aligned address (src is
   rounded down with bic and then advances in steps of 16).  Bytes before
   srcin or past srcin + cntin that share an aligned chunk with the buffer
   are therefore read, but they never reach the result: the former are
   removed by the lsr by shift, the latter by the length comparisons that
   feed the csel instructions.  */
ENTRY (MEMCHR)
	/* PTR_ARG and SIZE_ARG come from sysdep.h and are not visible here;
	   presumably they normalise the pointer and size arguments for
	   ILP32 - confirm against sysdep.h.  */
	PTR_ARG (0)
	SIZE_ARG (2)
	/* src = aligned chunk that contains srcin.  */
	bic	src, srcin, 15
	/* n == 0: return 0 without touching memory.  */
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
	/* Each byte lane becomes 0xff where the data equals chrin, else 0.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* A register-specified shift uses the amount modulo 64, so this is
	   effectively 4 * (srcin % 16): four mask bits for every byte of the
	   chunk that lies before srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Discard matches located before srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)

	/* Match in the first chunk.  rbit + clz gives the index of the lowest
	   set bit; divided by 4 it is the byte offset from srcin.
	   NOTE(review): unlike L(end) there is no __AARCH64EB__ guard here,
	   presumably because ld1 {.16b} fills the lanes in memory order on
	   either endianness while ldr q does not - confirm.  */
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	/* Keep the match only if its offset is below cntin, else return 0.  */
	cmp	cntin, synd, lsr 2
	csel	result, result, xzr, hi
	ret

L(start_loop):
	/* tmp = bytes from srcin to the end of the first chunk (1..16).  */
	sub	tmp, src, srcin
	add	tmp, tmp, 16
	/* cntrem = bytes left after the first chunk; none left means the
	   whole buffer was already checked.  */
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
	/* The loop handles two chunks per iteration but tests the count only
	   in its second half.  Bit 4 of cntrem + 15 is set when the number of
	   remaining chunks, ceil (cntrem / 16), is odd; entering at the
	   second half in that case makes the last chunk always the one
	   loaded in L(loop32_2).  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 4
L(loop32):
	/* Pre-indexed load: src += 16, then load the chunk at src.  */
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Pairwise maximum: the low 64 bits are non-zero iff any lane of
	   vhas_chr is non-zero, which is all the loop needs to know.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

L(loop32_2):
	ldr	qdata, [src, 16]!
	/* One decrement of 32 per pass through the two-chunk loop.  cntrem is
	   only a loop counter here; L(end) recomputes the exact remaining
	   length from the pointers.  */
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Uses the flags from subs above: taken when this was the last
	   chunk, whether or not it contains a match.  */
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end):
	/* vhas_chr holds the comparison result for the chunk at src; build
	   its nibble mask to locate the first match.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* cntrem = number of buffer bytes at or after src.  */
	add	tmp, srcin, cntin
	sub	cntrem, tmp, src
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	/* synd >> 2 is the byte offset of the first match within the chunk
	   (16 when synd was zero, as clz then yields 64).  Return the match
	   only if that offset is below cntrem, else 0.  */
	cmp	cntrem, synd, lsr 2
	add	result, src, synd, lsr 2
	csel	result, result, xzr, hi
	ret

L(nomatch):
	mov	result, 0
	ret

END (MEMCHR)
/* weak_alias and libc_hidden_builtin_def are macros defined outside this
   file; going by their names they presumably make memchr a weak alias of
   MEMCHR and set up the hidden internal definition - confirm.  */
weak_alias (MEMCHR, memchr)
libc_hidden_builtin_def (memchr)
