1/* strchrnul - find a character or nul in a string
2
3   Copyright (C) 2014-2022 Free Software Foundation, Inc.
4
5   This file is part of the GNU C Library.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library.  If not, see
19   <https://www.gnu.org/licenses/>.  */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
27 */
28
/* Register roles.
   srcin and result both name x0.  chrin (w1) and tmp1 (x1) name the same
   register: chrin is consumed by the initial dup before tmp1 is first
   written, so the two uses never overlap.  */
#define srcin		x0	/* In:  pointer to the string.  */
#define chrin		w1	/* In:  character to search for.  */
#define result		x0	/* Out: returned pointer.  */

#define src		x2	/* 16-byte aligned address of the current chunk.  */
#define tmp1		x1	/* 64-bit nibble mask, then bit index.  */
#define tmp2		x3	/* Shift count used to drop head padding bits.  */

#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define vdata		v1	/* The 16-byte chunk being examined.  */
#define qdata		q1	/* Same register as vdata, named for ldr.  */
#define vhas_nul	v2	/* Not referenced by the routine below.  */
#define vhas_chr	v3	/* Per-byte mask: 0xff where byte is chr or NUL.  */
#define vend		v4	/* Narrowed (shrn) or folded (umaxp) mask.  */
#define dend		d4	/* Low 64 bits of vend, moved to tmp1.  */
44
/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with a shift right and
   narrow by 4 (shrn) instruction. Since the bits in the nibble mask reflect the
   order in which things occur in the original string, counting trailing zeros
   (rbit followed by clz) identifies exactly which byte matched.  Big-endian
   builds omit the rbit in the main loop and count leading zeros directly.  */
52
/* char *strchrnul (const char *s, int c)

   In:   x0 (srcin) = pointer to the string.
         w1 (chrin) = character to find; dup .16b replicates its low byte.
   Out:  x0 (result) = address of the first byte that equals the character
         or is NUL, whichever comes first.
   Uses: x1, x2, x3, v0, v1, v3, v4.  No stack accesses, no calls.

   All loads are 16 bytes wide from a 16-byte aligned address, so a load
   never extends past the aligned 16-byte block holding the byte being
   examined.  */
ENTRY (__strchrnul)
	PTR_ARG (0)	/* Macro from sysdep.h, applied to argument 0.  */

	/* Head: examine the aligned chunk that contains the first byte.  */
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
	/* 0xff in every lane whose byte equals the character.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Unsigned vhas_chr >= vdata is true where the lane matched
	   (0xff >= anything) or where the data byte is NUL (0 >= 0), so a
	   single mask covers both terminating conditions.  */
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* Four mask bits per byte.  The variable lsr below uses the count
	   modulo 64, i.e. 4 * (srcin & 15).  */
	lsl	tmp2, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	tmp1, dend	/* Overwrites chrin; it is no longer needed.  */
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)

	/* Match in the head chunk.  rbit + clz gives the index of the lowest
	   set bit, which is 4 * (offset of the match from srcin) because the
	   padding nibbles were shifted out above.  The head always uses rbit,
	   including on big-endian; it loads with ld1 rather than ldr.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
L(loop):
	/* Pre-indexed load: src advances by 16, then the chunk is read.  */
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* Cheap termination check: the pairwise maximum folds the 16 mask
	   bytes into 8, so dend is non-zero iff some lane matched.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* A lane matched: build the exact nibble mask to locate it.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	tmp1, dend
	/* Little-endian: the first string byte is in the lowest nibble, so
	   reverse the bits before clz.  Big-endian builds skip the rbit and
	   count leading zeros directly (presumably because ldr of a q
	   register places the bytes in the opposite order there).  */
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	add	result, src, tmp1, lsr 2	/* src + bit index / 4.  */
	ret

END(__strchrnul)
weak_alias (__strchrnul, strchrnul)
91