1/* strnlen - calculate the length of a string with limit.
2
3   Copyright (C) 2013-2022 Free Software Foundation, Inc.
4
5   This file is part of the GNU C Library.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library.  If not, see
19   <https://www.gnu.org/licenses/>.  */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
27 */
28
/* Register aliases used by __strnlen.  */

/* Arguments and return value.  result shares x0 with srcin, so srcin
   must not be read after result has been written.  */
29#define srcin		x0
30#define cntin		x1
31#define result		x0
32
/* Integer scratch registers.
   src    - address of the next 16-byte chunk to load (kept 16-byte aligned).
   synd   - syndrome: per-chunk mask of NUL matches moved out of the vector unit.
   shift  - bit count used to drop mask bits of bytes that precede srcin.
   tmp    - short-lived temporary; shares x4 with shift.
   cntrem - count still to be examined once the first chunk is done.  */
33#define src		x2
34#define synd		x3
35#define	shift		x4
36#define tmp		x4
37#define cntrem		x5
38
/* Vector registers.  qdata/vdata are two views of register 0 (the loaded
   chunk); vend/dend are two views of register 2 (the reduced mask, whose
   low 64 bits are read via dend).  vhas_chr holds the per-byte compare
   result.  */
39#define qdata		q0
40#define vdata		v0
41#define vhas_chr	v1
42#define vend		v2
43#define dend		d2
44
45/*
46   Core algorithm:
47
48   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
49   per byte. We take 4 bits of every comparison byte with a "shift right and
50   narrow by 4" instruction (SHRN). Since the bits in the nibble mask reflect
51   the order in which the bytes occur in the original string, counting trailing
52   zeros (and dividing by 4) identifies exactly which byte matched.  */
53
/* __strnlen

   In:   srcin (x0) = address of the string
         cntin (x1) = maximum number of bytes to examine
   Out:  result (x0) = offset of the first NUL byte from srcin, or cntin
         if no NUL byte is found within the first cntin bytes.
   Uses: x2-x5, v0-v2 and the condition flags as scratch.

   Outline:
     1. Load the 16-byte aligned chunk that contains srcin, build a
        4-bits-per-byte match mask and shift out the bytes before srcin.
     2. If nothing matched, scan further chunks two at a time
        (L(loop32) / L(loop32_2)), checking the remaining count once per
        pair of chunks.
     3. L(end) converts the mask of the last chunk loaded into a byte
        offset and clamps it to cntin.

   Every load is from a 16-byte aligned address: src is aligned with bic
   and afterwards only advanced in steps of 16.  */
54ENTRY (__strnlen)
	/* PTR_ARG/SIZE_ARG are macros defined outside this file; presumably
	   they normalise the pointer/size argument registers - confirm in
	   sysdep.h.  */
55	PTR_ARG (0)
56	SIZE_ARG (1)
	/* src = srcin rounded down to a 16-byte boundary.  */
57	bic	src, srcin, 15
	/* A limit of zero returns zero without touching memory.  */
58	cbz	cntin, L(nomatch)
	/* Load the aligned chunk containing the first byte; src now points
	   at the following chunk.  */
59	ld1	{vdata.16b}, [src], 16
	/* Each byte lane becomes 0xff where the data byte is zero, else 0.  */
60	cmeq	vhas_chr.16b, vdata.16b, 0
	/* shift = srcin * 4.  The register-controlled lsr below uses the
	   shift amount modulo 64, i.e. (srcin & 15) * 4: four mask bits for
	   every byte of the chunk that lies before srcin.  */
61	lsl	shift, srcin, 2
62	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
63	fmov	synd, dend
	/* Discard matches in bytes that precede the start of the string.  */
64	lsr	synd, synd, shift
	/* No NUL from srcin to the end of the first chunk: go scan more.  */
65	cbz	synd, L(start_loop)
	/* NUL found in the first chunk.  In the code visible here this label
	   is only reached by fall-through.  */
66L(finish):
	/* rbit + clz = count of trailing zero bits; divided by 4 that is the
	   byte offset of the first NUL relative to srcin.  */
67	rbit	synd, synd
68	clz	synd, synd
69	lsr	result, synd, 2
	/* result = min (cntin, result), unsigned.  */
70	cmp	cntin, result
71	csel	result, cntin, result, ls
72	ret
73
74L(start_loop):
	/* tmp = number of string bytes covered by the first chunk.  */
75	sub	tmp, src, srcin
	/* cntrem = bytes of the limit left after the first chunk.  If
	   cntin <= tmp the limit ended inside the first chunk, which held
	   no NUL, so the answer is cntin.  */
76	subs	cntrem, cntin, tmp
77	b.ls	L(nomatch)
78
79	/* Make sure that it won't overread by a 16-byte chunk */
	/* (cntrem + 15) >> 4 is the number of 16-byte chunks still needed;
	   bit 4 of cntrem + 15 is the low bit of that number.  The loop loads
	   two chunks per iteration but tests the count only after the second
	   load, so when the chunk count is odd the loop is entered at its
	   second half.  That way the count test always coincides with the
	   last chunk that is needed.  */
80	add	tmp, cntrem, 15
81	tbnz	tmp, 4, L(loop32_2)
82
83	.p2align 5
84L(loop32):
	/* First chunk of the pair: no count check is needed here.  */
85	ldr	qdata, [src], 16
86	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Pairwise max folds the 16 match bytes into the low 64 bits, which
	   are non-zero iff some byte matched.  */
87	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
88	fmov	synd, dend
89	cbnz	synd, L(end)
90L(loop32_2):
	/* Second chunk of the pair.  */
91	ldr	qdata, [src], 16
	/* Account for both chunks of this iteration.  The flags set here are
	   consumed by b.ls below; the cmeq in between does not change them.  */
92	subs	cntrem, cntrem, 32
93	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Limit reached within this chunk: stop, whether or not it has a NUL
	   (L(end) rebuilds the mask from vhas_chr).  */
94	b.ls	L(end)
95	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
96	fmov	synd, dend
97	cbz	synd, L(loop32)
98
	/* Falls through here when the second chunk contains a NUL.  */
99L(end):
	/* Build the 4-bits-per-byte mask for the last chunk loaded.  */
100	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	/* Undo the post-increment so src addresses that chunk.  */
101	sub	src, src, 16
102	mov	synd, vend.d[0]
	/* result = offset of the chunk from the start of the string.  This
	   overwrites srcin (same register), which is not read again.  */
103	sub	result, src, srcin
	/* Little-endian only: reverse the bits so that clz counts trailing
	   zeros.  NOTE(review): on big-endian the rbit is skipped, presumably
	   because ldr q already places the first byte in the most significant
	   position - confirm against the architecture manual.  */
104#ifndef __AARCH64EB__
105	rbit	synd, synd
106#endif
	/* If synd is zero (limit reached, no NUL in the chunk) clz yields 64,
	   which adds 16 below; the final min with cntin then gives cntin.  */
107	clz	synd, synd
108	add	result, result, synd, lsr 2
	/* result = min (cntin, result), unsigned.  */
109	cmp	cntin, result
110	csel	result, cntin, result, ls
111	ret
112
	/* No NUL within the limit: return the limit itself.  */
113L(nomatch):
114	mov	result, cntin
115	ret
116
117END (__strnlen)
/* Symbol export.  These macros are defined outside this file; presumably
   weak_alias makes strnlen a weak alias of __strnlen and libc_hidden_def
   sets up the library-internal (hidden) definitions of both names -
   confirm against the macro definitions.  */
118libc_hidden_def (__strnlen)
119weak_alias (__strnlen, strnlen)
120libc_hidden_def (strnlen)
121