1/* Copyright (C) 2011-2022 Free Software Foundation, Inc.
2   This file is part of the GNU C Library.
3
4   The GNU C Library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU Lesser General Public
6   License as published by the Free Software Foundation; either
7   version 2.1 of the License, or (at your option) any later version.
8
9   The GNU C Library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13
14   You should have received a copy of the GNU Lesser General Public
15   License along with the GNU C Library.  If not, see
16   <https://www.gnu.org/licenses/>.  */
17
18#include <sysdep.h>
19
20@ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7
21@ and ARMv6T2 processors.  It has a fast path for short sizes, and has an
22@ optimised path for large data sets; the worst case is finding the match early
23@ in a large data set.
24@ Note: The use of cbz/cbnz means it's Thumb only
25
26@ 2011-07-15 david.gilbert@linaro.org
27@    Copy from Cortex strings release 21 and change license
28@ http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S
29@    Change function declarations/entry/exit
30@ 2011-12-01 david.gilbert@linaro.org
31@    Add some fixes from comments received (including use of ldrd instead ldm)
32@ 2011-12-07 david.gilbert@linaro.org
33@    Removed cbz from align loop - can't be taken
34
35@ this lets us check a flag in a 00/ff byte easily in either endianness
36#ifdef __ARMEB__
37#define CHARTSTMASK(c) 1<<(31-(c*8))
38#else
39#define CHARTSTMASK(c) 1<<(c*8)
40#endif
41	.syntax unified
42
43	.text
44	.thumb
45	.thumb_func
46	.global memchr
47	.type memchr,%function
48ENTRY(memchr)
49	@ r0 = start of memory to scan
50	@ r1 = character to look for
51	@ r2 = length
52	@ returns r0 = pointer to character or NULL if not found
53	and	r1,r1,#0xff	@ Don't think we can trust the caller to actually pass a char
54
55	cmp	r2,#16		@ If it's short don't bother with anything clever
56	blt	20f
57
58	tst	r0, #7		@ If it's already aligned skip the next bit
59	beq	10f
60
61	@ Work up to an aligned point
625:
63	ldrb	r3, [r0],#1
64	subs	r2, r2, #1
65	cmp	r3, r1
66	beq	50f		@ If it matches exit found
67	tst	r0, #7
68	bne	5b		@ If not aligned yet then do next byte
69
7010:
71	@ At this point, we are aligned, we know we have at least 8 bytes to work with
72	push	{r4,r5,r6,r7}
73	cfi_adjust_cfa_offset (16)
74	cfi_rel_offset (r4, 0)
75	cfi_rel_offset (r5, 4)
76	cfi_rel_offset (r6, 8)
77	cfi_rel_offset (r7, 12)
78
79	cfi_remember_state
80
81	orr	r1, r1, r1, lsl #8	@ expand the match word across to all bytes
82	orr	r1, r1, r1, lsl #16
83	bic	r6, r2, #7	@ Number of double words to work with * 8
84	mvns	r7, #0		@ all F's
85	movs	r3, #0
86
8715:
88	ldrd 	r4,r5, [r0],#8
89	subs	r6, r6, #8
90	eor	r4,r4, r1	@ Get it so that r4,r5 have 00's where the bytes match the target
91	eor	r5,r5, r1
92	uadd8	r4, r4, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
93	sel	r4, r3, r7	@ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
94	uadd8	r5, r5, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
95	sel	r5, r4, r7	@ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
96	cbnz	r5, 60f
97	bne	15b		@ (Flags from the subs above) If not run out of bytes then go around again
98
99	pop	{r4,r5,r6,r7}
100	cfi_adjust_cfa_offset (-16)
101	cfi_restore (r4)
102	cfi_restore (r5)
103	cfi_restore (r6)
104	cfi_restore (r7)
105
106	and	r1,r1,#0xff	@ Get r1 back to a single character from the expansion above
107	and	r2,r2,#7	@ Leave the count remaining as the number after the double words have been done
108
10920:
110	cbz	r2, 40f		@ 0 length or hit the end already then not found
111
11221:  @ Post aligned section, or just a short call
113	ldrb	r3,[r0],#1
114	subs	r2,r2,#1
115	eor	r3,r3,r1	@ r3 = 0 if match - doesn't break flags from sub
116	cbz	r3, 50f
117	bne	21b		@ on r2 flags
118
11940:
120	movs	r0,#0		@ not found
121	DO_RET(lr)
122
12350:
124	subs	r0,r0,#1	@ found
125	DO_RET(lr)
126
12760:  @ We're here because the fast path found a hit - now we have to track down exactly which word it was
128     @ r0 points to the start of the double word after the one that was tested
129     @ r4 has the 00/ff pattern for the first word, r5 has the chained value
130	cfi_restore_state
131	cmp	r4, #0
132	itte	eq
133	moveq	r4, r5		@ the end is in the 2nd word
134	subeq	r0,r0,#3	@ Points to 2nd byte of 2nd word
135	subne	r0,r0,#7	@ or 2nd byte of 1st word
136
137	@ r0 currently points to the 2nd byte of the word containing the hit
138	tst	r4, # CHARTSTMASK(0)	@ 1st character
139	bne	61f
140	adds	r0,r0,#1
141	tst	r4, # CHARTSTMASK(1)	@ 2nd character
142	ittt	eq
143	addeq	r0,r0,#1
144	tsteq	r4, # (3<<15)		@ 2nd & 3rd character
145	@ If not the 3rd must be the last one
146	addeq	r0,r0,#1
147
14861:
149	pop	{r4,r5,r6,r7}
150	cfi_adjust_cfa_offset (-16)
151	cfi_restore (r4)
152	cfi_restore (r5)
153	cfi_restore (r6)
154	cfi_restore (r7)
155
156	subs	r0,r0,#1
157	DO_RET(lr)
158
159END(memchr)
160libc_hidden_builtin_def (memchr)
161