1/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn.
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5])  */
22
/* Use __memchr as the entry-point name unless whoever includes this file
   has already chosen another name by defining MEMCHR.  */
23#ifndef MEMCHR
24# define MEMCHR __memchr
25#endif
/* The routine below uses cmpb and popcntd, so tell the assembler to
   accept the POWER7 instruction set.  */
26	.machine  power7
/* Search the SIZE bytes starting at S for the first byte equal to the
   low 8 bits of BYTE.  Returns the address of the match in r3, or 0 if
   there is none.

   Register roles, as set up by the code below:
     r3   in: s.  Afterwards: cmpb match mask (0xff in every matching
	  byte position).  Out: pointer to the match, or 0.
     r4   in: byte.  Afterwards: that byte replicated into all 8 bytes.
     r5   in: size.  Afterwards: offset (0-7) of the last valid byte
	  inside the last doubleword.
     r7   address of the last valid byte (s + size - 1, saturated), then
	  rounded down to the address of the last doubleword.
     r8   address of the doubleword currently being examined.
     r9   mask that discards the bytes preceding s in the first
	  doubleword; reused in the main loop as the second match mask.
     r0, r6, r11, r12   scratch.  */
27ENTRY_TOCLESS (MEMCHR)
	/* CALL_MCOUNT is a macro defined outside this file - presumably the
	   profiling hook; confirm in sysdep.h.  */
28	CALL_MCOUNT 3
29	dcbt	0,r3	      /* Cache-touch hint for the block holding s.  */
30	clrrdi  r8,r3,3	      /* r8 = s rounded down to a doubleword.  */
31	insrdi	r4,r4,8,48    /* Replicate BYTE into the low 2 bytes.  */
32
33	/* Calculate the last acceptable address and check for possible
34	   addition overflow by using saturated math:
35	   r7 = r3 + r5
36	   r7 |= -(r7 < r3)
	   i.e. if s + size wrapped around, r7 becomes all ones.  */
37	add     r7,r3,r5
38	subfc   r6,r3,r7      /* r7 - r3; CA = 1 iff r7 >= r3 (no wrap).  */
39	subfe   r9,r9,r9      /* r9 = CA - 1: 0 if no wrap, -1 if wrapped.  */
40	extsw   r6,r9	      /* r6 = sign-extended low word of r9.  */
41	or      r7,r7,r6
42
43	insrdi	r4,r4,16,32   /* Replicate BYTE into the low 4 bytes.  */
44	cmpldi	r5,32	      /* cr0: size vs. 32, consumed by ble below.  */
45	li	r9, -1
46	rlwinm	r6,r3,3,26,28 /* Calculate padding: r6 = (s & 7) * 8 bits.  */
47	insrdi  r4,r4,32,0    /* Replicate BYTE into all 8 bytes.  */
48	addi	r7,r7,-1      /* r7 = address of the last byte in range.  */
	/* Build in r9 a mask that is zero for the bytes of the first
	   doubleword that lie before s, and all ones for the rest.  */
49#ifdef __LITTLE_ENDIAN__
50	sld	r9,r9,r6
51#else
52	srd	r9,r9,r6
53#endif
54	ble	L(small_range)
55
	/* The load is from the aligned address r8, so it may include bytes
	   that precede s; the AND with r9 drops matches in those bytes.  */
56	ld	r12,0(r8)     /* Load doubleword from memory.  */
57	cmpb	r3,r12,r4     /* Check for BYTEs in DWORD1.  */
58	and	r3,r3,r9
59	clrldi	r5,r7,61      /* Byte count - 1 in last dword.  */
60	clrrdi	r7,r7,3       /* Address of last doubleword.  */
61	cmpldi	cr7,r3,0      /* Does r3 indicate we got a hit?  */
62	bne	cr7,L(done)
63
	/* Copy the low 4 bits of r8 into CR field 7.  CR bit 28 is then the
	   "8" bit of the address: if it is set, r8 + 8 (the first dword the
	   loop reads) is 16-byte aligned.  */
64	mtcrf   0x01,r8
65	/* Are we now aligned to a quadword boundary?  If so, skip to
66	   the main loop.  Otherwise, go through the alignment code.  */
67	bt	28,L(loop_setup)
68
69	/* Handle DWORD2 of pair.  */
70	ldu	r12,8(r8)
71	cmpb	r3,r12,r4
72	cmpldi	cr7,r3,0
73	bne	cr7,L(done)
74
75L(loop_setup):
76	/* The last dword we want to read in the loop below is the one
77	   containing the last byte of the string, ie. the dword at
78	   (s + size - 1) & ~7, or r7.  The first dword read is at
79	   r8 + 8, we read 2 * cnt dwords, so the last dword read will
80	   be at r8 + 8 + 16 * cnt - 8.  Solving for cnt gives
81	   cnt = (r7 - r8) / 16  */
82	sub	r6,r7,r8
83	srdi	r6,r6,4	      /* Number of loop iterations.  */
84	mtctr	r6            /* Setup the counter.  */
85
86	/* Main loop to look for BYTE in the string.  Since
87	   it's a small loop (8 instructions), align it to 32-bytes.  */
88	.align	5
89L(loop):
90	/* Load two doublewords, compare and merge in a
91	   single register for speed.  This is an attempt
92	   to speed up the byte-checking process for bigger strings.  */
93	ld	r12,8(r8)
94	ldu	r11,16(r8)    /* r8 now addresses the second dword of the pair.  */
95	cmpb	r3,r12,r4
96	cmpb	r9,r11,r4
97	or	r6,r9,r3      /* Merge everything in one doubleword.  */
98	cmpldi	cr7,r6,0
99	bne	cr7,L(found)
100	bdnz	L(loop)
101
102	/* We may have one more dword to read.  */
	/* Falling out of the loop means the last merged mask was zero, so
	   r3 is 0 here and the conditional return below yields NULL.  */
103	cmpld	r8,r7
104	beqlr
105
106	ldu	r12,8(r8)
107	cmpb	r3,r12,r4
108	cmpldi	cr6,r3,0
109	bne	cr6,L(done)
110	blr		      /* r3 is 0 here: no match, return NULL.  */
111
112	.align	4
113L(found):
114	/* OK, one (or both) of the doublewords contains BYTE.  Check
115	   the first doubleword and decrement the address in case the first
116	   doubleword really contains BYTE.  */
117	cmpldi	cr6,r3,0
118	addi	r8,r8,-8
119	bne	cr6,L(done)
120
121	/* BYTE must be in the second doubleword.  Adjust the address
122	   again and move the result of cmpb to r3 so we can calculate the
123	   pointer.  */
124
125	mr	r3,r9
126	addi	r8,r8,8
127
128	/* r3 has the output of the cmpb instruction, that is, it contains
129	   0xff in the same position as BYTE in the original
130	   doubleword from the string.  Use that to calculate the pointer.
131	   We need to make sure BYTE is *before* the end of the range.  */
132L(done):
133#ifdef __LITTLE_ENDIAN__
	/* (r3 - 1) & ~r3 sets exactly the bits below the lowest set bit of
	   r3, so counting them gives the number of trailing zero bits.  */
134	addi    r0,r3,-1
135	andc    r0,r0,r3
136	popcntd	r0,r0	      /* Count trailing zeros.  */
137#else
138	cntlzd	r0,r3	      /* Count leading zeros before the match.  */
139#endif
140	cmpld	r8,r7         /* Are we on the last dword?  */
141	srdi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
142	add	r3,r8,r0      /* r3 = address of the matching byte.  */
143	cmpld	cr7,r0,r5     /* If on the last dword, check byte offset.  */
144	bnelr		      /* Not the last dword (cr0): match is in range.  */
145	blelr	cr7	      /* Last dword, offset <= last valid offset: OK.  */
146	li	r3,0	      /* Match lies past the end of the range.  */
147	blr
148
149	.align	4
150L(null):
151	li	r3,0
152	blr
153
154/* Deals with size <= 32.  */
	/* Starting at any alignment, at most 32 bytes touch at most five
	   doublewords, so the search is fully unrolled: one masked load
	   followed by up to four more.  After each miss the code returns
	   (with r3 == 0, i.e. NULL) as soon as r8 has reached the last
	   doubleword r7.  */
155	.align	4
156L(small_range):
157	cmpldi	r5,0
158	beq	L(null)	      /* size == 0: nothing to search.  */
159	ld	r12,0(r8)     /* Load doubleword from memory.  */
160	cmpb	r3,r12,r4     /* Check for BYTE in DWORD1.  */
161	and	r3,r3,r9      /* Ignore matches in bytes before s.  */
162	cmpldi	cr7,r3,0
163	clrldi	r5,r7,61      /* Byte count - 1 in last dword.  */
164	clrrdi	r7,r7,3       /* Address of last doubleword.  */
165	cmpld	r8,r7         /* Are we done already?  */
166	bne	cr7,L(done)
167	beqlr
168
169	ldu	r12,8(r8)
170	cmpb	r3,r12,r4
171	cmpldi	cr6,r3,0
172	cmpld	r8,r7
173	bne	cr6,L(done)   /* Found something.  */
174	beqlr		      /* Hit end of string (length).  */
175
176	ldu	r12,8(r8)
177	cmpb	r3,r12,r4
178	cmpldi	cr6,r3,0
179	cmpld	r8,r7
180	bne	cr6,L(done)
181	beqlr
182
183	ldu	r12,8(r8)
184	cmpb	r3,r12,r4
185	cmpldi	cr6,r3,0
186	cmpld	r8,r7
187	bne	cr6,L(done)
188	beqlr
189
	/* Fifth and final doubleword: no further end-of-range test needed.  */
190	ldu	r12,8(r8)
191	cmpb	r3,r12,r4
192	cmpldi	cr6,r3,0
193	bne	cr6,L(done)
194	blr		      /* r3 is 0 here: no match, return NULL.  */
195
196END (MEMCHR)
/* NOTE(review): weak_alias and libc_hidden_builtin_def are macros defined
   outside this file.  Presumably they publish `memchr' as a weak alias of
   __memchr and create the library-internal hidden definition - confirm
   against their definitions.  The alias names __memchr literally, not
   MEMCHR, so a file that overrides MEMCHR is presumably expected to
   neutralise these macros as well - verify against the includers.  */
197weak_alias (__memchr, memchr)
198libc_hidden_builtin_def (memchr)
199