1/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn.
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5])

   Scan the first SIZE bytes at S for BYTE, one aligned word at a time,
   using cmpb to test four bytes per compare.  Returns in r3 the address
   of the first match, or 0 when there is none.  Because whole aligned
   words are loaded, bytes of the first and last word that lie outside
   the requested range are read and then discarded.

   Register use after the prologue:
     r4      BYTE replicated into all four bytes of the word
     r5      offset (0-3) of the last valid byte inside the last word
     r7      address of the aligned word holding the last valid byte
     r8      address of the aligned word currently being examined
     r9      mask clearing the bytes of the first word that precede S
	     (reused as a cmpb result inside the main loop)
     r3      cmpb result for the current word, then the return value  */
22	.machine  power7
23ENTRY (__memchr)
24	CALL_MCOUNT
25	dcbt	0,r3          /* Prefetch hint for the block holding S.  */
26	clrrwi  r8,r3,2       /* r8 = S rounded down to a word boundary.  */
27	insrwi	r4,r4,8,16    /* Replicate byte to word.  */
28
29	/* Calculate the last acceptable address and check for possible
30	   addition overflow by using saturated math:
31	   r7 = r3 + r5
32	   r7 |= -(r7 < r3)  */
33	add     r7,r3,r5
34	subfc   r6,r3,r7      /* CA = 1 unless the addition wrapped.  */
35	subfe   r9,r9,r9      /* r9 = CA - 1: 0, or -1 on overflow.  */
36	or      r7,r7,r9      /* Saturate r7 to 0xffffffff on overflow.  */
37
38	insrwi	r4,r4,16,0    /* r4 now holds BYTE in all four bytes.  */
39	cmplwi	r5,16         /* cr0 is consumed by the ble below.  */
40	li	r9, -1
41	rlwinm	r6,r3,3,27,28 /* Calculate padding: r6 = (S & 3) * 8.  */
42	addi	r7,r7,-1      /* r7 = address of the last byte to check.  */
	/* Shift the all-ones mask so that the bytes of the first word
	   located before S are cleared: those are the low-order bytes on
	   little-endian and the high-order bytes on big-endian.  */
43#ifdef __LITTLE_ENDIAN__
44	slw	r9,r9,r6
45#else
46	srw	r9,r9,r6
47#endif
48	ble	L(small_range)
49
50	lwz	r12,0(r8)     /* Load word from memory.  */
51	cmpb	r3,r12,r4     /* Check for BYTEs in WORD1.  */
52	and	r3,r3,r9      /* Ignore matches located before S.  */
53	clrlwi	r5,r7,30      /* Byte count - 1 in last word.  */
54	clrrwi	r7,r7,2       /* Address of last word.  */
55	cmplwi	cr7,r3,0      /* If r3 == 0, no BYTEs have been found.  */
56	bne	cr7,L(done)
57
58	mtcrf   0x01,r8       /* CR field 7 = low four bits of r8.  */
59	/* Are we now aligned to a doubleword boundary?  If so, skip to
60	   the main loop.  Otherwise, go through the alignment code.
	   CR bit 29 mirrors the 4's bit of r8: when it is set, the next
	   word to read (r8 + 4) starts a doubleword.  */
61	bt	29,L(loop_setup)
62
63	/* Handle WORD2 of pair.  */
64	lwzu	r12,4(r8)
65	cmpb	r3,r12,r4
66	cmplwi	cr7,r3,0
67	bne	cr7,L(done)
68
69L(loop_setup):
70	/* The last word we want to read in the loop below is the one
71	   containing the last byte of the string, ie. the word at
72	   (s + size - 1) & ~3, or r7.  The first word read is at
73	   r8 + 4, we read 2 * cnt words, so the last word read will
74	   be at r8 + 4 + 8 * cnt - 4.  Solving for cnt gives
75	   cnt = (r7 - r8) / 8  */
76	sub	r6,r7,r8
77	srwi	r6,r6,3	      /* Number of loop iterations.  */
78	mtctr	r6            /* Setup the counter.  */
79
80	/* Main loop to look for BYTE in the string.  Since
81	   it's a small loop (8 instructions), align it to 32-bytes.  */
82	.align	5
83L(loop):
84	/* Load two words, compare and merge in a
85	   single register for speed.  This is an attempt
86	   to speed up the byte-checking process for bigger strings.  */
87	lwz	r12,4(r8)
88	lwzu	r11,8(r8)     /* Also advances r8 to this second word.  */
89	cmpb	r3,r12,r4
90	cmpb	r9,r11,r4
91	or	r6,r9,r3      /* Merge everything in one word.  */
92	cmplwi	cr7,r6,0
93	bne	cr7,L(found)
94	bdnz	L(loop)
95
96	/* We may have one more word to read.  r3 is zero here because the
	   loop ended without a match, so the plain returns below report
	   "not found".  */
97	cmplw	r8,r7
98	beqlr
99
100	lwzu	r12,4(r8)
101	cmpb	r3,r12,r4
102	cmplwi	cr6,r3,0
103	bne	cr6,L(done)
104	blr
105
106	.align	4
107L(found):
108	/* OK, one (or both) of the words contains BYTE.  Check
109	   the first word and decrement the address in case the first
110	   word really contains BYTE.  */
111	cmplwi	cr6,r3,0
112	addi	r8,r8,-4
113	bne	cr6,L(done)
114
115	/* BYTE must be in the second word.  Adjust the address
116	   again and move the result of cmpb to r3 so we can calculate the
117	   pointer.  */
118
119	mr	r3,r9
120	addi	r8,r8,4
121
122	/* r3 has the output of the cmpb instruction, that is, it contains
123	   0xff in the same position as BYTE in the original
124	   word from the string.  Use that to calculate the pointer.
125	   We need to make sure BYTE is *before* the end of the range.  */
126L(done):
127#ifdef __LITTLE_ENDIAN__
128	addi    r0,r3,-1
129	andc    r0,r0,r3      /* Ones only in the bits below the first match.  */
130	popcntw	r0,r0	      /* Count trailing zeros.  */
131#else
132	cntlzw	r0,r3	      /* Count leading zeros before the match.  */
133#endif
134	cmplw	r8,r7         /* Are we on the last word?  */
135	srwi	r0,r0,3	      /* Convert leading/trailing zeros to bytes.  */
136	add	r3,r8,r0      /* Candidate result: address of the match.  */
137	cmplw	cr7,r0,r5     /* If on the last word, check byte offset.  */
138	bnelr                 /* Not the last word: match is in range.  */
139	blelr	cr7           /* Last word: valid when offset <= r5.  */
140	li	r3,0          /* Match lies past the end of the range.  */
141	blr
142
143	.align	4
	/* Reached only for size == 0: there is nothing to search.  */
144L(null):
145	li	r3,0
146	blr
147
148/* Deals with size <= 16.  An unaligned range of that length spans at
   most five words, so the search is fully unrolled: the first word plus
   up to four more.  After each word without a match, reaching the last
   word (r8 == r7) returns with r3 == 0.  */
149	.align	4
150L(small_range):
151	cmplwi	r5,0
152	beq	L(null)
153	lwz	r12,0(r8)     /* Load word from memory.  */
154	cmpb	r3,r12,r4     /* Check for BYTE in WORD1.  */
155	and	r3,r3,r9      /* Ignore matches located before S.  */
156	cmplwi	cr7,r3,0
157	clrlwi	r5,r7,30      /* Byte count - 1 in last word.  */
158	clrrwi	r7,r7,2       /* Address of last word.  */
159	cmplw	r8,r7         /* Are we done already?  */
160	bne	cr7,L(done)   /* Match in the first word.  */
161	beqlr                 /* No match, no more words: r3 == 0.  */
162
163	lwzu	r12,4(r8)
164	cmpb	r3,r12,r4
165	cmplwi	cr6,r3,0
166	cmplw	r8,r7
167	bne	cr6,L(done)
168	beqlr
169
170	lwzu	r12,4(r8)
171	cmpb	r3,r12,r4
172	cmplwi	cr6,r3,0
173	cmplw	r8,r7
174	bne	cr6,L(done)
175	beqlr
176
177	lwzu	r12,4(r8)
178	cmpb	r3,r12,r4
179	cmplwi	cr6,r3,0
180	cmplw	r8,r7
181	bne	cr6,L(done)
182	beqlr
183
	/* Fifth and final possible word; no end-of-range test is needed
	   before branching because L(done) performs it.  */
184	lwzu	r12,4(r8)
185	cmpb	r3,r12,r4
186	cmplwi	cr6,r3,0
187	bne	cr6,L(done)
188	blr                   /* No match anywhere: r3 == 0.  */
189
190END (__memchr)
/* Publish the routine under its standard name.  Judging by the macro
   names, memchr becomes a weak alias of __memchr and also gets a hidden
   definition for calls made from inside libc; both macros are defined
   outside this file, so confirm the exact effect there.  */
191weak_alias (__memchr, memchr)
192libc_hidden_builtin_def (memchr)
193