1/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn.
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5])  */
22	.machine  power7
23ENTRY (__memrchr)
24	CALL_MCOUNT
25	add	r7,r3,r5      /* Calculate the last acceptable address.  */
26	neg	r0,r7
27	addi	r7,r7,-1
28	mr	r10,r3
29	clrrwi	r6,r7,7
30	li	r9,3<<5
31	dcbt	r9,r6,16      /* Stream hint, decreasing addresses.  */
32
33	/* Replicate BYTE to word.  */
34	insrwi	r4,r4,8,16
35	insrwi	r4,r4,16,0
36	li	r6,-4
37	li	r9,-1
38	rlwinm	r0,r0,3,27,28 /* Calculate padding.  */
39	clrrwi	r8,r7,2
40	srw	r9,r9,r0
41	cmplwi	r5,16
42	clrrwi	r0,r10,2
43	ble	L(small_range)
44
45#ifdef __LITTLE_ENDIAN__
46	lwzx	r12,0,r8
47#else
48	lwbrx	r12,0,r8      /* Load reversed word from memory.  */
49#endif
50	cmpb	r3,r12,r4     /* Check for BYTE in WORD1.  */
51	and	r3,r3,r9
52	cmplwi	cr7,r3,0      /* If r3 == 0, no BYTEs have been found.  */
53	bne	cr7,L(done)
54
55	mtcrf   0x01,r8
56	/* Are we now aligned to a doubleword boundary?  If so, skip to
57	   the main loop.  Otherwise, go through the alignment code.  */
58	bf	29,L(loop_setup)
59
60	/* Handle WORD2 of pair.  */
61#ifdef __LITTLE_ENDIAN__
62	lwzx	r12,r8,r6
63#else
64	lwbrx	r12,r8,r6
65#endif
66	addi	r8,r8,-4
67	cmpb	r3,r12,r4
68	cmplwi	cr7,r3,0
69	bne	cr7,L(done)
70
71L(loop_setup):
72	/* The last word we want to read in the loop below is the one
73	   containing the first byte of the string, ie. the word at
74	   s & ~3, or r0.  The first word read is at r8 - 4, we
75	   read 2 * cnt words, so the last word read will be at
76	   r8 - 4 - 8 * cnt + 4.  Solving for cnt gives
77	   cnt = (r8 - r0) / 8  */
78	sub	r5,r8,r0
79	addi	r8,r8,-4
80	srwi	r9,r5,3       /* Number of loop iterations.  */
81	mtctr	r9	      /* Setup the counter.  */
82
83	/* Main loop to look for BYTE backwards in the string.
84	   FIXME: Investigate whether 32 byte align helps with this
85	   9 instruction loop.  */
86	.align	5
87L(loop):
88	/* Load two words, compare and merge in a
89	   single register for speed.  This is an attempt
90	   to speed up the byte-checking process for bigger strings.  */
91
92#ifdef __LITTLE_ENDIAN__
93	lwzx	r12,0,r8
94	lwzx	r11,r8,r6
95#else
96	lwbrx	r12,0,r8
97	lwbrx	r11,r8,r6
98#endif
99	cmpb	r3,r12,r4
100	cmpb	r9,r11,r4
101	or	r5,r9,r3      /* Merge everything in one word.  */
102	cmplwi	cr7,r5,0
103	bne	cr7,L(found)
104	addi	r8,r8,-8
105	bdnz	L(loop)
106
107	/* We may have one more word to read.  */
108	cmplw	r8,r0
109	bnelr
110
111#ifdef __LITTLE_ENDIAN__
112	lwzx	r12,0,r8
113#else
114	lwbrx	r12,0,r8
115#endif
116	cmpb	r3,r12,r4
117	cmplwi	cr7,r3,0
118	bne	cr7,L(done)
119	blr
120
121	.align	4
122L(found):
123	/* OK, one (or both) of the words contains BYTE.  Check
124	   the first word.  */
125	cmplwi	cr6,r3,0
126	bne	cr6,L(done)
127
128	/* BYTE must be in the second word.  Adjust the address
129	   again and move the result of cmpb to r3 so we can calculate the
130	   pointer.  */
131
132	mr	r3,r9
133	addi	r8,r8,-4
134
135	/* r3 has the output of the cmpb instruction, that is, it contains
136	   0xff in the same position as BYTE in the original
137	   word from the string.  Use that to calculate the pointer.
138	   We need to make sure BYTE is *before* the end of the
139	   range.  */
140L(done):
141	cntlzw	r9,r3	      /* Count leading zeros before the match.  */
142	cmplw	r8,r0         /* Are we on the last word?  */
143	srwi	r6,r9,3	      /* Convert leading zeros to bytes.  */
144	addi	r0,r6,-3
145	sub	r3,r8,r0
146	cmplw	cr7,r3,r10
147	bnelr
148	bgelr	cr7
149	li	r3,0
150	blr
151
152	.align	4
153L(null):
154	li	r3,0
155	blr
156
157/* Deals with size <= 16.  */
158	.align	4
159L(small_range):
160	cmplwi	r5,0
161	beq	L(null)
162
163#ifdef __LITTLE_ENDIAN__
164	lwzx	r12,0,r8
165#else
166	lwbrx	r12,0,r8      /* Load reversed word from memory.  */
167#endif
168	cmpb	r3,r12,r4     /* Check for BYTE in WORD1.  */
169	and	r3,r3,r9
170	cmplwi	cr7,r3,0
171	bne	cr7,L(done)
172
173	/* Are we done already?  */
174	cmplw	r8,r0
175	addi	r8,r8,-4
176	beqlr
177
178	.align	5
179L(loop_small):
180#ifdef __LITTLE_ENDIAN__
181	lwzx	r12,0,r8
182#else
183	lwbrx	r12,0,r8
184#endif
185	cmpb	r3,r12,r4
186	cmplw	r8,r0
187	cmplwi	cr7,r3,0
188	bne	cr7,L(done)
189	addi	r8,r8,-4
190	bne	L(loop_small)
191	blr
192
193END (__memrchr)
194weak_alias (__memrchr, memrchr)
195libc_hidden_builtin_def (memrchr)
196