1/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn.
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* void * [r3] memrchr (const void *s [r3], int byte [r4], size_t size [r5])  */
22
/* Name of the symbol defined by this file.  The default below applies only
   when MEMRCHR has not already been defined before this point.  */
23#ifndef MEMRCHR
24# define MEMRCHR __memrchr
25#endif
/* MEMRCHR scans the SIZE bytes starting at S from the end towards the
   start, one aligned doubleword (8 bytes) at a time, and returns in r3
   the address of the last byte equal to BYTE, or 0 when there is none.

   Register usage, as established by the code below:
     r3   in: S.  Afterwards scratch for cmpb results.  out: result.
     r4   in: BYTE.  Its low 8 bits are replicated into all 8 bytes.
     r5   in: SIZE.  Reused as scratch in the main loop.
     r0   first the padding bit count, then S & ~7, the address of the
          lowest doubleword that may be examined.  Scratch in L(done).
     r6   first a dcbt operand, then the constant -8 used as the index
          for loading the doubleword below r8.  Scratch in L(done).
     r7   address of the last byte of the range (S + SIZE - 1).
     r8   address of the doubleword currently being examined.
     r9   mask for the last doubleword, later the loop count and the
          cmpb result of the lower doubleword of a pair.
     r10  copy of S, kept for the final range check.
     r11, r12  doublewords loaded from memory.
   cr0, cr6, cr7 and ctr are also written.

   Every doubleword is loaded so that the byte at the highest address
   lands in the most significant byte of the register (ldx on little
   endian, byte-reversed ldbrx on big endian).  cntlzd applied to a cmpb
   result therefore locates the match with the highest address.  */
26	.machine  power7
27ENTRY_TOCLESS (MEMRCHR)
28	CALL_MCOUNT 3
29	add	r7,r3,r5      /* Calculate the last acceptable address.  */
	/* r0 = -(S + SIZE).  Its low three bits are the number of bytes
	   between the end of the range and the next doubleword boundary.  */
30	neg	r0,r7
	/* r7 = S + SIZE - 1, the address of the last byte in the range.  */
31	addi	r7,r7,-1
	/* Keep S in r10 because r3 is reused for cmpb results below.  */
32	mr	r10,r3
	/* r6 = r7 with its low 7 bits cleared and r9 = 96 are the two
	   address operands of the dcbt below.  */
33	clrrdi	r6,r7,7
34	li	r9,3<<5
35	dcbt	r9,r6,8       /* Stream hint, decreasing addresses.  */
36
37	/* Replicate BYTE to doubleword.  */
	/* The three insrdi copy the low 8, then 16, then 32 bits of r4 into
	   the field just above them, doubling the replicated width each
	   time.  Only the low 8 bits of the incoming r4 survive.  */
38	insrdi	r4,r4,8,48
39	insrdi	r4,r4,16,32
40	insrdi  r4,r4,32,0
	/* r6 = -8: index used to load the doubleword just below r8.  */
41	li	r6,-8
	/* r9 = all ones, turned into the end mask by the srd below.  */
42	li	r9,-1
	/* r0 = 8 * (r0 & 7): the number of bits of the last doubleword that
	   belong to bytes past the end of the range.  */
43	rlwinm	r0,r0,3,26,28 /* Calculate padding.  */
	/* r8 = address of the aligned doubleword holding the last byte.  */
44	clrrdi	r8,r7,3
	/* r9 = all ones shifted right by the padding.  ANDed with a cmpb
	   result it discards matches in the bytes past the end, which sit
	   in the most significant positions after the load.  */
45	srd	r9,r9,r0
	/* cr0 = unsigned comparison of SIZE with 32, used by the ble.  */
46	cmpldi	r5,32
	/* r0 = S & ~7, the address of the doubleword holding the first
	   byte.  From here on r0 is the lower bound for r8.  */
47	clrrdi	r0,r10,3
48	ble	L(small_range)
49
	/* SIZE > 32.  Check the doubleword holding the last byte, with the
	   bytes past the end masked out by r9.  */
50#ifdef __LITTLE_ENDIAN__
51	ldx	r12,0,r8
52#else
53	ldbrx	r12,0,r8      /* Load reversed doubleword from memory.  */
54#endif
55	cmpb	r3,r12,r4     /* Check for BYTE in DWORD1.  */
56	and	r3,r3,r9
57	cmpldi	cr7,r3,0      /* If r3 == 0, no BYTEs have been found.  */
58	bne	cr7,L(done)
59
	/* Copy the low four bits of r8 into CR field 7.  CR bit 28 then
	   holds the bit of r8 with value 8.  Since r8 is already a multiple
	   of 8, that bit is clear exactly when r8 is a multiple of 16.  */
60	mtcrf   0x01,r8
61	/* Are we now aligned to a quadword boundary?  If so, skip to
62	   the main loop.  Otherwise, go through the alignment code.  */
63	bf	28,L(loop_setup)
64
65	/* Handle DWORD2 of pair.  */
	/* r8 is not a multiple of 16: check the doubleword at r8 - 8 on its
	   own and step r8 down to it, which makes r8 a multiple of 16.  */
66#ifdef __LITTLE_ENDIAN__
67	ldx	r12,r8,r6
68#else
69	ldbrx	r12,r8,r6
70#endif
71	addi	r8,r8,-8
72	cmpb	r3,r12,r4
73	cmpldi	cr7,r3,0
74	bne	cr7,L(done)
75
76L(loop_setup):
77	/* The last dword we want to read in the loop below is the one
78	   containing the first byte of the string, ie. the dword at
79	   s & ~7, or r0.  The first dword read is at r8 - 8, we
80	   read 2 * cnt dwords, so the last dword read will be at
81	   r8 - 8 - 16 * cnt + 8.  Solving for cnt gives
82	   cnt = (r8 - r0) / 16  */
	/* r5 = byte distance from the lowest doubleword to r8.  r8 is then
	   moved down to the first doubleword that the loop reads.  */
83	sub	r5,r8,r0
84	addi	r8,r8,-8
85	srdi	r9,r5,4       /* Number of loop iterations.  */
86	mtctr	r9	      /* Setup the counter.  */
87
88	/* Main loop to look for BYTE backwards in the string.
89	   FIXME: Investigate whether 32 byte align helps with this
90	   9 instruction loop.  */
91	.align	5
92L(loop):
93	/* Load two doublewords, compare and merge in a
94	   single register for speed.  This is an attempt
95	   to speed up the byte-checking process for bigger strings.  */
96
	/* r12 = doubleword at r8 (higher address), r11 = doubleword at
	   r8 - 8.  Their cmpb results go to r3 and r9 respectively, and
	   L(found) relies on that assignment.  */
97#ifdef __LITTLE_ENDIAN__
98	ldx	r12,0,r8
99	ldx	r11,r8,r6
100#else
101	ldbrx	r12,0,r8
102	ldbrx	r11,r8,r6
103#endif
104	cmpb	r3,r12,r4
105	cmpb	r9,r11,r4
106	or	r5,r9,r3      /* Merge everything in one doubleword.  */
107	cmpldi	cr7,r5,0
108	bne	cr7,L(found)
	/* No match in this pair: step down by two doublewords.  */
109	addi	r8,r8,-16
110	bdnz	L(loop)
111
112	/* We may have one more word to read.  */
	/* No match so far, so r3 is 0 here (both cmpb results of the final
	   iteration were zero).  If r8 is not the doubleword holding the
	   first byte there is nothing left to check: return r3 = 0.  */
113	cmpld	r8,r0
114	bnelr
115
	/* Check the remaining doubleword at r8 = r0.  L(done) rejects a
	   match that lies before S.  */
116#ifdef __LITTLE_ENDIAN__
117	ldx	r12,0,r8
118#else
119	ldbrx	r12,0,r8
120#endif
121	cmpb	r3,r12,r4
122	cmpldi	cr7,r3,0
123	bne	cr7,L(done)
	/* Nothing found: r3 is 0 from the cmpb above.  */
124	blr
125
126	.align	4
127L(found):
128	/* OK, one (or both) of the dwords contains BYTE.  Check
129	   the first dword.  */
	/* r3 is the result for the doubleword at r8, the higher address of
	   the pair, so a match there takes precedence.  */
130	cmpldi	cr6,r3,0
131	bne	cr6,L(done)
132
133	/* BYTE must be in the second word.  Adjust the address
134	   again and move the result of cmpb to r3 so we can calculate the
135	   pointer.  */
136
137	mr	r3,r9
138	addi	r8,r8,-8
139
140	/* r3 has the output of the cmpb instruction, that is, it contains
141	   0xff in the same position as BYTE in the original
142	   word from the string.  Use that to calculate the pointer.
143	   We need to make sure BYTE is *before* the end of the
144	   range.  */
145L(done):
	/* On entry r3 is a nonzero cmpb result (already masked for the last
	   doubleword) belonging to the doubleword at r8.  The match address
	   is r8 + 7 - (number of leading zero bytes of r3).  */
146	cntlzd	r9,r3	      /* Count leading zeros before the match.  */
147	cmpld	r8,r0         /* Are we on the last word?  */
148	srdi	r6,r9,3	      /* Convert leading zeros to bytes.  */
	/* r0 = zero bytes - 7, so that r8 - r0 is the match address.  The
	   old value of r0 is no longer needed: cr0 already holds the
	   comparison of r8 with it.  */
149	addi	r0,r6,-7
150	sub	r3,r8,r0
	/* cr7 = unsigned comparison of the match address with S.  */
151	cmpld	cr7,r3,r10
	/* Not the doubleword holding the first byte: return the match.  */
152	bnelr
	/* Doubleword holding the first byte: the match is valid only when
	   its address is not below S.  Otherwise return 0.  */
153	bgelr	cr7
154	li	r3,0
155	blr
156
157	.align	4
	/* Return 0.  Reached from L(small_range) when SIZE is 0.  */
158L(null):
159	li	r3,0
160	blr
161
162/* Deals with size <= 32.  */
163	.align	4
164L(small_range):
	/* SIZE == 0: return 0 before any doubleword is loaded.  */
165	cmpldi	r5,0
166	beq	L(null)
167
	/* Check the doubleword holding the last byte, with the bytes past
	   the end masked out by r9.  */
168#ifdef __LITTLE_ENDIAN__
169	ldx	r12,0,r8
170#else
171	ldbrx	r12,0,r8      /* Load reversed doubleword from memory.  */
172#endif
173	cmpb	r3,r12,r4     /* Check for BYTE in DWORD1.  */
174	and	r3,r3,r9
175	cmpldi	cr7,r3,0
176	bne	cr7,L(done)
177
178	/* Are we done already?  */
	/* The compare is made before r8 is stepped down.  If the doubleword
	   just checked was the one holding the first byte, return with
	   r3 = 0 (the masked cmpb result above was zero).  */
179	cmpld	r8,r0
180	addi	r8,r8,-8
181	beqlr
182
183	.align	5
184L(loop_small):
	/* One doubleword per iteration.  cr0 records whether this is the
	   doubleword holding the first byte, cr7 whether it contains a
	   match.  The addi below does not alter cr0, so the closing bne
	   still tests the compare made before r8 was stepped down.  */
185#ifdef __LITTLE_ENDIAN__
186	ldx	r12,0,r8
187#else
188	ldbrx	r12,0,r8
189#endif
190	cmpb	r3,r12,r4
191	cmpld	r8,r0
192	cmpldi	cr7,r3,0
193	bne	cr7,L(done)
194	addi	r8,r8,-8
195	bne	L(loop_small)
	/* The doubleword holding the first byte had no match: r3 is 0.  */
196	blr
197
198END (MEMRCHR)
/* Publish the routine under the name memrchr.  NOTE(review): both macros
   are defined in headers not shown here.  Going by their names, weak_alias
   presumably makes memrchr a weak alias of __memrchr and
   libc_hidden_builtin_def presumably provides the hidden definition used by
   callers inside the library -- confirm against the headers.  These lines
   name __memrchr literally rather than going through MEMRCHR.  */
199weak_alias (__memrchr, memrchr)
200libc_hidden_builtin_def (memrchr)
201