1/* Optimized strncmp implementation for POWER7/PowerPC64.
2   Copyright (C) 2010-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
/* Name of the entry point defined below.  It defaults to strncmp unless
   STRNCMP has already been defined before this point.  */
21#ifndef STRNCMP
22# define STRNCMP strncmp
23#endif
24
25/* See strlen.s for comments on how the end-of-string testing works.  */
26
27/* int [r3] strncmp (const char *s1 [r3],
28		     const char *s2 [r4],
29		     size_t size [r5])  */
30
31	.machine power7
32ENTRY_TOCLESS (STRNCMP, 5)
33	CALL_MCOUNT 3
34
/* Register aliases used below.  Several names share a register: rRTN and
   rSTR1 are both r3, rWORD3 and rNEG are both r10, and rWORD4 and
   rBITDIF are both r11.  rWORD3/rWORD4 are only used by the byte loop,
   rNEG/rBITDIF only by the doubleword path.  */
35#define rTMP2	r0
36#define rRTN	r3
37#define rSTR1	r3	/* first string arg */
38#define rSTR2	r4	/* second string arg */
39#define rN	r5	/* max string length */
40#define rWORD1	r6	/* current word in s1 */
41#define rWORD2	r7	/* current word in s2 */
42#define rWORD3  r10
43#define rWORD4  r11
44#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
45#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
46#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
47#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
48#define rTMP	r12
49
/* Touch the first cache block of each string, interleaved with three
   independent pieces of setup: loading the upper halfwords of the two
   magic constants (lis), testing the low three bits of (s1 | s2) for
   doubleword alignment (clrldi. sets cr0), and comparing n with zero
   (cr1, consumed by the "ble cr1" at L(unaligned)).  If either pointer
   is not 8-byte aligned, the whole comparison is done by the byte loop.  */
50	dcbt	0,rSTR1
51	nop
52	or	rTMP,rSTR2,rSTR1
53	lis	r7F7F,0x7f7f
54	dcbt	0,rSTR2
55	nop
56	clrldi.	rTMP,rTMP,61
57	cmpldi	cr1,rN,0
58	lis	rFEFE,-0x101
59	bne	L(unaligned)
60/* We are doubleword aligned so set up for two loops.  first a double word
61   loop, then fall into the byte loop if any residual.  */
/* rTMP = n / 8 is the doubleword count (srdi. sets cr0 from it) and
   rN = n % 8 is the residual byte count, compared with zero in cr1.  The
   two addi instructions complete the low 32 bits of each constant.  When
   there is no whole doubleword (cr0 eq) go straight to the byte loop,
   which then handles the rN residual bytes.  */
62	srdi.	rTMP,rN,3
63	clrldi	rN,rN,61
64	addi	rFEFE,rFEFE,-0x101
65	addi	r7F7F,r7F7F,0x7f7f
66	cmpldi	cr1,rN,0
67	beq	L(unaligned)
68
/* CTR counts doublewords.  Load the first pair and replicate the low 32
   bits of each constant into the high 32 bits, giving
   rFEFE = 0xfefefefefefefeff and r7F7F = 0x7f7f7f7f7f7f7f7f.  The loop is
   entered at L(g1) because the first pair is already loaded.  */
69	mtctr	rTMP
70	ld	rWORD1,0(rSTR1)
71	ld	rWORD2,0(rSTR2)
72	sldi	rTMP,rFEFE,32
73	insrdi	r7F7F,r7F7F,32,0
74	add	rFEFE,rFEFE,rTMP
75	b	L(g1)
76
/* Doubleword loop.  At the bottom, cr0 (from "and.") is non-zero when
   rWORD1 contains a zero byte and cr1 holds the rWORD1/rWORD2 comparison.
   L(g0) issues the load of the next s1 doubleword before branching on the
   previous comparison, which is why L(different) reloads rWORD1 from
   -8(rSTR1).  bdz decrements CTR and leaves for L(tail) on the last
   doubleword before the two tests have been evaluated; L(tail) performs
   them itself.  */
77L(g0):
78	ldu	rWORD1,8(rSTR1)
79	bne	cr1,L(different)
80	ldu	rWORD2,8(rSTR2)
81L(g1):	add	rTMP,rFEFE,rWORD1
82	nor	rNEG,r7F7F,rWORD1
83	bdz	L(tail)
84	and.	rTMP,rTMP,rNEG
85	cmpd	cr1,rWORD1,rWORD2
86	beq	L(g0)
87
88/* OK. We've hit the end of the string. We need to be careful that
89   we don't compare two strings as different because of gunk beyond
90   the end of the strings...  */
91
92#ifdef __LITTLE_ENDIAN__
/* Little-endian L(endstring).  Entered with rTMP non-zero (zero byte
   found in rWORD1) and cr1 holding the full-word comparison; identical
   words mean the strings are equal.  Otherwise (rTMP - 1) & ~rTMP selects
   the bits below rTMP's lowest set bit, and rldimi shifts that mask left
   by one while keeping its bit 0, so the mask grows by one bit.  Both
   words are masked with it and compared again.  */
93L(endstring):
94	addi    rTMP2, rTMP, -1
95	beq	cr1, L(equal)
96	andc    rTMP2, rTMP2, rTMP
97	rldimi	rTMP2, rTMP2, 1, 0
98	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
99	and	rWORD1, rWORD1, rTMP2
100	cmpd	cr1, rWORD1, rWORD2
101	beq	cr1, L(equal)
/* The masked words still differ.  Clear every byte above the least
   significant differing byte so that byte alone decides the ordering.
   xor. sets cr0 from the sign of the difference bits: if the top bits
   differ, L(highbit) decides from rWORD2's sign instead of from the
   subtraction.  Otherwise the sign of rWORD1 - rWORD2 is turned into
   -1 or 1 by sradi/ori.  */
102	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
103	addi	rNEG, rBITDIF, 1
104	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
105	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
106	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
107	andc	rWORD2, rWORD2, rNEG
108	xor.	rBITDIF, rWORD1, rWORD2
109	sub	rRTN, rWORD1, rWORD2
110	blt	L(highbit)
111	sradi	rRTN, rRTN, 63		/* must return an int.  */
112	ori	rRTN, rRTN, 1
113	blr
/* Strings compare equal: return 0.  */
114L(equal):
115	li	rRTN, 0
116	blr
117
/* Little-endian L(different).  Reached when the doublewords differ and
   the zero-byte test did not fire for them.  rWORD1 is reloaded because
   the loop may already have overwritten it with the next doubleword
   (L(tail) advances rSTR1 by 8 first so the same offset works).  The
   rest is the same differing-byte isolation as in L(endstring).  */
118L(different):
119	ld	rWORD1, -8(rSTR1)
120	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
121	addi	rNEG, rBITDIF, 1
122	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
123	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
124	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
125	andc	rWORD2, rWORD2, rNEG
126	xor.	rBITDIF, rWORD1, rWORD2
127	sub	rRTN, rWORD1, rWORD2
128	blt	L(highbit)
129	sradi	rRTN, rRTN, 63
130	ori	rRTN, rRTN, 1
131	blr
/* The top bits of the two words differ: return -1 if rWORD2 has its top
   bit set, otherwise 1.  */
132L(highbit):
133	sradi	rRTN, rWORD2, 63
134	ori	rRTN, rRTN, 1
135	blr
136
137#else
/* Big-endian L(endstring).  Entered with cr1 holding the full-word
   comparison; identical words mean the strings are equal.  rNEG is
   refined with ~((rWORD1 & r7F7F) + r7F7F) so that it marks the zero
   bytes of rWORD1.  If the top bits differ, L(highbit) decides.
   Otherwise the leading-zero counts are compared: when the first
   differing bit lies beyond the last bit of the first zero byte
   (cntlzd of rNEG, plus 7), the difference is past the terminator and
   the strings are equal.  */
138L(endstring):
139	and	rTMP,r7F7F,rWORD1
140	beq	cr1,L(equal)
141	add	rTMP,rTMP,r7F7F
142	xor.	rBITDIF,rWORD1,rWORD2
143	andc	rNEG,rNEG,rTMP
144	blt	L(highbit)
145	cntlzd	rBITDIF,rBITDIF
146	cntlzd	rNEG,rNEG
147	addi	rNEG,rNEG,7
148	cmpd	cr1,rNEG,rBITDIF
149	sub	rRTN,rWORD1,rWORD2
150	blt	cr1,L(equal)
151	sradi	rRTN,rRTN,63		/* must return an int.  */
152	ori	rRTN,rRTN,1
153	blr
/* Strings compare equal: return 0.  */
154L(equal):
155	li	rRTN,0
156	blr
157
/* Big-endian L(different).  Reload rWORD1 (the loop may already have
   overwritten it with the next doubleword) and derive -1 or 1 from the
   sign of rWORD1 - rWORD2, unless the top bits differ, in which case
   L(highbit) decides from rWORD2's sign.  */
158L(different):
159	ld	rWORD1,-8(rSTR1)
160	xor.	rBITDIF,rWORD1,rWORD2
161	sub	rRTN,rWORD1,rWORD2
162	blt	L(highbit)
163	sradi	rRTN,rRTN,63
164	ori	rRTN,rRTN,1
165	blr
/* The top bits of the two words differ: return -1 if rWORD2 has its top
   bit set, otherwise 1.  */
166L(highbit):
167	sradi	rRTN,rWORD2,63
168	ori	rRTN,rRTN,1
169	blr
170#endif
171
172/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
173	.align	4
/* L(tail): last whole doubleword (CTR reached zero in the loop).  Perform
   the zero-byte and equality tests that bdz skipped.  rSTR1 is advanced
   before branching to L(different) so its -8(rSTR1) reload addresses the
   current doubleword.  If the pair is equal with no zero byte, step both
   pointers past it and fall into the byte loop for the rN residual
   bytes.  */
174L(tail):
175	and.	rTMP,rTMP,rNEG
176	cmpd	cr1,rWORD1,rWORD2
177	bne	L(endstring)
178	addi	rSTR1,rSTR1,8
179	bne	cr1,L(different)
180	addi	rSTR2,rSTR2,8
181	cmpldi	cr1,rN,0
/* Byte loop.  CTR = remaining byte count; cr1 here is the comparison of
   that count with zero, so a zero count returns 0 via L(ux).  The loop
   is unrolled four times, alternating between the rWORD1/rWORD2 and
   rWORD3/rWORD4 register pairs.  Each step exits when the count is
   exhausted (bdz), when the s1 byte is zero (beq cr1), or when the bytes
   differ (bne); the exit target matches the register pair in use.  */
182L(unaligned):
183	mtctr	rN
184	ble	cr1,L(ux)
185L(uz):
186	lbz	rWORD1,0(rSTR1)
187	lbz	rWORD2,0(rSTR2)
188	.align	4
189L(u1):
190	cmpdi	cr1,rWORD1,0
191	bdz	L(u4)
192	cmpd	rWORD1,rWORD2
193	beq	cr1,L(u4)
194	bne	L(u4)
195	lbzu	rWORD3,1(rSTR1)
196	lbzu	rWORD4,1(rSTR2)
197	cmpdi	cr1,rWORD3,0
198	bdz	L(u3)
199	cmpd	rWORD3,rWORD4
200	beq	cr1,L(u3)
201	bne	L(u3)
202	lbzu	rWORD1,1(rSTR1)
203	lbzu	rWORD2,1(rSTR2)
204	cmpdi	cr1,rWORD1,0
205	bdz	L(u4)
206	cmpd	rWORD1,rWORD2
207	beq	cr1,L(u4)
208	bne	L(u4)
209	lbzu	rWORD3,1(rSTR1)
210	lbzu	rWORD4,1(rSTR2)
211	cmpdi	cr1,rWORD3,0
212	bdz	L(u3)
213	cmpd	rWORD3,rWORD4
214	beq	cr1,L(u3)
215	bne	L(u3)
216	lbzu	rWORD1,1(rSTR1)
217	lbzu	rWORD2,1(rSTR2)
218	b	L(u1)
219
/* Byte-loop exits: return the difference of the last byte pair loaded
   (the bytes were zero-extended by lbz/lbzu), or 0 for a zero count.  */
220L(u3):  sub	rRTN,rWORD3,rWORD4
221	blr
222L(u4):	sub	rRTN,rWORD1,rWORD2
223	blr
224L(ux):
225	li	rRTN,0
226	blr
227END (STRNCMP)
/* NOTE(review): this uses the literal name strncmp rather than the
   STRNCMP macro used by ENTRY_TOCLESS/END above.  */
228libc_hidden_builtin_def (strncmp)
229