/* Optimized strncmp implementation for PowerPC64.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* STRNCMP is the symbol name handed to ENTRY_TOCLESS and END further down.
   It defaults to strncmp unless it was already defined before this point.  */
#ifndef STRNCMP
# define STRNCMP strncmp
#endif

/* See strlen.s for comments on how the end-of-string testing works.  */

/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])

   Compares at most SIZE bytes of S1 and S2, stopping at the first NUL
   byte in S1 or at the first position where the two strings differ.

   Returns 0 when no difference is found.  The paths that compare whole
   doublewords return -1 or 1; the byte loop returns the difference of
   the two (zero-extended) bytes it stopped on.

   Strategy:
     - If S1 and S2 are both 8-byte aligned and SIZE >= 8, compare
       SIZE / 8 doublewords, then hand the remaining SIZE % 8 bytes to
       the byte loop.
     - Otherwise the byte loop handles all SIZE bytes.

   The instructions written out here use r0, r3-r12, CTR, cr0 and cr1.  */

ENTRY_TOCLESS (STRNCMP, 4)
	CALL_MCOUNT 3

/* Register roles.  rWORD3/rWORD4 share r10/r11 with rNEG/rBITDIF: the
   former are only used in the byte loop, the latter only in the
   doubleword paths.  rRTN shares r3 with rSTR1, so writing the return
   value destroys the s1 pointer.  */
#define rTMP2	r0	/* scratch for the little-endian gunk mask */
#define rRTN	r3	/* return value */
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
#define rWORD1	r6	/* current word in s1 */
#define rWORD2	r7	/* current word in s2 */
#define rWORD3  r10	/* alternate s1 byte in the unrolled byte loop */
#define rWORD4  r11	/* alternate s2 byte in the unrolled byte loop */
#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
#define rTMP	r12	/* scratch */

/* Alignment check, interleaved with the first half of the constant setup
   (lis loads only the upper 16 bits of the low word).  */
	dcbt	0,rSTR1
	or	rTMP, rSTR2, rSTR1
	lis	r7F7F, 0x7f7f
	dcbt	0,rSTR2
	clrldi.	rTMP, rTMP, 61	/* cr0: low 3 bits of (s1 | s2).  */
	cmpldi	cr1, rN, 0	/* cr1: size vs 0, consumed at L(unaligned).  */
	lis	rFEFE, -0x101
	bne	L(unaligned)	/* At least one pointer is not 8-byte aligned.  */
/* We are doubleword aligned so set up for two loops.  First a doubleword
   loop, then fall into the byte loop if any residual.  */
	srdi.	rTMP, rN, 3	/* rTMP = size / 8; cr0 tests it for zero.  */
	clrldi	rN, rN, 61	/* rN = size % 8, the byte-loop residual.  */
	addi	rFEFE, rFEFE, -0x101
	addi	r7F7F, r7F7F, 0x7f7f
	cmpldi	cr1, rN, 0	/* cr1: residual vs 0, for L(unaligned).  */
	beq	L(unaligned)	/* size < 8: no doubleword to compare.  */

/* Load the first doublewords and widen the two 32-bit constants to the
   64-bit values listed in the register table.  */
	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
	ld	rWORD1, 0(rSTR1)
	ld	rWORD2, 0(rSTR2)
	sldi	rTMP, rFEFE, 32
	insrdi	r7F7F, r7F7F, 32, 0
	add	rFEFE, rFEFE, rTMP
	b	L(g1)

/* Doubleword loop.  CTR holds the number of doublewords left.  L(g1)
   starts the zero-byte test on the current s1 word and compares the two
   words into cr1.  The branch on cr1 in L(g0) acts on that comparison one
   pass later, after rWORD1 has already been replaced by the next s1 word;
   L(different) therefore reloads the mismatching s1 word.  */
L(g0):
	ldu	rWORD1, 8(rSTR1)
	bne-	cr1, L(different)
	ldu	rWORD2, 8(rSTR2)
L(g1):	add	rTMP, rFEFE, rWORD1
	nor	rNEG, r7F7F, rWORD1
	bdz	L(tail)		/* Last doubleword: finish the test there.  */
	and.	rTMP, rTMP, rNEG	/* Nonzero iff rWORD1 contains a NUL byte.  */
	cmpd	cr1, rWORD1, rWORD2
	beq+	L(g0)

/* OK. We've hit the end of the string. We need to be careful that
   we don't compare two strings as different because of gunk beyond
   the end of the strings...  */

#ifdef __LITTLE_ENDIAN__
/* On entry rTMP is the zero-byte test result and cr1 is the comparison of
   the two full doublewords.  The lowest set bit of rTMP marks the first
   NUL in memory order.  */
L(endstring):
	addi    rTMP2, rTMP, -1
	beq	cr1, L(equal)
	andc    rTMP2, rTMP2, rTMP	/* All bits below the lowest set bit.  */
	rldimi	rTMP2, rTMP2, 1, 0	/* Widen by one bit to cover the NUL byte.  */
	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
	and	rWORD1, rWORD1, rTMP2
	cmpd	cr1, rWORD1, rWORD2
	beq	cr1, L(equal)
	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
	neg	rNEG, rBITDIF
	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
	sld	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)		/* Top bits differ: the sub may overflow.  */
	sradi	rRTN, rRTN, 63		/* must return an int.  */
	ori	rRTN, rRTN, 1
	blr
L(equal):
	li	rRTN, 0
	blr

/* The doublewords differ and no NUL precedes the difference.  rSTR1 has
   already been advanced by 8 (by ldu in L(g0) or addi in L(tail)), so the
   mismatching s1 word is reloaded from -8(rSTR1).  The first differing
   byte in memory order is then moved to the most significant position,
   exactly as in L(endstring).  */
L(different):
	ld	rWORD1, -8(rSTR1)
	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
	neg	rNEG, rBITDIF
	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
	sld	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1
	blr
/* The most significant bits of the two words differ, so the result is
   taken from the sign of rWORD2 alone: -1 if its top bit is set, else 1.  */
L(highbit):
	sradi	rRTN, rWORD2, 63
	ori	rRTN, rRTN, 1
	blr

#else
/* Big-endian.  On entry rNEG is ~(rWORD1 | 0x7f7f...) and cr1 is the
   comparison of the two full doublewords.  rNEG is refined so that
   counting its leading zeros locates the first NUL byte; that position
   is then compared with the position of the first differing bit.  */
L(endstring):
	and	rTMP, r7F7F, rWORD1
	beq	cr1, L(equal)
	add	rTMP, rTMP, r7F7F
	xor.	rBITDIF, rWORD1, rWORD2
	andc	rNEG, rNEG, rTMP
	blt-	L(highbit)		/* Top bits differ: the sub may overflow.  */
	cntlzd	rBITDIF, rBITDIF	/* Position of the first differing bit.  */
	cntlzd	rNEG, rNEG		/* Position of the first NUL byte's top bit.  */
	addi	rNEG, rNEG, 7		/* Position of that byte's last bit.  */
	cmpd	cr1, rNEG, rBITDIF
	sub	rRTN, rWORD1, rWORD2
	blt-	cr1, L(equal)		/* Difference lies beyond the NUL.  */
	sradi	rRTN, rRTN, 63		/* must return an int.  */
	ori	rRTN, rRTN, 1
	blr
L(equal):
	li	rRTN, 0
	blr

/* The doublewords differ and no NUL precedes the difference.  rSTR1 has
   already been advanced by 8 (by ldu in L(g0) or addi in L(tail)), so the
   mismatching s1 word is reloaded from -8(rSTR1).  */
L(different):
	ld	rWORD1, -8(rSTR1)
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1
	blr
/* The most significant bits of the two words differ, so the result is
   taken from the sign of rWORD2 alone: -1 if its top bit is set, else 1.  */
L(highbit):
	sradi	rRTN, rWORD2, 63
	ori	rRTN, rRTN, 1
	blr
#endif

/* Reached by bdz once CTR hits zero: rWORD1/rWORD2 are the last pair
   covered by the doubleword count.  Finish their test here; if they are
   equal and hold no NUL, step both pointers past them and continue with
   the residual bytes.  */
	.align 4
L(tail):
	and.	rTMP, rTMP, rNEG
	cmpd	cr1, rWORD1, rWORD2
	bne-	L(endstring)
	addi	rSTR1, rSTR1, 8
	bne-	cr1, L(different)
	addi	rSTR2, rSTR2, 8
	cmpldi	cr1, rN, 0
/* Oh well.  In this case, we just do a byte-by-byte comparison.
   rN is the number of bytes still to examine and cr1 holds its unsigned
   comparison with 0.  */
L(unaligned):
	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
	ble	cr1, L(ux)	/* Nothing left to compare: return 0.  */
L(uz):
	lbz	rWORD1, 0(rSTR1)
	lbz	rWORD2, 0(rSTR2)
	.align 4
/* Byte loop, unrolled four times, alternating between the rWORD1/rWORD2
   and rWORD3/rWORD4 pairs.  Each step exits with the current pair when
   the count runs out (bdz decrements CTR before testing it), when the s1
   byte is NUL (cr1), or when the two bytes differ (cr0).  */
L(u1):
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu    rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu	rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	b       L(u1)

/* Byte-loop exits: return the difference of the pair that stopped the
   loop (0 when they are equal).  */
L(u3):  sub     rRTN, rWORD3, rWORD4
	blr
L(u4):	sub	rRTN, rWORD1, rWORD2
	blr
/* No bytes to compare.  */
L(ux):
	li	rRTN, 0
	blr
END (STRNCMP)
/* NOTE(review): libc_hidden_builtin_def is defined outside this file;
   presumably it sets up the library-internal alias for strncmp -- confirm.  */
libc_hidden_builtin_def (strncmp)
