/* Compare no more than N characters of S1 and S2, returning less than,
   equal to or greater than zero if S1 is lexicographically less than,
   equal to or greater than S2.
   For SPARC v9.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <asm/asi.h>
/* Defaults applied unless XCC was already defined before this point:
   XCC names the condition-code set used for tests on the length
   argument (xcc, the 64-bit codes), USE_BPR selects the
   branch-on-register form of the "n == 0" check, and the global
   registers this file uses as temporaries are declared as scratch
   to the assembler.  */
#ifndef XCC
#define XCC xcc
#define USE_BPR
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif

	/* Normally, this uses the
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out whether any byte in xword could be zero.  This is
	   fast, but it also gives a false alarm for any byte in the range
	   0x81-0xff.  That does not matter for correctness: whenever the
	   test reports a possible zero byte, the bytes are checked one by
	   one.  However, if bytes with the high bit set are common in the
	   strings, performance will be poor.  You can #define
	   EIGHTBIT_NOT_RARE and the algorithm will use the one tick slower,
	   but more precise, test
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot tell from them which bytes are zero and which are not).
	   It has yet to be measured which default is the right one for an
	   average glibc user these days.
	 */

48	.text
49	.align		32
50ENTRY(strncmp)
51#ifdef USE_BPR
52	brz,pn		%o2, 4f				/* CTI+IEU1	Group		*/
53#else
54	tst		%o2				/* IEU1		Group		*/
55	be,pn		%XCC, 4f			/* CTI				*/
56#endif
57	 sethi		%hi(0x1010101), %g1		/* IEU0				*/
58	andcc		%o0, 7, %g0			/* IEU1		Group		*/
59	bne,pn		%icc, 9f			/* CTI				*/
60
61	 or		%g1, %lo(0x1010101), %g1	/* IEU0				*/
62	andcc		%o1, 7, %g3			/* IEU1		Group		*/
63	bne,pn		%icc, 11f			/* CTI				*/
64	 sllx		%g1, 32, %g2			/* IEU0				*/
65
66	ldx		[%o0], %g4			/* Load		Group		*/
67	or		%g1, %g2, %g1			/* IEU0				*/
681:	ldx		[%o1], %o3			/* Load		Group		*/
69	sllx		%g1, 7, %g2			/* IEU0				*/
70
71	add		%o0, 8, %o0			/* IEU1				*/
722:	subcc		%o2, 8, %o2			/* IEU1		Group		*/
73	bcs,pn		%XCC, 5f			/* CTI				*/
74	 add		%o1, 8, %o1			/* IEU0				*/
75
76	sub		%g4, %g1, %g3			/* IEU0		Group		*/
77	subcc		%g4, %o3, %o4			/* IEU1				*/
78#ifdef EIGHTBIT_NOT_RARE
79	andn		%g3, %g4, %g6			/* IEU0		Group		*/
80#endif
81	bne,pn		%xcc, 6f			/* CTI				*/
82	 ldxa		[%o0] ASI_PNF, %g4		/* Load		Group		*/
83
84	add		%o0, 8, %o0			/* IEU0				*/
85#ifdef EIGHTBIT_NOT_RARE
86	andcc		%g6, %g2, %g0			/* IEU1				*/
87#else
88	andcc		%g3, %g2, %g0			/* IEU1				*/
89#endif
90	be,a,pt		%xcc, 2b			/* CTI				*/
91	 ldxa		[%o1] ASI_PNF, %o3		/* Load		Group		*/
92
93	addcc		%g3, %g1, %o4			/* IEU1				*/
94#ifdef EIGHTBIT_NOT_RARE
95	srlx		%g6, 32, %g6			/* IEU0				*/
96	andcc		%g6, %g2, %g0			/* IEU1		Group		*/
97#else
98	srlx		%g3, 32, %g3			/* IEU0				*/
99	andcc		%g3, %g2, %g0			/* IEU1		Group		*/
100#endif
101	be,pt		%xcc, 3f			/* CTI				*/
102
103	 srlx		%o4, 56, %o5			/* IEU0				*/
104	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
105	be,pn		%icc, 4f			/* CTI				*/
106	 srlx		%o4, 48, %o5			/* IEU0				*/
107
108	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
109	be,pn		%icc, 4f			/* CTI				*/
110	 srlx		%o4, 40, %o5			/* IEU0				*/
111	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
112
113	be,pn		%icc, 4f			/* CTI				*/
114	 srlx		%o4, 32, %o5			/* IEU0				*/
115	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
116	be,pn		%icc, 4f			/* CTI				*/
117
1183:	 srlx		%o4, 24, %o5			/* IEU0				*/
119	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
120	be,pn		%icc, 4f			/* CTI				*/
121	 srlx		%o4, 16, %o5			/* IEU0				*/
122
123	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
124	be,pn		%icc, 4f			/* CTI				*/
125	 srlx		%o4, 8, %o5			/* IEU0				*/
126	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
127
128	be,pn		%icc, 4f			/* CTI				*/
129	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
130	bne,a,pn	%icc, 2b			/* CTI				*/
131	 ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
132
1334:	retl						/* CTI+IEU1	Group		*/
134	 clr		%o0				/* IEU0				*/
135
136	.align		16
1375:	srlx		%g4, 56, %o4			/* IEU0		Group		*/
138	cmp		%o2, -8				/* IEU1				*/
139	be,pn		%XCC, 4b			/* CTI				*/
140	 srlx		%o3, 56, %o5			/* IEU0		Group		*/
141
142	andcc		%o4, 0xff, %g0			/* IEU1				*/
143	be,pn		%xcc, 8f			/* CTI				*/
144	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
145	bne,pn		%xcc, 8f			/* CTI				*/
146
147	 srlx		%o3, 48, %o5			/* IEU0				*/
148	cmp		%o2, -7				/* IEU1		Group		*/
149	be,pn		%XCC, 4b			/* CTI				*/
150	 srlx		%g4, 48, %o4			/* IEU0				*/
151
152	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
153	be,pn		%xcc, 8f			/* CTI				*/
154	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
155	bne,pn		%xcc, 8f			/* CTI				*/
156
157	 srlx		%o3, 40, %o5			/* IEU0				*/
158	cmp		%o2, -6				/* IEU1		Group		*/
159	be,pn		%XCC, 4b			/* CTI				*/
160	 srlx		%g4, 40, %o4			/* IEU0				*/
161
162	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
163	be,pn		%xcc, 8f			/* CTI				*/
164	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
165	bne,pn		%xcc, 8f			/* CTI				*/
166
167	 srlx		%o3, 32, %o5			/* IEU0				*/
168	cmp		%o2, -5				/* IEU1		Group		*/
169	be,pn		%XCC, 4b			/* CTI				*/
170	 srlx		%g4, 32, %o4			/* IEU0				*/
171
172	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
173	be,pn		%xcc, 8f			/* CTI				*/
174	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
175	bne,pn		%xcc, 8f			/* CTI				*/
176
177	 srlx		%o3, 24, %o5			/* IEU0				*/
178	cmp		%o2, -4				/* IEU1		Group		*/
179	be,pn		%XCC, 4b			/* CTI				*/
180	 srlx		%g4, 24, %o4			/* IEU0				*/
181
182	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
183	be,pn		%xcc, 8f			/* CTI				*/
184	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
185	bne,pn		%xcc, 8f			/* CTI				*/
186
187	 srlx		%o3, 16, %o5			/* IEU0				*/
188	cmp		%o2, -3				/* IEU1		Group		*/
189	be,pn		%XCC, 4b			/* CTI				*/
190	 srlx		%g4, 16, %o4			/* IEU0				*/
191
192	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
193	be,pn		%xcc, 8f			/* CTI				*/
194	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
195	bne,pn		%xcc, 8f			/* CTI				*/
196
197	 srlx		%o3, 8, %o5			/* IEU0				*/
198	cmp		%o2, -2				/* IEU1		Group		*/
199	be,pn		%XCC, 4b			/* CTI				*/
200	 srlx		%g4, 8, %o4			/* IEU0				*/
201
202	retl						/* CTI+IEU1	Group		*/
203	 sub		%o4, %o5, %o0			/* IEU0				*/
2046:	addcc		%o3, %o4, %g4			/* IEU1				*/
2057:	srlx		%o3, 56, %o5			/* IEU0				*/
206
207	srlx		%g4, 56, %o4			/* IEU0		Group		*/
208	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
209	be,pn		%xcc, 8f			/* CTI				*/
210	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
211
212	bne,pn		%xcc, 8f			/* CTI				*/
213	 srlx		%o3, 48, %o5			/* IEU0				*/
214	srlx		%g4, 48, %o4			/* IEU0		Group		*/
215	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
216
217	be,pn		%xcc, 8f			/* CTI				*/
218	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
219	bne,pn		%xcc, 8f			/* CTI				*/
220	 srlx		%o3, 40, %o5			/* IEU0				*/
221
222	srlx		%g4, 40, %o4			/* IEU0		Group		*/
223	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
224	be,pn		%xcc, 8f			/* CTI				*/
225	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
226
227	bne,pn		%xcc, 8f			/* CTI				*/
228	 srlx		%o3, 32, %o5			/* IEU0				*/
229	srlx		%g4, 32, %o4			/* IEU0		Group		*/
230	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
231
232	be,pn		%xcc, 8f			/* CTI				*/
233	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
234	bne,pn		%xcc, 8f			/* CTI				*/
235	 srlx		%o3, 24, %o5			/* IEU0				*/
236
237	srlx		%g4, 24, %o4			/* IEU0		Group		*/
238	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
239	be,pn		%xcc, 8f			/* CTI				*/
240	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
241
242	bne,pn		%xcc, 8f			/* CTI				*/
243	 srlx		%o3, 16, %o5			/* IEU0				*/
244	srlx		%g4, 16, %o4			/* IEU0		Group		*/
245	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
246
247	be,pn		%xcc, 8f			/* CTI				*/
248	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
249	bne,pn		%xcc, 8f			/* CTI				*/
250	 srlx		%o3, 8, %o5			/* IEU0				*/
251
252	srlx		%g4, 8, %o4			/* IEU0		Group		*/
253	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
254	be,pn		%xcc, 8f			/* CTI				*/
255	 subcc		%o4, %o5, %o4			/* IEU1		Group		*/
256
257	retl						/* CTI+IEU1	Group		*/
258	 sub		%g4, %o3, %o0			/* IEU0				*/
2598:	retl						/* CTI+IEU1	Group		*/
260	 mov		%o4, %o0			/* IEU0				*/
261
2629:	ldub		[%o0], %g4			/* Load		Group		*/
263	add		%o0, 1, %o0			/* IEU0				*/
264	ldub		[%o1], %o3			/* Load		Group		*/
265	sllx		%g1, 32, %g2			/* IEU0				*/
266
26710:	subcc		%o2, 1, %o2			/* IEU1		Group		*/
268	be,pn		%XCC, 8b			/* CTI				*/
269	 sub		%g4, %o3, %o4			/* IEU0				*/
270	add		%o1, 1, %o1			/* IEU0		Group		*/
271
272	cmp		%g4, %o3			/* IEU1				*/
273	bne,pn		%xcc, 8b			/* CTI				*/
274	 lduba		[%o0] ASI_PNF, %g4		/* Load		Group		*/
275	andcc		%o3, 0xff, %g0			/* IEU1				*/
276
277	be,pn		%icc, 4b			/* CTI				*/
278	 lduba		[%o1] ASI_PNF, %o3		/* Load		Group		*/
279	andcc		%o0, 7, %g0			/* IEU1				*/
280	bne,a,pn	%icc, 10b			/* CTI				*/
281
282	 add		%o0, 1, %o0			/* IEU0		Group		*/
283	or		%g1, %g2, %g1			/* IEU1				*/
284	andcc		%o1, 7, %g3			/* IEU1		Group		*/
285	be,pn		%icc, 1b			/* CTI				*/
286
287	 ldxa		[%o0] ASI_PNF, %g4		/* Load				*/
28811:	sllx		%g3, 3, %g5			/* IEU0		Group		*/
289	mov		64, %g6				/* IEU1				*/
290	or		%g1, %g2, %g1			/* IEU0		Group		*/
291	sub		%o1, %g3, %o1			/* IEU1				*/
292
293	sub		%g6, %g5, %g6			/* IEU0		Group		*/
294	ldxa		[%o1] ASI_PNF, %o4		/* Load				*/
295	sllx		%g1, 7, %g2			/* IEU1				*/
296	add		%o1, 8, %o1			/* IEU0		Group		*/
297							/* %g1 = 0101010101010101
298							   %g2 = 8080808080808080
299							   %g3 = %o1 alignment
300							   %g5 = number of bits to shift left
301							   %g6 = number of bits to shift right */
302
30312:	sllx		%o4, %g5, %o3			/* IEU0		Group		*/
304	ldxa		[%o1] ASI_PNF, %o4		/* Load				*/
305	add		%o1, 8, %o1			/* IEU1				*/
30613:	ldxa		[%o0] ASI_PNF, %g4		/* Load		Group		*/
307
308	addcc		%o0, 8, %o0			/* IEU1				*/
309	srlx		%o4, %g6, %o5			/* IEU0				*/
310	subcc		%o2, 8, %o2			/* IEU1		Group		*/
311	bcs,pn		%XCC, 5b			/* CTI				*/
312
313	 or		%o3, %o5, %o3			/* IEU0				*/
314	cmp		%g4, %o3			/* IEU1		Group		*/
315	bne,pn		%xcc, 7b			/* CTI				*/
316	 sub		%g4, %g1, %o5			/* IEU0				*/
317
318#ifdef EIGHTBIT_NOT_RARE
319	andn		%o5, %g4, %o5			/* IEU0		Group		*/
320#endif
321	andcc		%o5, %g2, %g0			/* IEU1		Group		*/
322	be,pt		%xcc, 12b			/* CTI				*/
323	 srlx		%o5, 32, %o5			/* IEU0				*/
324	andcc		%o5, %g2, %g0			/* IEU1		Group		*/
325
326	be,pt		%xcc, 14f			/* CTI				*/
327	 srlx		%g4, 56, %o5			/* IEU0				*/
328	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
329	be,pn		%icc, 4b			/* CTI				*/
330
331	 srlx		%g4, 48, %o5			/* IEU0				*/
332	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
333	be,pn		%icc, 4b			/* CTI				*/
334	 srlx		%g4, 40, %o5			/* IEU0				*/
335
336	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
337	be,pn		%icc, 4b			/* CTI				*/
338	 srlx		%g4, 32, %o5			/* IEU0				*/
339	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
340
341	be,pn		%icc, 4b			/* CTI				*/
34214:	 srlx		%g4, 24, %o5			/* IEU0				*/
343	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
344	be,pn		%icc, 4b			/* CTI				*/
345
346	 srlx		%g4, 16, %o5			/* IEU0				*/
347	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
348	be,pn		%icc, 4b			/* CTI				*/
349	 srlx		%g4, 8, %o5			/* IEU0				*/
350
351	andcc		%o5, 0xff, %g0			/* IEU1		Group		*/
352	be,pn		%icc, 4b			/* CTI				*/
353	 andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
354	be,pn		%icc, 4b			/* CTI				*/
355
356	 sllx		%o4, %g5, %o3			/* IEU0				*/
357	ldxa		[%o1] ASI_PNF, %o4		/* Load		Group		*/
358	ba,pt		%xcc, 13b			/* CTI				*/
359	 add		%o1, 8, %o1			/* IEU0				*/
360END(strncmp)
361libc_hidden_builtin_def (strncmp)
362