/* stpncpy(DST, SRC, COUNT) - Copy no more than COUNT characters of
   SRC to DST, returning the address of the terminating '\0' in
   DST, if any, or else DST + COUNT.
4   For SPARC v9.
5   Copyright (C) 1998-2022 Free Software Foundation, Inc.
6   This file is part of the GNU C Library.
7
8   The GNU C Library is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 2.1 of the License, or (at your option) any later version.
12
13   The GNU C Library is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public
19   License along with the GNU C Library; if not, see
20   <https://www.gnu.org/licenses/>.  */
21
22#include <sysdep.h>
23#include <asm/asi.h>
/* Defaults that apply only when XCC has not been defined before this
   point.  */
#ifndef XCC
/* Condition-code register named by the %XCC branches in this file.  */
#define XCC xcc
/* Selects the brz (branch on register zero) form of the COUNT == 0
   test at function entry instead of the tst + be pair.  */
#define USE_BPR
/* Declare to the assembler that %g2, %g3 and %g6 are used as scratch
   registers by this file.  */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif
31
	/* Normally, this uses the
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out if any byte in xword could be zero. This is fast, but
	   it also gives a false alarm for any byte in the range 0x81-0xff.
	   That does not matter for correctness, because whenever this test
	   tells us there could be a zero byte, we check byte by byte, but
	   if bytes with the high bit set are common in the strings, then
	   this will give poor performance. You can #define
	   EIGHTBIT_NOT_RARE and the algorithm will use a test that is one
	   tick slower, but more precise:
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot assume from it which bytes are zero and which are not).
	   It has yet to be measured what the correct default for glibc is
	   these days for an average user.
	 */
47
48	.text
49	.align		32
/* __stpncpy (DST, SRC, COUNT)

   In:	%o0 = DST, %o1 = SRC, %o2 = COUNT.
   Out:	%o0 = address of the first NUL byte stored in DST, or
	DST + COUNT when COUNT bytes were copied without meeting a NUL.
	Once a NUL has been copied, the rest of DST up to COUNT bytes
	is filled with zero bytes.

   Leaf routine: every exit is a retl and no stack is touched.
   Registers written: %o0-%o5, %g1-%g6 and the integer condition codes.

   Register roles:
     %o0  DST cursor		%o1  SRC cursor
     %o2  bytes of COUNT still to be written
     %o3  source doubleword under test (%o5 = raw load, unaligned SRC)
     %g1  0x0101010101010101	%g2  0x8080808080808080
     %g6  address in DST of the NUL (the result once a NUL is seen)

   Outline:
     COUNT == 0			-> 19: return DST unchanged.
     DST not 8-byte aligned	-> 26/27: copy single bytes until it is.
     SRC 8-byte aligned		-> 1/2: doubleword loop.
     SRC not 8-byte aligned	-> 28/29: merge two aligned loads by shifts.
     fewer than 8 bytes left	-> 17/18: tail copy.
     NUL found			-> 3/4, 20 or 55/25: zero-fill, return %g6.

   Delay-slot instructions are written with one extra leading space.
   On the conditional branches marked ",a" the delay slot is executed
   only when the branch is taken.  */
ENTRY(__stpncpy)
	sethi		%hi(0x01010101), %g1		/* IEU0		Group		*/
#ifdef USE_BPR
	brz,pn		%o2, 19f			/* CTI+IEU1			*/
#else
	tst		%o2				/* IEU1				*/
	be,pn		%XCC, 19f			/* CTI				*/
#endif
	 or		%g1, %lo(0x01010101), %g1	/* IEU1				*/
	andcc		%o0, 7, %g0			/* IEU1		Group		*/

	sllx		%g1, 32, %g2			/* IEU0				*/
	bne,pn		%icc, 26f			/* CTI				*/
	 or		%g1, %g2, %g1			/* IEU0		Group		*/
	andcc		%o1, 7, %g3			/* IEU1				*/

	bne,pn		%icc, 28f			/* CTI				*/
	 sllx		%g1, 7, %g2			/* IEU0		Group		*/
	/* DST and SRC are both 8-byte aligned: doubleword loop.  %o3 is
	   the word being tested while the following word is fetched
	   early through ASI_PNF (defined in <asm/asi.h>; presumably the
	   no-fault ASI so that reading ahead cannot trap - confirm).  */
	ldx		[%o1], %o3			/* Load				*/
1:	add		%o1, 8, %o1			/* IEU1				*/

2:	subcc		%o2, 8, %o2			/* IEU1		Group		*/
	bl,pn		%XCC, 18f			/* CTI				*/
	 sub		%o3, %g1, %o4			/* IEU0				*/
	add		%o0, 8, %o0			/* IEU0		Group		*/

	/* The guard must be EIGHTBIT_NOT_RARE: that is the macro the
	   explanatory comment before this function describes and the one
	   the unaligned-source loop at 29: tests.  */
#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU1				*/
#endif
	mov		%o3, %g3			/* IEU1				*/
	ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
	add		%o1, 8, %o1			/* IEU0		Group		*/
	andcc		%o4, %g2, %g0			/* IEU1				*/

	be,a,pt		%xcc, 2b			/* CTI				*/
	 stx		%g3, [%o0-8]			/* Store	Group		*/
	/* The quick test flagged %g3.  Examine its bytes from bits 63:56
	   (lowest address) downwards; the first zero byte found selects
	   one of the fix-up stubs 16: ... 10:.  */
	srlx		%g3, 56, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 16f			/* CTI				*/
	 srlx		%g3, 48, %g4			/* IEU0				*/
	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 15f			/* CTI				*/

	 srlx		%g3, 40, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 14f			/* CTI				*/
	 srlx		%g3, 32, %g4			/* IEU0				*/

	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 13f			/* CTI				*/
	 srlx		%g3, 24, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 12f			/* CTI				*/
	 srlx		%g3, 16, %g4			/* IEU0				*/
	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 11f			/* CTI				*/

	 srlx		%g3, 8, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 10f			/* CTI				*/
	 sub		%o0, 1, %g6			/* IEU0				*/

	/* Last byte: non-zero means the quick test gave a false alarm, so
	   store the word and keep looping.  Otherwise %g6 (set just
	   above) already addresses the NUL in DST.  */
	andcc		%g3, 0xff, %g0			/* IEU1		Group		*/
	bne,pt		%icc, 2b			/* CTI				*/
3:	 stx		%g3, [%o0-8]			/* Store			*/
	/* Zero-fill the %o2 bytes still owed to DST.  %g3 = the part of
	   %o2 that is a multiple of 32, written by the 4-store loop at
	   40:, then 8 bytes at a time at 5:, then 4, 2 and 1 byte chosen
	   by the low bits of %o2.  Result is %g6.  */
	andncc		%o2, 31, %g3			/* IEU1		Group		*/

4:	be,pn		%XCC, 41f			/* CTI				*/
	 and		%o2, 31, %o2			/* IEU1		Group		*/
40:	stx		%g0, [%o0]			/* Store			*/
	stx		%g0, [%o0 + 8]			/* Store	Group		*/

	subcc		%g3, 32, %g3			/* IEU1				*/
	stx		%g0, [%o0 + 16]			/* Store	Group		*/
	stx		%g0, [%o0 + 24]			/* Store	Group		*/
	bne,pt		%XCC, 40b			/* CTI				*/

	 add		%o0, 32, %o0			/* IEU0				*/
41:	subcc		%o2, 8, %o2			/* IEU1		Group		*/
	bl,a,pn		%XCC, 6f			/* CTI				*/
	 andcc		%o2, 4, %g0			/* IEU1		Group		*/

5:	stx		%g0, [%o0]			/* Store			*/
	subcc		%o2, 8, %o2			/* IEU1		Group		*/
	bge,pt		%XCC, 5b			/* CTI				*/
	 add		%o0, 8, %o0			/* IEU0				*/

	andcc		%o2, 4, %g0			/* IEU1		Group		*/
6:	be,a,pn		%icc, 7f			/* CTI				*/
	 andcc		%o2, 2, %g0			/* IEU1		Group		*/
	stw		%g0, [%o0]			/* Store			*/

	add		%o0, 4, %o0			/* IEU0				*/
	andcc		%o2, 2, %g0			/* IEU1		Group		*/
7:	be,a,pn		%icc, 8f			/* CTI				*/
	 andcc		%o2, 1, %g0			/* IEU1		Group		*/

	sth		%g0, [%o0]			/* Store			*/
	add		%o0, 2, %o0			/* IEU0				*/
	andcc		%o2, 1, %g0			/* IEU1		Group		*/
8:	bne,a,pn	%icc, 9f			/* CTI				*/

	 stb		%g0, [%o0]			/* Store			*/
9:	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
	/* Fix-up stubs for the aligned loop.  Each sets %g6 to the address
	   of the NUL in DST and rebuilds %g3 from the bytes that precede
	   the NUL, shifted back into place so that every later byte is
	   zero, then rejoins at 3: to store the word and zero-fill.  */
10:	subcc		%o0, 2, %g6			/* IEU1		Group		*/

	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%g5, 8, %g3			/* IEU0				*/
11:	subcc		%o0, 3, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/

	 sllx		%g4, 16, %g3			/* IEU0				*/
12:	subcc		%o0, 4, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%g5, 24, %g3			/* IEU0				*/

13:	subcc		%o0, 5, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%g4, 32, %g3			/* IEU0				*/
14:	subcc		%o0, 6, %g6			/* IEU1		Group		*/

	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%g5, 40, %g3			/* IEU0				*/
15:	subcc		%o0, 7, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/

	 sllx		%g4, 48, %g3			/* IEU0				*/
16:	subcc		%o0, 8, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 clr		%g3				/* IEU0				*/

	/* Tail: fewer than 8 bytes of COUNT remain.  17: is entered from
	   the unaligned-source loop and first finishes merging the source
	   word; 18: is entered from the aligned loop.  Adding 8 back
	   restores the true remainder in %o2; a remainder of zero returns
	   %o0, which at that point equals DST + COUNT.  */
	.align		16
17:	or		%o3, %o4, %o3			/* IEU0		Group		*/
	sub		%o3, %g1, %o4			/* IEU1				*/
18:	addcc		%o2, 8, %o2			/* IEU1		Group		*/
	be,pn		%XCC, 19f			/* CTI				*/

	 andcc		%o4, %g2, %g0			/* IEU1		Group		*/
	be,pt		%xcc, 21f			/* CTI				*/
	 srlx		%o3, 56, %g5			/* IEU0				*/
	/* A zero byte is possible: copy one byte at a time.  Each byte is
	   stored from the delay slot before the branch to 20: is
	   resolved, so a NUL byte is written before the zero-fill at 20:
	   starts.  */
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 20f			/* CTI				*/
	 stb		%g5, [%o0]			/* Store			*/
	add		%o0, 1, %o0			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/

	be,pn		%XCC, 19f			/* CTI				*/
	 srlx		%o3, 48, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/

	 stb		%g5, [%o0]			/* Store			*/
	add		%o0, 1, %o0			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/
	be,pn		%XCC, 19f			/* CTI				*/

	 srlx		%o3, 40, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/
	 stb		%g5, [%o0]			/* Store			*/

	add		%o0, 1, %o0			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/
	be,pn		%XCC, 19f			/* CTI				*/
	 srlx		%o3, 32, %g5			/* IEU0		Group		*/

	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/
	 stb		%g5, [%o0]			/* Store			*/
	add		%o0, 1, %o0			/* IEU0		Group		*/

	subcc		%o2, 1, %o2			/* IEU1				*/
	be,pn		%XCC, 19f			/* CTI				*/
	 srlx		%o3, 24, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 20f			/* CTI				*/
	 stb		%g5, [%o0]			/* Store			*/
	add		%o0, 1, %o0			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/

	be,pn		%XCC, 19f			/* CTI				*/
	 srlx		%o3, 16, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/

	 stb		%g5, [%o0]			/* Store			*/
	add		%o0, 1, %o0			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/
	be,pn		%XCC, 19f			/* CTI				*/

	 srlx		%o3, 8, %o3			/* IEU0		Group		*/
	stb		%o3, [%o0]			/* Store			*/
	/* 59: the byte in %o3 was the last one COUNT allows and is already
	   stored at [%o0].  Return %o0 if that byte is NUL, else %o0 + 1.
	   Also reached from the byte loop at 27:.  */
59:	add		%o0, 1, %o2			/* IEU1				*/
	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/

	retl						/* CTI+IEU1	Group		*/
	 movne		%icc, %o2, %o0			/* Single	Group		*/
	/* 19: return the current DST cursor unchanged.  */
19:	retl						/* CTI+IEU1	Group		*/
	 nop						/* IEU0				*/

	/* 20: a NUL was just stored at [%o0] by the tail code.  Remember
	   its address in %g6 and clear the remaining %o2 - 1 bytes one at
	   a time.  */
20:	mov		%o0, %g6			/* IEU0		Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/
	be,pn		%XCC, 51f			/* CTI				*/
	 add		%o0, 1, %o0			/* IEU0		Group		*/

50:	stb		%g0, [%o0]			/* Store	Group		*/
	subcc		%o2, 1, %o2			/* IEU1		Group		*/
	bne,pt		%XCC, 50b			/* CTI				*/
	 add		%o0, 1, %o0			/* IEU0				*/

51:	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* 21: tail with no zero byte reported by the quick test.  Store
	   the leading %o2 (1..7) bytes of %o3 as a 4-, 2- and 1-byte
	   piece selected by the bits of %o2, and return the advanced
	   cursor, i.e. DST + COUNT.  */
	.align		16
21:	andcc		%o2, 4, %g0			/* IEU1		Group		*/
	be,pn		%icc, 22f			/* CTI				*/
	 srlx		%o3, 32, %g5			/* IEU0				*/
	stw		%g5, [%o0]			/* Store	Group		*/

	add		%o0, 4, %o0			/* IEU0				*/
	mov		%o3, %g5			/* IEU1				*/
22:	andcc		%o2, 2, %g0			/* IEU1		Group		*/
	be,pn		%icc, 23f			/* CTI				*/

	 srlx		%g5, 16, %g4			/* IEU0				*/
	sth		%g4, [%o0]			/* Store	Group		*/
	add		%o0, 2, %o0			/* IEU0				*/
	mov		%g5, %g4			/* IEU1				*/

23:	srlx		%g4, 8, %g4			/* IEU0		Group		*/
	andcc		%o2, 1, %g0			/* IEU1				*/
	bne,a,pn	%icc, 24f			/* CTI				*/
	 stb		%g4, [%o0]			/* Store	Group		*/

	retl						/* CTI+IEU1	Group		*/
	 nop						/* IEU0				*/
24:	retl						/* CTI+IEU1	Group		*/
	 add		%o0, 1, %o0			/* IEU0				*/

	/* 55: the byte loop at 27: copied a NUL to [%o0 - 1]; record that
	   address in %g6.  25: clear single bytes until %o0 is 8-byte
	   aligned, then continue with the bulk zero-fill at 4:, or return
	   %g6 as soon as COUNT is used up.  */
	.align		16
55:	sub		%o0, 1, %g6			/* IEU0		Group		*/
25:	andcc		%o0, 7, %g0			/* IEU1				*/
	be,a,pn		%icc, 4b			/* CTI				*/
	 andncc		%o2, 31, %g3			/* IEU1		Group		*/

	stb		%g0, [%o0]			/* Store	Group		*/
	subcc		%o2, 1, %o2			/* IEU1				*/
	bne,pt		%XCC, 25b			/* CTI				*/
	 add		%o0, 1, %o0			/* IEU0		Group		*/

	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* 26: DST is not 8-byte aligned.  Copy single bytes (27:) until
	   COUNT is used up (59:), a NUL has been copied (55:), or DST is
	   aligned.  In the last case continue in the doubleword loop at
	   1: when SRC is aligned too, otherwise fall through to 28:.
	   %g2 is completed here because this path leaves the entry code
	   before it is set.  */
	.align		16
26:	ldub		[%o1], %o3			/* Load				*/
	sllx		%g1, 7, %g2			/* IEU0		Group		*/
	stb		%o3, [%o0]			/* Store			*/
27:	subcc		%o2, 1, %o2			/* IEU1				*/

	be,pn		%XCC, 59b			/* CTI				*/
	 add		%o1, 1, %o1			/* IEU0		Group		*/
	add		%o0, 1, %o0			/* IEU1				*/
	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 55b			/* CTI				*/
	 lduba		[%o1] ASI_PNF, %o3		/* Load				*/
	andcc		%o0, 7, %g0			/* IEU1		Group		*/
	bne,a,pt	%icc, 27b			/* CTI				*/

	 stb		%o3, [%o0]			/* Store			*/
	andcc		%o1, 7, %g3			/* IEU1		Group		*/
	be,a,pt		%icc, 1b			/* CTI				*/
	 ldx		[%o1], %o3			/* Load				*/

	/* 28: DST is aligned but SRC is not.  Round SRC down to an 8-byte
	   boundary and build every destination word from two consecutive
	   aligned loads.  */
28:	orcc		%g0, 64, %g4			/* IEU1		Group		*/
	sllx		%g3, 3, %g5			/* IEU0				*/
	sub		%g4, %g5, %g4			/* IEU0		Group		*/
	sub		%o1, %g3, %o1			/* IEU1				*/
							/* %g1 = 0101010101010101
							   %g2 = 8080808080808080
							   %g3 = source alignment
							   %g5 = number of bits to shift left
							   %g4 = number of bits to shift right */

	ldxa		[%o1] ASI_PNF, %o5		/* Load		Group		*/
	addcc		%o1, 8, %o1			/* IEU1				*/
	/* 29: %o3 = (previous load << %g5) | (next load >> %g4).  When
	   fewer than 8 bytes remain the merge is finished at 17:.  */
29:	sllx		%o5, %g5, %o3			/* IEU0		Group		*/
	ldxa		[%o1] ASI_PNF, %o5		/* Load				*/

	subcc		%o2, 8, %o2			/* IEU1				*/
	bl,pn		%XCC, 17b			/* CTI				*/
	 srlx		%o5, %g4, %o4			/* IEU0		Group		*/
	add		%o1, 8, %o1			/* IEU1				*/

	or		%o3, %o4, %o3			/* IEU0		Group		*/
	add		%o0, 8, %o0			/* IEU1				*/
	sub		%o3, %g1, %o4			/* IEU0		Group		*/
#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU0		Group		*/
#endif
	andcc		%o4, %g2, %g0			/* IEU1		Group		*/

	be,a,pt		%xcc, 29b			/* CTI				*/
	 stx		%o3, [%o0-8]			/* Store			*/
	/* Possible NUL in %o3: same byte-by-byte scan as in the aligned
	   loop, with fix-up stubs 36: ... 30:.  */
	srlx		%o3, 56, %o4			/* IEU0		Group		*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 36f			/* CTI				*/
	 srlx		%o3, 48, %g6			/* IEU0				*/
	andcc		%g6, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 35f			/* CTI				*/

	 srlx		%o3, 40, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 34f			/* CTI				*/
	 srlx		%o3, 32, %g6			/* IEU0				*/

	andcc		%g6, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 33f			/* CTI				*/
	 srlx		%o3, 24, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 32f			/* CTI				*/
	 srlx		%o3, 16, %g6			/* IEU0				*/
	andcc		%g6, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 31f			/* CTI				*/

	 srlx		%o3, 8, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 30f			/* CTI				*/
	 andcc		%o3, 0xff, %g0			/* IEU1		Group		*/

	/* Last byte: non-zero means a false alarm, so store and loop.
	   Otherwise the word (already stored from the delay slot) ends
	   in the NUL, at address %o0 - 1; go and zero-fill from 4:.  */
	bne,pn		%icc, 29b			/* CTI				*/
	 stx		%o3, [%o0-8]			/* Store			*/
	sub		%o0, 1, %g6			/* IEU0		Group		*/
	ba,pt		%xcc, 4b			/* CTI				*/

	 andncc		%o2, 31, %g3			/* IEU1				*/
	/* Fix-up stubs for the unaligned-source loop: %g6 = address of
	   the NUL in DST, %g3 = the bytes preceding the NUL shifted back
	   into place, then rejoin at 3:.  */
30:	subcc		%o0, 2, %g6			/* IEU0				*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%o4, 8, %g3			/* IEU0		Group		*/

31:	sllx		%g6, 16, %g3			/* IEU0		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sub		%o0, 3, %g6			/* IEU1				*/
32:	subcc		%o0, 4, %g6			/* IEU1		Group		*/

	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%o4, 24, %g3			/* IEU0				*/
33:	sllx		%g6, 32, %g3			/* IEU0		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/

	 sub		%o0, 5, %g6			/* IEU1				*/
34:	subcc		%o0, 6, %g6			/* IEU1		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%o4, 40, %g3			/* IEU0				*/

35:	sllx		%g6, 48, %g3			/* IEU0		Group		*/
	ba,pt		%xcc, 3b			/* CTI				*/
	 sub		%o0, 7, %g6			/* IEU1				*/
36:	subcc		%o0, 8, %g6			/* IEU1		Group		*/

	ba,pt		%xcc, 3b			/* CTI				*/
	 sllx		%o4, 56, %g3			/* IEU0				*/
END(__stpncpy)
420
/* Symbol bookkeeping through glibc macros that are defined outside this
   file.  libc_hidden_def presumably provides the hidden definition of
   __stpncpy for calls made inside libc, and weak_alias presumably makes
   the public name stpncpy a weak alias of __stpncpy - confirm both
   against the macro definitions.  */
libc_hidden_def (__stpncpy)
weak_alias (__stpncpy, stpncpy)
423