1/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2   For SPARC v9.
3   Copyright (C) 1998-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include <asm/asi.h>
/* Declare to the assembler that %g2, %g3 and %g6 are used as scratch
   registers; the routine below writes all three.
   NOTE(review): XCC is not defined in this file.  Presumably a wrapper
   that includes this source defines it and issues its own .register
   directives, which is why these are guarded -- confirm.  */
22#ifndef XCC
23	.register	%g2, #scratch
24	.register	%g3, #scratch
25	.register	%g6, #scratch
26#endif
27
28	/* Normally, this uses the
29	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
30	   to find out if any byte in xword could be zero. This is fast, but
31	   it also gives a false alarm for any byte in the range 0x81-0xff.
32	   That does not matter for correctness: if the test tells us there
33	   could be a zero byte, we check byte by byte. But if bytes with
34	   the high bit set are common in the strings, it will give poor
35	   performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
36	   will use a one-tick-slower but more precise test,
37	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
38	   which does not give any false alarms (but if some bits are set,
39	   one cannot assume from it which bytes are zero and which are not).
40	   It has yet to be measured which default is the right one for
41	   glibc and the average user these days.
42	 */
43
44	.text
45	.align		32
/* char *__stpcpy (char *dest, const char *src)

   Copies the NUL-terminated string at src to dest and returns the
   address of the NUL byte written to dest.

   Register use, as seen in the code below:
     %o0	in: dest, advanced while copying; out: address of the
		NUL byte in dest.
     %o1	in: src, advanced while copying.  It runs ahead of the
		data already stored because the loops read ahead.
     %g1	0x0101010101010101	(zero-byte test, subtrahend)
     %g2	0x8080808080808080	(zero-byte test, mask)
     %g3-%g6, %o2-%o5	scratch.
   The integer condition codes are overwritten.  This is a leaf
   routine: it returns with retl and makes no stack references.

   Three copy paths:
     label 1/2/3   dest and src both 8-byte aligned, one xword per turn;
     label 12/13   dest unaligned, byte copy until dest is aligned;
     label 14/15   dest aligned, src not: each xword to store is merged
		   from two aligned source xwords by shifting.

   Bytes of a word are examined from the most significant one down.
   The byte obtained with a right shift by 56 is the one stored at the
   lowest address ([%o0 - 8] once %o0 has been advanced past the
   word); the comments below call that byte "offset 0" and the least
   significant byte "offset 7".

   The trailing IEU0/IEU1/CTI/Load/Store/Group notes are the original
   scheduling annotations (presumably issue slots and dispatch groups
   of the targeted UltraSPARC pipeline -- confirm).  A line whose
   mnemonic is indented by one extra space sits in the delay slot of
   the branch or retl above it.  */
46ENTRY(__stpcpy)
	/* Start building %g1 = 0x0101010101010101 and test the alignment
	   of both pointers.  An unaligned dest goes to 12 (the mask setup
	   is finished there).  With an aligned dest, an unaligned src goes
	   to 14 with %g3 = src & 7.  The sllx in the delay slot of the
	   second branch runs either way, so %g2 = 0x8080808080808080 on
	   both the 14 path and the fall-through path.  */
47	sethi		%hi(0x01010101), %g1		/* IEU0		Group		*/
48	or		%g1, %lo(0x01010101), %g1	/* IEU0		Group		*/
49	andcc		%o0, 7, %g0			/* IEU1				*/
50	sllx		%g1, 32, %g2			/* IEU0		Group		*/
51
52	bne,pn		%icc, 12f			/* CTI				*/
53	 andcc		%o1, 7, %g3			/* IEU1				*/
54	or		%g1, %g2, %g1			/* IEU0		Group		*/
55	bne,pn		%icc, 14f			/* CTI				*/
56
57	 sllx		%g1, 7, %g2			/* IEU0		Group		*/
	/* Aligned path.  Software-pipelined loop:
	     %g3 = xword currently being tested and stored,
	     %o2 = %g3 - %g1 (with EIGHTBIT_NOT_RARE also & ~%g3),
	     %o3 = next xword, already read ahead at label 3.
	   The read-ahead uses ldxa with ASI_PNF (defined in <asm/asi.h>;
	   presumably the primary no-fault ASI, so reading past the end of
	   the string cannot trap -- confirm).
	   When (%o2 & %g2) == 0 the xword has no zero byte: the annulled
	   delay slot stores it and the loop continues at 2.  The test is
	   on all 64 bits, hence %xcc.  */
581:	ldx		[%o1], %o3			/* Load				*/
59	add		%o1, 8, %o1			/* IEU1				*/
602:	mov		%o3, %g3			/* IEU0		Group		*/
61
62	sub		%o3, %g1, %o2			/* IEU1				*/
633:	ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
64#ifdef EIGHTBIT_NOT_RARE
65	andn		%o2, %g3, %o2			/* IEU0		Group		*/
66#endif
67	add		%o0, 8, %o0			/* IEU0		Group		*/
68	andcc		%o2, %g2, %g0			/* IEU1				*/
69
70	add		%o1, 8, %o1			/* IEU0		Group		*/
71	be,a,pt		%xcc, 2b			/* CTI				*/
72	 stx		%g3, [%o0 - 8]			/* Store			*/
	/* The test fired: %g3 may contain a zero byte.  Nothing has been
	   stored for this xword yet (the stx above was annulled).  Check
	   offsets 0..6 in turn; a zero at offset k branches to label
	   11 - k.  Each delay slot already computes the shift for the next
	   check, alternating between %g5 and %g4, and the tails at labels
	   5..11 reuse whichever of those values is still live.  */
73	srlx		%g3, 56, %g5			/* IEU0		Group		*/
74
75	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
76	be,pn		%icc, 11f			/* CTI				*/
77	 srlx		%g3, 48, %g4			/* IEU0				*/
78	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
79
80	be,pn		%icc, 10f			/* CTI				*/
81	 srlx		%g3, 40, %g5			/* IEU0				*/
82	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
83	be,pn		%icc, 9f			/* CTI				*/
84
85	 srlx		%g3, 32, %g4			/* IEU0				*/
86	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
87	be,pn		%icc, 8f			/* CTI				*/
88	 srlx		%g3, 24, %g5			/* IEU0				*/
89
90	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
91	be,pn		%icc, 7f			/* CTI				*/
92	 srlx		%g3, 16, %g4			/* IEU0				*/
93	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
94
95	be,pn		%icc, 6f			/* CTI				*/
96	 srlx		%g3, 8, %g5			/* IEU0				*/
97	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
98	be,pn		%icc, 5f			/* CTI				*/
99
	/* Offsets 0..6 are non-zero, so the whole xword can be stored.
	   The delay slot above and the one below prepare %o2 and %g3 for
	   the next xword (%o3).  If offset 7 is non-zero too, the test was
	   a false alarm and the loop resumes at 3.  Otherwise the NUL is
	   the last byte stored and control falls into 4.  */
100	 sub		%o3, %g1, %o2			/* IEU0				*/
101	stx		%g3, [%o0 - 8]			/* Store	Group		*/
102	andcc		%g3, 0xff, %g0			/* IEU1				*/
103	bne,pt		%icc, 3b			/* CTI				*/
104
105	 mov		%o3, %g3			/* IEU0		Group		*/
	/* 4: the NUL is the byte just below %o0.  Also the exit of the
	   byte loop at 13.  */
1064:	retl						/* CTI+IEU1	Group		*/
107	 sub		%o0, 1, %o0			/* IEU0				*/
108
	/* Tails of the aligned path.  %o0 is 8-byte aligned here, so the
	   sth/stw stores below are naturally aligned.  %g6 holds the
	   return value until the retl delay slot moves it to %o0.

	   6: NUL at offset 5 (%o0 - 3); %g4 = %g3 >> 16 from the check.
	   5: NUL at offset 6 (%o0 - 2); %g5 = %g3 >> 8 from the check.
	   23: store offsets 4-5 as a halfword, then offsets 0-3 as a word.  */
109	.align		16
1106:	ba,pt		%xcc, 23f			/* CTI		Group		*/
111	 sub		%o0, 3, %g6			/* IEU0				*/
1125:	sub		%o0, 2, %g6			/* IEU0		Group		*/
113	stb		%g5, [%o0 - 2]			/* Store			*/
114
115	srlx		%g3, 16, %g4			/* IEU0		Group		*/
11623:	sth		%g4, [%o0 - 4]			/* Store			*/
117	srlx		%g3, 32, %g4			/* IEU0		Group		*/
118	stw		%g4, [%o0 - 8]			/* Store			*/
119
120	retl						/* CTI+IEU1	Group		*/
121	 mov		%g6, %o0			/* IEU0				*/
	/* 8: NUL at offset 3 (%o0 - 5); %g4 = %g3 >> 32 from the check.
	   7: NUL at offset 4 (%o0 - 4); %g5 = %g3 >> 24 from the check.
	   24: store offsets 0-3 as a word.  */
1228:	ba,pt		%xcc, 24f			/* CTI		Group		*/
123	 sub		%o0, 5, %g6			/* IEU0				*/
124
1257:	sub		%o0, 4, %g6			/* IEU0		Group		*/
126	stb		%g5, [%o0 - 4]			/* Store			*/
127	srlx		%g3, 32, %g4			/* IEU0		Group		*/
12824:	stw		%g4, [%o0 - 8]			/* Store			*/
129
130	retl						/* CTI+IEU1	Group		*/
131	 mov		%g6, %o0 			/* IEU0				*/
	/* 10: NUL at offset 1 (%o0 - 7); %g4 = %g3 >> 48 from the check.
	   9:  NUL at offset 2 (%o0 - 6); %g5 = %g3 >> 40 from the check.
	   25: store offsets 0-1 as a halfword.  */
13210:	ba,pt		%xcc, 25f			/* CTI		Group		*/
133	 sub		%o0, 7, %g6			/* IEU0				*/
134
1359:	sub		%o0, 6, %g6			/* IEU0		Group		*/
136	stb		%g5, [%o0 - 6]			/* Store			*/
137	srlx		%g3, 48, %g4			/* IEU0				*/
13825:	sth		%g4, [%o0 - 8]			/* Store	Group		*/
139
140	retl						/* CTI+IEU1	Group		*/
141	 mov		%g6, %o0			/* IEU0				*/
	/* 11: NUL at offset 0 (%o0 - 8); %g5 = %g3 >> 56 from the check.  */
14211:	stb		%g5, [%o0 - 8]			/* Store	Group		*/
143	retl						/* CTI+IEU1	Group		*/
144
145	 sub		%o0, 8, %o0			/* IEU0				*/
146
	/* 12: dest is not 8-byte aligned.  Finish the %g1/%g2 masks (the
	   branch here skipped that part of the entry sequence) and copy
	   the first byte.  */
147	.align		16
14812:	or		%g1, %g2, %g1			/* IEU0		Group		*/
149	ldub		[%o1], %o3			/* Load				*/
150	sllx		%g1, 7, %g2			/* IEU0		Group		*/
151	stb		%o3, [%o0]			/* Store	Group		*/
152
	/* 13: byte loop.  %o3 is the byte just stored at the old %o0.
	   If it was NUL, leave through 4, which returns %o0 - 1.  The
	   delay slot reads the next source byte in either case (again
	   through ASI_PNF).  While dest is still unaligned, the annulled
	   delay slot stores that byte and the loop repeats.  */
15313:	add		%o0, 1, %o0			/* IEU0				*/
154	add		%o1, 1, %o1			/* IEU1				*/
155	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
156	be,pn		%icc, 4b			/* CTI				*/
157
158	 lduba		[%o1] ASI_PNF, %o3		/* Load				*/
159	andcc		%o0, 7, %g0			/* IEU1		Group		*/
160	bne,a,pt	%icc, 13b			/* CTI				*/
161	 stb		%o3, [%o0]			/* Store			*/
162
	/* Dest is aligned now and %o1 points at the first byte not yet
	   stored.  If src is aligned as well, continue in the xword loop
	   at 1 (the annulled delay slot performs the same load that
	   label 1 repeats).  Otherwise fall into 14 with %g3 = src & 7.  */
163	andcc		%o1, 7, %g3			/* IEU1		Group		*/
164	be,a,pt		%icc, 1b			/* CTI				*/
165	 ldx		[%o1], %o3			/* Load				*/
	/* 14: dest aligned, src not.  Round %o1 down to an 8-byte boundary
	   and set up the shift counts: %g5 = 8 * (src & 7) and
	   %g4 = 64 - %g5.  As %g3 is 1..7 here, both counts are in 8..56.
	   NOTE(review): the condition codes set by orcc/addcc here (and by
	   the subcc forms in the tails below) have no visible consumer;
	   presumably these forms were picked for scheduling -- confirm.  */
16614:	orcc		%g0, 64, %g4			/* IEU1		Group		*/
167
168	sllx		%g3, 3, %g5			/* IEU0				*/
169	sub		%o1, %g3, %o1			/* IEU0		Group		*/
170	sub		%g4, %g5, %g4			/* IEU1				*/
171							/* %g1 = 0101010101010101	*
172							 * %g2 = 8080808080808080	*
173							 * %g3 = source alignment	*
174							 * %g5 = number of bits to shift left  *
175							 * %g4 = number of bits to shift right */
176	ldxa		[%o1] ASI_PNF, %o5		/* Load		Group		*/
177
178	addcc		%o1, 8, %o1			/* IEU1				*/
	/* 15: merge loop.  %o5 is the previous aligned source xword.
	   %o3 = (previous << %g5) | (next >> %g4) is the next eight
	   string bytes; in the first turn the left shift discards the
	   bytes that precede src.  %o3 is tested like %g3 in the aligned
	   loop and stored by the annulled delay slot when it has no zero
	   byte.  */
17915:	sllx		%o5, %g5, %o3			/* IEU0		Group		*/
180	ldxa		[%o1] ASI_PNF, %o5		/* Load				*/
181	srlx		%o5, %g4, %o4			/* IEU0		Group		*/
182
183	add		%o0, 8, %o0			/* IEU1				*/
184	or		%o3, %o4, %o3			/* IEU0		Group		*/
185	add		%o1, 8, %o1			/* IEU1				*/
186	sub		%o3, %g1, %o4			/* IEU0		Group		*/
187
188#ifdef EIGHTBIT_NOT_RARE
189	andn		%o4, %o3, %o4			/* IEU0		Group		*/
190#endif
191	andcc		%o4, %g2, %g0			/* IEU1		Group		*/
192	be,a,pt		%xcc, 15b			/* CTI				*/
193	 stx		%o3, [%o0 - 8]			/* Store			*/
	/* The test fired for %o3.  Check offsets 0..6 in turn; a zero at
	   offset k branches to label 22 - k.  Only %o4 is used as scratch
	   here, so the tails recompute every shift they need.  */
194	srlx		%o3, 56, %o4			/* IEU0		Group		*/
195
196	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
197	be,pn		%icc, 22f			/* CTI				*/
198	 srlx		%o3, 48, %o4			/* IEU0				*/
199	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
200
201	be,pn		%icc, 21f			/* CTI				*/
202	 srlx		%o3, 40, %o4			/* IEU0				*/
203	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
204	be,pn		%icc, 20f			/* CTI				*/
205
206	 srlx		%o3, 32, %o4			/* IEU0				*/
207	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
208	be,pn		%icc, 19f			/* CTI				*/
209	 srlx		%o3, 24, %o4			/* IEU0				*/
210
211	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
212	be,pn		%icc, 18f			/* CTI				*/
213	 srlx		%o3, 16, %o4			/* IEU0				*/
214	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
215
216	be,pn		%icc, 17f			/* CTI				*/
217	 srlx		%o3, 8, %o4			/* IEU0				*/
218	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
219	be,pn		%icc, 16f			/* CTI				*/
220
	/* The delay slot of the branch to 16 tests offset 7.  The stx in
	   the next delay slot is not annulled, so the xword is stored
	   whether or not the loop continues.  If offset 7 is non-zero the
	   test was a false alarm and the loop resumes at 15.  Otherwise
	   the NUL is the last byte stored and %o0 - 1 is returned.  */
221	 andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
222	bne,pn		%icc, 15b			/* CTI				*/
223	 stx		%o3, [%o0 - 8]			/* Store			*/
224	retl						/* CTI+IEU1	Group		*/
225
226	 sub		%o0, 1, %o0			/* IEU0				*/
227
	/* Tails of the merge path.  %g6 holds the return value.  The
	   stores fall through from the NUL byte towards lower addresses:
	     16: NUL at offset 6 (%o0 - 2), then continue at 26
	     17/26: offset 5 (%o0 - 3)
	     18/27: offset 4 (%o0 - 4)
	     19/28: offsets 0-3 as one word at %o0 - 8.  */
228	.align		16
22917:	ba,pt		%xcc, 26f			/* CTI		Group		*/
230	 subcc		%o0, 3, %g6			/* IEU1				*/
23118:	ba,pt		%xcc, 27f			/* CTI		Group		*/
232	 subcc		%o0, 4, %g6			/* IEU1				*/
233
23419:	ba,pt		%xcc, 28f			/* CTI		Group		*/
235	 subcc		%o0, 5, %g6			/* IEU1				*/
23616:	subcc		%o0, 2, %g6			/* IEU1		Group		*/
237	srlx		%o3, 8, %o4			/* IEU0				*/
238
239	stb		%o4, [%o0 - 2]			/* Store			*/
24026:	srlx		%o3, 16, %o4			/* IEU0		Group		*/
241	stb		%o4, [%o0 - 3]			/* Store			*/
24227:	srlx		%o3, 24, %o4			/* IEU0		Group		*/
243
244	stb		%o4, [%o0 - 4]			/* Store			*/
24528:	srlx		%o3, 32, %o4			/* IEU0		Group		*/
246	stw		%o4, [%o0 - 8]			/* Store			*/
247	retl						/* CTI+IEU1	Group		*/
248
249	 mov		%g6, %o0 			/* IEU0				*/
250
	/* Tails for a NUL in the first three bytes, stored bytewise:
	     20: NUL at offset 2 (%o0 - 6), then continue at 29
	     21/29: offset 1 (%o0 - 7)
	     22/30: offset 0 (%o0 - 8).  */
251	.align		16
25221:	ba,pt		%xcc, 29f			/* CTI		Group		*/
253	 subcc		%o0, 7, %g6			/* IEU1				*/
25422:	ba,pt		%xcc, 30f			/* CTI		Group		*/
255	 subcc		%o0, 8, %g6			/* IEU1				*/
256
25720:	subcc		%o0, 6, %g6			/* IEU1		Group		*/
258	srlx		%o3, 40, %o4			/* IEU0				*/
259	stb		%o4, [%o0 - 6]			/* Store			*/
26029:	srlx		%o3, 48, %o4			/* IEU0		Group		*/
261
262	stb		%o4, [%o0 - 7]			/* Store			*/
26330:	srlx		%o3, 56, %o4			/* IEU0		Group		*/
264	stb		%o4, [%o0 - 8]			/* Store			*/
265	retl						/* CTI+IEU1	Group		*/
266
267	 mov		%g6, %o0			/* IEU0				*/
268END(__stpcpy)
269
/* Symbol plumbing for the routine above.  The three macros are defined
   outside this file.  NOTE(review): going by their names, the first
   makes stpcpy a weak alias of __stpcpy and the other two provide the
   hidden definitions used for calls from inside the library (the last
   one for compiler-generated calls to the stpcpy builtin) -- confirm
   against the macro definitions.  */
270weak_alias (__stpcpy, stpcpy)
271libc_hidden_def (__stpcpy)
272libc_hidden_builtin_def (stpcpy)
273