/* Copy SRC to DEST returning DEST.
   For SPARC v9.
   Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <asm/asi.h>

/* This file uses %g2, %g3 and %g6 as temporaries; declare them as
   scratch registers to the assembler.
   NOTE(review): the declarations are skipped when XCC is already
   defined -- presumably by a wrapper that includes this file and has
   made them itself; confirm against the including file.  */
#ifndef XCC
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif

	/* Normally, this uses the
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out if any byte in xword could be zero.  This is fast, but
	   it also gives a false alarm for any byte in the range 0x81-0xff.
	   That does not matter for correctness, because whenever the test
	   says there could be a zero byte we check byte by byte, but if
	   bytes with the high bit set are common in the strings, this will
	   give poor performance.  You can #define EIGHTBIT_NOT_RARE and the
	   algorithm will use a one tick slower, but more precise test
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot assume from it which bytes are zero and which are not).
	   It is yet to be measured what the correct default for glibc is
	   these days for an average user.
	 */

/* char *strcpy (char *dest, const char *src)

   Copy the NUL-terminated string at SRC, terminator included, to DEST
   and return DEST.

   In:	%o0 = dest, %o1 = src.
   Out:	%o0 = dest (saved in %g6 on entry, restored in the delay slot of
	every retl).
   Uses:	%g1-%g6, %o2-%o5 and the condition codes.  Leaf routine: it
	returns with retl and never references the stack.

   Strategy:
     - if dest is not 8-byte aligned, bytes are copied one at a time
       until it is (labels 12/13);
     - if src is then 8-byte aligned as well, whole doublewords are
       copied (labels 1/2/3);
     - otherwise every destination doubleword is assembled from two
       aligned source doublewords with a shift pair (labels 14/15).

   In both doubleword loops a quick "may contain a zero byte" test is
   applied to each doubleword; when it fires the bytes are examined
   individually, most significant byte first, because that is the byte
   at the lowest address.  The loops read one doubleword ahead of what
   has been stored; those read-ahead loads use ASI_PNF (the no-fault
   address space identifier from <asm/asi.h>) so that running past the
   terminator cannot trap.

   The instruction order, the blank-line grouping and the trailing
   IEU/Group annotations reflect the original hand scheduling; do not
   reorder instructions or change the .align directives casually.  */

	.text
	.align		32
ENTRY(strcpy)
	/* Build %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080
	   while checking the alignment of dest (%o0) and src (%o1).  */
	sethi		%hi(0x01010101), %g1		/* IEU0		Group		*/
	mov		%o0, %g6			/* IEU1				*/
	or		%g1, %lo(0x01010101), %g1	/* IEU0		Group		*/
	andcc		%o0, 7, %g0			/* IEU1				*/

	sllx		%g1, 32, %g2			/* IEU0		Group		*/
	/* dest misaligned: go copy bytes.  The delay slot leaves
	   %g3 = src & 7 and sets the flags tested by the next branch.  */
	bne,pn		%icc, 12f			/* CTI				*/
	 andcc		%o1, 7, %g3			/* IEU1				*/
	or		%g1, %g2, %g1			/* IEU0		Group		*/

	/* dest aligned but src is not: use the shift/merge loop.  */
	bne,pn		%icc, 14f			/* CTI				*/
	 sllx		%g1, 7, %g2			/* IEU0		Group		*/

	/* Aligned loop.  %g3 = doubleword being tested and stored,
	   %o3 = doubleword read ahead, %o2 = result of the quick test.
	   %o0 and %o1 are advanced before the store, hence [%o0 - 8].  */
1:	ldx		[%o1], %o3			/* Load				*/
	add		%o1, 8, %o1			/* IEU1				*/

2:	mov		%o3, %g3			/* IEU0		Group		*/
3:	sub		%o3, %g1, %o2			/* IEU1				*/
	ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %g3, %o2			/* IEU0		Group		*/
#endif
	add		%o0, 8, %o0			/* IEU0		Group		*/

	andcc		%o2, %g2, %g0			/* IEU1				*/
	add		%o1, 8, %o1			/* IEU0		Group		*/
	/* No candidate zero byte: store the doubleword (annulled delay
	   slot, executed only when the branch is taken) and continue.  */
	be,a,pt		%xcc, 2b			/* CTI				*/
	 stx		%g3, [%o0 - 8]			/* Store			*/

	/* The quick test fired.  Examine the bytes of %g3 from the most
	   significant one down and branch to the tail that stores
	   everything up to and including the first zero byte.  %g5 and
	   %g4 alternately hold the shifted value for the next andcc; the
	   tails rely on the value computed in the branch delay slots.  */
	srlx		%g3, 56, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 11f			/* CTI				*/
	 srlx		%g3, 48, %g4			/* IEU0				*/

	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 10f			/* CTI				*/
	 srlx		%g3, 40, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 9f			/* CTI				*/
	 srlx		%g3, 32, %g4			/* IEU0				*/
	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 8f			/* CTI				*/

	 srlx		%g3, 24, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 7f			/* CTI				*/
	 srlx		%g3, 16, %g4			/* IEU0				*/

	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 6f			/* CTI				*/
	 srlx		%g3, 8, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	/* The delay slot starts the quick test for the read-ahead
	   doubleword (label 3 computes the same value again).  */
	be,pn		%icc, 5f			/* CTI				*/
	 sub		%o3, %g1, %o2			/* IEU0				*/
	/* The seven upper bytes are non-zero, so all eight bytes get
	   stored whether or not the last one is the terminator.  */
	stx		%g3, [%o0 - 8]			/* Store	Group		*/
	andcc		%g3, 0xff, %g0			/* IEU1				*/

	/* Last byte non-zero as well: the quick test gave a false alarm,
	   carry on with the read-ahead doubleword.  */
	bne,pt		%icc, 3b			/* CTI				*/
	 mov		%o3, %g3			/* IEU0		Group		*/
	/* Common exit, also used by the byte loop at label 13.  */
4:	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* Tails of the aligned loop.  %o0 already points 8 bytes past the
	   doubleword held in %g3, hence the negative offsets.  Numbering
	   the bytes 0 (lowest address) to 7, each entry point stores bytes
	   0 .. first zero byte with the widest stores that fit:
	     5: zero is byte 6	stb byte 6, sth bytes 4-5, stw bytes 0-3
	     6: zero is byte 5	sth bytes 4-5, stw bytes 0-3
	     7: zero is byte 4	stb byte 4, stw bytes 0-3
	     8: zero is byte 3	stw bytes 0-3
	     9: zero is byte 2	stb byte 2, sth bytes 0-1
	    10: zero is byte 1	sth bytes 0-1
	    11: zero is byte 0	stb byte 0  */
	.align		16
5:	stb		%g5, [%o0 - 2]			/* Store	Group		*/
	srlx		%g3, 16, %g4			/* IEU0				*/
6:	sth		%g4, [%o0 - 4]			/* Store	Group		*/
	srlx		%g3, 32, %g4			/* IEU0				*/

	stw		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
7:	stb		%g5, [%o0 - 4]			/* Store	Group		*/

	srlx		%g3, 32, %g4			/* IEU0				*/
8:	stw		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

9:	stb		%g5, [%o0 - 6]			/* Store	Group		*/
	srlx		%g3, 48, %g4			/* IEU0				*/
10:	sth		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/

	 mov		%g6, %o0			/* IEU0				*/
11:	stb		%g5, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* dest is not 8-byte aligned.  First finish building %g1 and %g2,
	   which the entry code had not completed when it branched here,
	   then copy single bytes until dest reaches an 8-byte boundary or
	   the terminator has been copied.  */
12:	or		%g1, %g2, %g1			/* IEU0		Group		*/
	ldub		[%o1], %o3			/* Load				*/
	sllx		%g1, 7, %g2			/* IEU0		Group		*/
	stb		%o3, [%o0]			/* Store	Group		*/

	/* %o3 is the byte that has just been stored.  */
13:	add		%o0, 1, %o0			/* IEU0				*/
	add		%o1, 1, %o1			/* IEU1				*/
	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
	/* It was the terminator: return.  The delay slot has already
	   fetched the following byte, which is why that load uses the
	   no-fault ASI.  */
	be,pn		%icc, 4b			/* CTI				*/

	 lduba		[%o1] ASI_PNF, %o3		/* Load				*/
	andcc		%o0, 7, %g0			/* IEU1		Group		*/
	/* dest still misaligned: store the next byte (annulled delay
	   slot, executed only when the branch is taken) and loop.  */
	bne,a,pt	%icc, 13b			/* CTI				*/
	 stb		%o3, [%o0]			/* Store			*/

	/* dest is aligned now.  Recompute %g3 = src & 7; if src is aligned
	   too, join the aligned loop, otherwise fall into label 14.  */
	andcc		%o1, 7, %g3			/* IEU1		Group		*/
	be,a,pt		%icc, 1b			/* CTI				*/
	 ldx		[%o1], %o3			/* Load				*/

	/* dest aligned, src not.  Round src down to an 8-byte boundary and
	   set up the shift counts:
	     %g1 = 0101010101010101
	     %g2 = 8080808080808080
	     %g3 = source alignment (src & 7)
	     %g5 = number of bits to shift left  (8 * %g3)
	     %g4 = number of bits to shift right (64 - %g5)  */
14:	orcc		%g0, 64, %g4			/* IEU1		Group		*/

	sllx		%g3, 3, %g5			/* IEU0				*/
	sub		%o1, %g3, %o1			/* IEU0		Group		*/
	sub		%g4, %g5, %g4			/* IEU1				*/
	ldxa		[%o1] ASI_PNF, %o5		/* Load		Group		*/

	addcc		%o1, 8, %o1			/* IEU1				*/
	/* Shift/merge loop.  %o5 = last aligned source doubleword read;
	   %o3 = the eight source bytes for the current destination
	   doubleword, i.e. (previous %o5 << %g5) | (new %o5 >> %g4);
	   %o4 = temporary.  */
15:	sllx		%o5, %g5, %o3			/* IEU0		Group		*/
	ldxa		[%o1] ASI_PNF, %o5		/* Load				*/
	srlx		%o5, %g4, %o4			/* IEU0		Group		*/

	add		%o0, 8, %o0			/* IEU1				*/
	or		%o3, %o4, %o3			/* IEU0		Group		*/
	add		%o1, 8, %o1			/* IEU1				*/
	sub		%o3, %g1, %o4			/* IEU0		Group		*/

#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU0		Group		*/
#endif
	andcc		%o4, %g2, %g0			/* IEU1		Group		*/
	/* No candidate zero byte: store (annulled delay slot, executed
	   only when the branch is taken) and continue.  */
	be,a,pt		%xcc, 15b			/* CTI				*/
	 stx		%o3, [%o0 - 8]			/* Store			*/
	/* The quick test fired: examine the bytes of %o3 from the most
	   significant one down.  The tails below recompute their shifts,
	   so the %o4 values left by the delay slots are not relied on.  */
	srlx		%o3, 56, %o4			/* IEU0		Group		*/

	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 22f			/* CTI				*/
	 srlx		%o3, 48, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 21f			/* CTI				*/
	 srlx		%o3, 40, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/

	 srlx		%o3, 32, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 19f			/* CTI				*/
	 srlx		%o3, 24, %o4			/* IEU0				*/

	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 18f			/* CTI				*/
	 srlx		%o3, 16, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 17f			/* CTI				*/
	 srlx		%o3, 8, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 16f			/* CTI				*/

	/* The seven upper bytes are non-zero.  The stx in the delay slot
	   is not annulled, so the doubleword is stored either way: loop
	   on a false alarm, return if the last byte is the terminator.  */
	 andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
	bne,pn		%icc, 15b			/* CTI				*/
	 stx		%o3, [%o0 - 8]			/* Store			*/
	retl						/* CTI+IEU1	Group		*/

	 mov		%g6, %o0			/* IEU0				*/

	/* Tails of the shift/merge loop.  %o0 already points 8 bytes past
	   the doubleword held in %o3.  Numbering the bytes 0 (lowest
	   address) to 7, label 16+k is entered when byte 6-k is the first
	   zero byte; each label stores one more piece and falls through:
	     16: byte 6   17: byte 5   18: byte 4   19: bytes 0-3 (stw)
	     20: byte 2   21: byte 1   22: byte 0  */
	.align		16
16:	srlx		%o3, 8, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 2]			/* Store			*/
17:	srlx		%o3, 16, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 3]			/* Store			*/

18:	srlx		%o3, 24, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 4]			/* Store			*/
19:	srlx		%o3, 32, %o4			/* IEU0		Group		*/
	stw		%o4, [%o0 - 8]			/* Store			*/

	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
	/* NOTE(review): padding, presumably to keep label 20 on the same
	   instruction-group boundary as the other tails -- confirm before
	   removing.  */
	nop
	nop

20:	srlx		%o3, 40, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 6]			/* Store			*/
21:	srlx		%o3, 48, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 7]			/* Store			*/

22:	srlx		%o3, 56, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 8]			/* Store			*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
END(strcpy)
libc_hidden_builtin_def (strcpy)
