1/* strcat (dest, src) -- Append SRC on the end of DEST.
2   For SPARC v9.
3   Copyright (C) 1998-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include <asm/asi.h>
/* Default configuration, used when XCC has not been defined before this
   point: branches written with %XCC test the 64-bit condition codes (xcc),
   USE_BPR is defined, and %g2, %g3 and %g6 are declared as scratch
   registers for the assembler.
   NOTE(review): presumably another file can predefine XCC and then include
   this one to get a variant that skips these defaults -- confirm.
   USE_BPR is not tested anywhere in the visible part of this file.  */
22#ifndef XCC
23#define XCC xcc
24#define USE_BPR
25	.register	%g2, #scratch
26	.register	%g3, #scratch
27	.register	%g6, #scratch
28#endif
29
30	/* Normally, this uses
31	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
32	   to find out if any byte in xword could be zero. This is fast, but
33	   also gives false alarm for any byte in range 0x81-0xff. It does
34	   not matter for correctness, as if this test tells us there could
35	   be some zero byte, we check it byte by byte, but if bytes with
36	   high bits set are common in the strings, then this will give poor
37	   performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38	   will use one tick slower, but more precise test
39	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40	   which does not give any false alarms (but if some bits are set,
41	   one cannot assume from it which bytes are zero and which are not).
42	   It has yet to be measured which setting is the better default for
43	   glibc for an average user these days.
44	 */
45
46	.text
47	.align		32
/* char *strcat (char *dest, const char *src)

   In:    %o0 = dest, %o1 = src.
   Out:   %o0 = the original dest (kept in %g6 for the whole routine and
	  moved back in the delay slot of every retl).
   Uses:  %g1-%g6, %o2-%o5 and the integer condition codes.  Leaf routine:
	  no save/restore and no stack accesses.

   Constants held throughout:
	%g1 = 0x0101010101010101
	%g2 = 0x8080808080808080  (%g1 << 7)

   Phases:
     1. Find the terminating NUL of dest: byte by byte at label 32 while
	dest is not 8-byte aligned, then eight bytes at a time at label 49.
     2. If the NUL is not 8-byte aligned, copy src byte by byte (12/13)
	until the destination pointer is aligned.
     3. Copy eight bytes at a time: loop 2/3 when src is 8-byte aligned as
	well, loop 15 (two aligned loads shifted and merged) otherwise.
     4. The word that contains the terminating NUL is written by the tail
	code (5-11 or 16-22) with byte/halfword/word stores that end at the
	NUL, so nothing after the terminator is stored.

   Within a word the most significant byte is the one at the lowest
   address, so bytes are always examined from bit 56 downwards.

   Loads that read ahead of the byte currently examined use ldxa/lduba
   with ASI_PNF (from <asm/asi.h>).
   NOTE(review): presumably the primary no-fault ASI, so that reading past
   the end of a string cannot trap -- confirm against <asm/asi.h>.

   By the file's convention an instruction indented by one extra space is
   in the delay slot of the branch before it.  Branches with ",a" annul
   the delay slot when not taken; the others always execute it.  The
   IEU0/IEU1/Load/Store/CTI/Group notes at the right appear to be
   pipeline scheduling annotations (TODO confirm target CPU).  */
48ENTRY(strcat)
	/* Build %g1, fetch the first byte of dest and save dest in %g6.  */
49	sethi		%hi(0x01010101), %g1		/* IEU0		Group		*/
50	ldub		[%o0], %o3			/* Load				*/
51	or		%g1, %lo(0x01010101), %g1	/* IEU0		Group		*/
52	mov		%o0, %g6			/* IEU1				*/
53
	/* Replicate the constant into the upper half; dest not 8-byte
	   aligned -> byte-wise search at 32.  */
54	sllx		%g1, 32, %g2			/* IEU0		Group		*/
55	andcc		%o0, 7, %g0			/* IEU1				*/
56	or		%g1, %g2, %g1			/* IEU0		Group		*/
57	bne,pn		%icc, 32f			/* CTI				*/
58
	/* Delay slot, executed on both paths: %g2 = %g1 << 7.  */
59	 sllx		%g1, 7, %g2			/* IEU0		Group		*/
	/* dest is aligned.  If its first byte is already NUL, start copying
	   at 30; otherwise %o3 = first eight bytes of dest.  */
60	brz,pn		%o3, 30f			/* CTI+IEU1			*/
61	 ldx		[%o0], %o3			/* Load				*/
6248:	add		%o0, 8, %o0			/* IEU0		Group		*/
63
	/* Search loop: %o3 is the word under test, the next word is loaded
	   while it is tested.  %o0 runs ahead: once the delay-slot add has
	   executed it is 16 bytes past the start of the tested word.  */
6449:	sub		%o3, %g1, %o2			/* IEU0		Group		*/
65#ifdef EIGHTBIT_NOT_RARE
66	andn		%o2, %o3, %g5			/* IEU0		Group		*/
67	ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
68	andcc		%g5, %g2, %g0			/* IEU1		Group		*/
69#else
70	ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
71	andcc		%o2, %g2, %g0			/* IEU1		Group		*/
72#endif
73	be,pt		%xcc, 49b			/* CTI				*/
74
75	 add		%o0, 8, %o0			/* IEU0				*/
	/* A byte may be zero.  %g3 = %o2 + %g1 recovers the tested word;
	   %o2 >> 32 keeps only the flags of its four upper bytes.  (The
	   addcc below is not a delay slot despite its leading space.)  */
76 	addcc		%o2, %g1, %g3			/* IEU1		Group		*/
77	srlx		%o2, 32, %o2			/* IEU0				*/
	/* None of the four upper bytes flagged -> check only the lower
	   four, starting at 51.  */
7850:	andcc		%o2, %g2, %g0			/* IEU1		Group		*/
79
80	be,pn		%xcc, 51f			/* CTI				*/
81	 srlx		%g3, 56, %o2			/* IEU0				*/
	/* Test the bytes one at a time, lowest address first.  Targets
	   29, 28, ..., 23, 52 correspond to byte offsets 0, 1, ..., 7.  */
82	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
83	be,pn		%icc, 29f			/* CTI				*/
84
85	 srlx		%g3, 48, %o2			/* IEU0				*/
86	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
87	be,pn		%icc, 28f			/* CTI				*/
88	 srlx		%g3, 40, %o2			/* IEU0				*/
89
90	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
91	be,pn		%icc, 27f			/* CTI				*/
92	 srlx		%g3, 32, %o2			/* IEU0				*/
93	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
94
95	be,pn		%icc, 26f			/* CTI				*/
	/* Label 51 is on the delay slot of the branch above; it is also the
	   entry point from 50 for the lower four bytes.  */
9651:	 srlx		%g3, 24, %o2			/* IEU0				*/
97	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
98	be,pn		%icc, 25f			/* CTI				*/
99
100	 srlx		%g3, 16, %o2			/* IEU0				*/
101	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
102	be,pn		%icc, 24f			/* CTI				*/
103	 srlx		%g3, 8, %o2			/* IEU0				*/
104
105	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
106	be,pn		%icc, 23f			/* CTI				*/
	/* The sub in this delay slot already starts the test of the next
	   word, which is in %o3.  */
107	 sub		%o3, %g1, %o2			/* IEU0				*/
108	andcc		%g3, 0xff, %g0			/* IEU1		Group		*/
109
110	be,pn		%icc, 52f			/* CTI				*/
	/* No byte of this word was zero (false alarm).  What follows is
	   one more copy of the loop body at 49 for the next word: back to
	   49 if it is clean, else back to the byte checks at 50.  */
111	 ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
112	andcc		%o2, %g2, %g0			/* IEU1		Group		*/
113	be,pt		%xcc, 49b			/* CTI				*/
114
115	 add		%o0, 8, %o0			/* IEU0				*/
116	addcc		%o2, %g1, %g3			/* IEU1		Group		*/
117	ba,pt		%xcc, 50b			/* CTI				*/
118	 srlx		%o2, 32, %o2			/* IEU0				*/
119
	/* The NUL of dest was found at byte offset k of a word and %o0 is
	   16 bytes past the start of that word: rewind by 16 - k so that
	   %o0 points at the NUL.  For k = 1..7 (28..23, 52) the result is
	   not 8-byte aligned, so copying starts byte-wise at 12.  */
120	.align		16
12152:	ba,pt		%xcc, 12f			/* CTI		Group		*/
122	 add		%o0, -9, %o0			/* IEU0				*/
12323:	ba,pt		%xcc, 12f			/* CTI		Group		*/
124	 add		%o0, -10, %o0			/* IEU0				*/
125
12624:	ba,pt		%xcc, 12f			/* CTI		Group		*/
127	 add		%o0, -11, %o0			/* IEU0				*/
12825:	ba,pt		%xcc, 12f			/* CTI		Group		*/
129	 add		%o0, -12, %o0			/* IEU0				*/
130
13126:	ba,pt		%xcc, 12f			/* CTI		Group		*/
132	 add		%o0, -13, %o0			/* IEU0				*/
13327:	ba,pt		%xcc, 12f			/* CTI		Group		*/
134	 add		%o0, -14, %o0			/* IEU0				*/
135
13628:	ba,pt		%xcc, 12f			/* CTI		Group		*/
137	 add		%o0, -15, %o0			/* IEU0				*/
	/* k = 0: the NUL is the first byte of an aligned word, so the
	   destination is 8-byte aligned.  %g3 = src & 7.  */
13829:	add		%o0, -16, %o0			/* IEU0		Group		*/
13930:	andcc		%o1, 7, %g3			/* IEU1				*/
140
	/* Destination aligned.  src misaligned -> merge loop set up at 14
	   (%g4 = 64 is set in the delay slot on both paths).  */
14131:	bne,pn		%icc, 14f			/* CTI				*/
142	 orcc		%g0, 64, %g4			/* IEU1		Group		*/
	/* Both pointers 8-byte aligned: load the first source word.  */
1431:	ldx		[%o1], %o3			/* Load				*/
144	add		%o1, 8, %o1			/* IEU1				*/
145
	/* Aligned copy loop.  %g3 = word to be stored, %o3 = next source
	   word (read ahead).  The stx in the annulled delay slot executes
	   only when the loop branch is taken, i.e. when no byte of %g3 was
	   flagged as possibly zero.  */
1462:	mov		%o3, %g3			/* IEU0		Group		*/
1473:	sub		%o3, %g1, %o2			/* IEU1				*/
148	ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
149#ifdef EIGHTBIT_NOT_RARE
150	andn		%o2, %g3, %o2			/* IEU0		Group		*/
151#endif
152	add		%o0, 8, %o0			/* IEU0		Group		*/
153
154	andcc		%o2, %g2, %g0			/* IEU1				*/
155	add		%o1, 8, %o1			/* IEU0		Group		*/
156	be,a,pt		%xcc, 2b			/* CTI				*/
157	 stx		%g3, [%o0 - 8]			/* Store			*/
158
	/* A byte of %g3 may be zero: test them lowest address first.  The
	   shifted copies alternate between %g5 and %g4 because the tail
	   code at 11..5 stores whatever register the matching test used.  */
159	srlx		%g3, 56, %g5			/* IEU0		Group		*/
160	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
161	be,pn		%icc, 11f			/* CTI				*/
162	 srlx		%g3, 48, %g4			/* IEU0				*/
163
164	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
165	be,pn		%icc, 10f			/* CTI				*/
166	 srlx		%g3, 40, %g5			/* IEU0				*/
167	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
168
169	be,pn		%icc, 9f			/* CTI				*/
170	 srlx		%g3, 32, %g4			/* IEU0				*/
171	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
172	be,pn		%icc, 8f			/* CTI				*/
173
174	 srlx		%g3, 24, %g5			/* IEU0				*/
175	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
176	be,pn		%icc, 7f			/* CTI				*/
177	 srlx		%g3, 16, %g4			/* IEU0				*/
178
179	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
180	be,pn		%icc, 6f			/* CTI				*/
181	 srlx		%g3, 8, %g5			/* IEU0				*/
182	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
183
184	be,pn		%icc, 5f			/* CTI				*/
185	 sub		%o3, %g1, %o2			/* IEU0				*/
	/* The first seven bytes are non-zero, so the whole word can be
	   stored.  If its last byte is non-zero too this was a false alarm:
	   continue at 3 with the next word (moved into %g3 in the delay
	   slot).  Otherwise the string is complete.  */
186	stx		%g3, [%o0 - 8]			/* Store	Group		*/
187	andcc		%g3, 0xff, %g0			/* IEU1				*/
188
189	bne,pt		%icc, 3b			/* CTI				*/
190	 mov		%o3, %g3			/* IEU0		Group		*/
	/* Common return: hand back the original dest.  */
1914:	retl						/* CTI+IEU1	Group		*/
192	 mov		%g6, %o0			/* IEU0				*/
193
	/* Tail stores for the aligned loop.  %o0 - 8 is the address of the
	   final word; label N is entered when the NUL is at byte offset
	   11 - N.  Each entry stores the piece ending in the NUL and then
	   the wider pieces in front of it:
	     5: byte 6, halfword 4-5, word 0-3     6: halfword 4-5, word 0-3
	     7: byte 4, word 0-3                   8: word 0-3
	     9: byte 2, halfword 0-1              10: halfword 0-1
	    11: byte 0  */
194	.align		16
1955:	stb		%g5, [%o0 - 2]			/* Store	Group		*/
196	srlx		%g3, 16, %g4			/* IEU0				*/
1976:	sth		%g4, [%o0 - 4]			/* Store	Group		*/
198	srlx		%g3, 32, %g4			/* IEU0				*/
199
200	stw		%g4, [%o0 - 8]			/* Store	Group		*/
201	retl						/* CTI+IEU1	Group		*/
202	 mov		%g6, %o0			/* IEU0				*/
2037:	stb		%g5, [%o0 - 4]			/* Store	Group		*/
204
205	srlx		%g3, 32, %g4			/* IEU0				*/
2068:	stw		%g4, [%o0 - 8]			/* Store	Group		*/
207	retl						/* CTI+IEU1	Group		*/
208	 mov		%g6, %o0 			/* IEU0				*/
209
2109:	stb		%g5, [%o0 - 6]			/* Store	Group		*/
211	srlx		%g3, 48, %g4			/* IEU0				*/
21210:	sth		%g4, [%o0 - 8]			/* Store	Group		*/
213	retl						/* CTI+IEU1	Group		*/
214
215	 mov		%g6, %o0			/* IEU0				*/
21611:	stb		%g5, [%o0 - 8]			/* Store	Group		*/
217	retl						/* CTI+IEU1	Group		*/
218	 mov		%g6, %o0			/* IEU0				*/
219
	/* dest is not 8-byte aligned: look for its NUL one byte at a time.
	   %o3 holds the byte at %o0.  As soon as %o0 becomes aligned the
	   search switches to the word loop (48), loading the word in the
	   annulled delay slot.  */
220	.align		16
22132:	andcc		%o0, 7, %g0			/* IEU1		Group		*/
222	be,a,pn		%icc, 48b			/* CTI				*/
223	 ldx		[%o0], %o3			/* Load				*/
224	add		%o0, 1, %o0			/* IEU0		Group		*/
225
226	brnz,a,pt	%o3, 32b			/* CTI+IEU1			*/
227	 lduba		[%o0] ASI_PNF, %o3		/* Load				*/
	/* The byte was NUL: step back onto it.  If that address is 8-byte
	   aligned go to 31 with the condition codes describing src & 7
	   (set in the annulled delay slot).  */
228	add		%o0, -1, %o0			/* IEU0		Group		*/
229	andcc		%o0, 7, %g0			/* IEU1		Group		*/
230
231	be,a,pn		%icc, 31b			/* CTI				*/
232	 andcc		%o1, 7, %g3			/* IEU1		Group		*/
	/* Byte copy until the destination is 8-byte aligned.  A copied NUL
	   ends the routine through 4.  The next source byte is fetched in
	   the delay slot of that test and stored in the annulled delay slot
	   of the loop branch.  */
23312:	ldub		[%o1], %o3			/* Load				*/
234	stb		%o3, [%o0]			/* Store	Group		*/
235
23613:	add		%o0, 1, %o0			/* IEU0				*/
237	add		%o1, 1, %o1			/* IEU1				*/
238	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
239	be,pn		%icc, 4b			/* CTI				*/
240
241	 lduba		[%o1] ASI_PNF, %o3		/* Load				*/
242	andcc		%o0, 7, %g0			/* IEU1		Group		*/
243	bne,a,pt	%icc, 13b			/* CTI				*/
244	 stb		%o3, [%o0]			/* Store			*/
245
	/* Destination now aligned.  If src is aligned as well, join the
	   aligned loop at 1; otherwise set %g4 = 64 and fall into 14.  */
246	andcc		%o1, 7, %g3			/* IEU1		Group		*/
247	be,a,pt		%icc, 1b			/* CTI				*/
248	 ldx		[%o1], %o3			/* Load				*/
249	orcc		%g0, 64, %g4			/* IEU1		Group		*/
250
	/* Misaligned source: round src down to an 8-byte boundary and
	   compute the two shift counts (%g5 = 8 * %g3, %g4 = 64 - %g5).  */
25114:	sllx		%g3, 3, %g5			/* IEU0				*/
252	sub		%o1, %g3, %o1			/* IEU0		Group		*/
253	sub		%g4, %g5, %g4			/* IEU1				*/
254							/* %g1 = 0101010101010101	*
255							 * %g2 = 8080808080808080	*
256							 * %g3 = source alignment	*
257							 * %g5 = number of bits to shift left  *
258							 * %g4 = number of bits to shift right */
259	ldxa		[%o1] ASI_PNF, %o5		/* Load		Group		*/
260
261	addcc		%o1, 8, %o1			/* IEU1				*/
	/* Merge loop: %o3 = (previous aligned word << %g5)
	   | (next aligned word >> %g4), i.e. the next eight source bytes.
	   As in the aligned loop, the annulled stx stores the word only
	   when no byte of it was flagged.  */
26215:	sllx		%o5, %g5, %o3			/* IEU0		Group		*/
263	ldxa		[%o1] ASI_PNF, %o5		/* Load				*/
264	srlx		%o5, %g4, %o4			/* IEU0		Group		*/
265
266	add		%o0, 8, %o0			/* IEU1				*/
267	or		%o3, %o4, %o3			/* IEU0		Group		*/
268	add		%o1, 8, %o1			/* IEU1				*/
269	sub		%o3, %g1, %o4			/* IEU0		Group		*/
270
271#ifdef EIGHTBIT_NOT_RARE
272	andn		%o4, %o3, %o4			/* IEU0		Group		*/
273#endif
274	andcc		%o4, %g2, %g0			/* IEU1		Group		*/
275	be,a,pt		%xcc, 15b			/* CTI				*/
276	 stx		%o3, [%o0 - 8]			/* Store			*/
	/* A byte of %o3 may be zero: test them lowest address first;
	   targets 22, 21, ..., 16 correspond to byte offsets 0..6.  */
277	srlx		%o3, 56, %o4			/* IEU0		Group		*/
278
279	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
280	be,pn		%icc, 22f			/* CTI				*/
281	 srlx		%o3, 48, %o4			/* IEU0				*/
282	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
283
284	be,pn		%icc, 21f			/* CTI				*/
285	 srlx		%o3, 40, %o4			/* IEU0				*/
286	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
287	be,pn		%icc, 20f			/* CTI				*/
288
289	 srlx		%o3, 32, %o4			/* IEU0				*/
290	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
291	be,pn		%icc, 19f			/* CTI				*/
292	 srlx		%o3, 24, %o4			/* IEU0				*/
293
294	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
295	be,pn		%icc, 18f			/* CTI				*/
296	 srlx		%o3, 16, %o4			/* IEU0				*/
297	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
298
299	be,pn		%icc, 17f			/* CTI				*/
300	 srlx		%o3, 8, %o4			/* IEU0				*/
301	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
302	be,pn		%icc, 16f			/* CTI				*/
303
	/* First seven bytes are non-zero.  The last byte is tested in the
	   delay slot above; the whole word is stored in the delay slot
	   below on both paths.  Loop if the last byte is non-zero (false
	   alarm), else return.  */
304	 andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
305	bne,pn		%icc, 15b			/* CTI				*/
306	 stx		%o3, [%o0 - 8]			/* Store			*/
307	retl						/* CTI+IEU1	Group		*/
308
309	 mov		%g6, %o0			/* IEU0				*/
310
	/* Tail stores for the merge loop.  %o0 - 8 is the address of the
	   final word; label N is entered when the NUL is at byte offset
	   22 - N.  16-18 store bytes 6, 5, 4 as needed and fall through to
	   19, which stores bytes 0-3 as one word; 20-22 store bytes
	   2, 1, 0 singly.  */
311	.align		16
31216:	srlx		%o3, 8, %o4			/* IEU0		Group		*/
313	stb		%o4, [%o0 - 2]			/* Store			*/
31417:	srlx		%o3, 16, %o4			/* IEU0		Group		*/
315	stb		%o4, [%o0 - 3]			/* Store			*/
316
31718:	srlx		%o3, 24, %o4			/* IEU0		Group		*/
318	stb		%o4, [%o0 - 4]			/* Store			*/
31919:	srlx		%o3, 32, %o4			/* IEU0		Group		*/
320	stw		%o4, [%o0 - 8]			/* Store			*/
321
322	retl						/* CTI+IEU1	Group		*/
323	 mov		%g6, %o0 			/* IEU0				*/
	/* NOTE(review): the two nops look like padding so that label 20
	   starts on a fresh boundary -- confirm.  */
324	nop
325	nop
326
32720:	srlx		%o3, 40, %o4			/* IEU0		Group		*/
328	stb		%o4, [%o0 - 6]			/* Store			*/
32921:	srlx		%o3, 48, %o4			/* IEU0		Group		*/
330	stb		%o4, [%o0 - 7]			/* Store			*/
331
33222:	srlx		%o3, 56, %o4			/* IEU0		Group		*/
333	stb		%o4, [%o0 - 8]			/* Store			*/
334	retl						/* CTI+IEU1	Group		*/
335	 mov		%g6, %o0			/* IEU0				*/
336END(strcat)
337libc_hidden_builtin_def (strcat)
338