/* Set a block of memory to some byte value.
   For UltraSPARC.
   Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
#include <sysdep.h>
#include <asm/asi.h>

/* Condition-code register named by the %XCC branches in memset.  If XCC
   is already defined when this point is reached, that definition is
   kept; otherwise XCC becomes xcc and USE_BPR is defined with it.  */
#if !defined XCC
# define XCC xcc
# define USE_BPR
#endif

/* Value memset writes to %fprs once a block-store loop has finished.  */
#define FPRS_FEF	4
27
/* SET_BLOCKS (ptr, disp, val)
   Expands to exactly four stx instructions that store VAL to the four
   doublewords at PTR - DISP - 0x18 up to PTR - DISP, lowest address
   first.  memset jumps into the middle of a run of these expansions, so
   the number and the order of the instructions must not change.  */
#define SET_BLOCKS(ptr, disp, val)		\
	stx		val, [ptr - disp - 0x18];	\
	stx		val, [ptr - disp - 0x10];	\
	stx		val, [ptr - disp - 0x08];	\
	stx		val, [ptr - disp];
33
/* ZERO_BLOCKS (ptr, disp, val)
   Expands to exactly eight stx instructions that store VAL to the eight
   doublewords at PTR - DISP - 0x38 up to PTR - DISP, lowest address
   first.  memset jumps into the middle of a run of these expansions, so
   the number and the order of the instructions must not change.  */
#define ZERO_BLOCKS(ptr, disp, val)		\
	stx		val, [ptr - disp - 0x38];	\
	stx		val, [ptr - disp - 0x30];	\
	stx		val, [ptr - disp - 0x28];	\
	stx		val, [ptr - disp - 0x20];	\
	stx		val, [ptr - disp - 0x18];	\
	stx		val, [ptr - disp - 0x10];	\
	stx		val, [ptr - disp - 0x08];	\
	stx		val, [ptr - disp];
43
44	/* Well, memset is a lot easier to get right than bcopy... */
45	.text
46	.align		32
47ENTRY(memset)
48	andcc		%o1, 0xff, %o1
49	mov		%o0, %o5
50	be,a,pt		%icc, 50f
51#ifndef USE_BPR
52	 srl		%o2, 0, %o1
53#else
54	 mov		%o2, %o1
55#endif
56	cmp		%o2, 7
57#ifndef USE_BPR
58	srl		%o2, 0, %o2
59#endif
60	bleu,pn		%XCC, 17f
61	 andcc		%o0, 3, %g5
62	be,pt		%xcc, 4f
63	 and		%o1, 0xff, %o1
64	cmp		%g5, 3
65	be,pn		%xcc, 2f
66	 stb		%o1, [%o0 + 0x00]
67	cmp		%g5, 2
68	be,pt		%xcc, 2f
69	 stb		%o1, [%o0 + 0x01]
70	stb		%o1, [%o0 + 0x02]
712:	sub		%g5, 4, %g5
72	sub		%o0, %g5, %o0
73	add		%o2, %g5, %o2
744:	sllx		%o1, 8, %g1
75	andcc		%o0, 4, %g0
76	or		%o1, %g1, %o1
77	sllx		%o1, 16, %g1
78	or		%o1, %g1, %o1
79	be,pt		%xcc, 2f
80	 sllx		%o1, 32, %g1
81	stw		%o1, [%o0]
82	sub		%o2, 4, %o2
83	add		%o0, 4, %o0
842:	cmp		%o2, 128
85	or		%o1, %g1, %o1
86	blu,pn		%xcc, 9f
87	 andcc		%o0, 0x38, %g5
88	be,pn		%icc, 6f
89	 mov		64, %o4
90	andcc		%o0, 8, %g0
91	be,pn		%icc, 1f
92	 sub		%o4, %g5, %o4
93	stx		%o1, [%o0]
94	add		%o0, 8, %o0
951:	andcc		%o4, 16, %g0
96	be,pn		%icc, 1f
97	 sub		%o2, %o4, %o2
98	stx		%o1, [%o0]
99	stx		%o1, [%o0 + 8]
100	add		%o0, 16, %o0
1011:	andcc		%o4, 32, %g0
102	be,pn		%icc, 7f
103	 andncc		%o2, 0x3f, %o3
104	stw		%o1, [%o0]
105	stw		%o1, [%o0 + 4]
106	stw		%o1, [%o0 + 8]
107	stw		%o1, [%o0 + 12]
108	stw		%o1, [%o0 + 16]
109	stw		%o1, [%o0 + 20]
110	stw		%o1, [%o0 + 24]
111	stw		%o1, [%o0 + 28]
112	add		%o0, 32, %o0
1137:	be,pn		%xcc, 9f
114	 nop
115	ldd		[%o0 - 8], %f0
11618:	wr		%g0, ASI_BLK_P, %asi
117	membar		#StoreStore | #LoadStore
118	andcc		%o3, 0xc0, %g5
119	and		%o2, 0x3f, %o2
120	fsrc2		%f0, %f2
121	fsrc2		%f0, %f4
122	andn		%o3, 0xff, %o3
123	fsrc2		%f0, %f6
124	cmp		%g5, 64
125	fsrc2		%f0, %f8
126	fsrc2		%f0, %f10
127	fsrc2		%f0, %f12
128	brz,pn		%g5, 10f
129	 fsrc2		%f0, %f14
130	be,pn		%icc, 2f
131	 stda		%f0, [%o0 + 0x00] %asi
132	cmp		%g5, 128
133	be,pn		%icc, 2f
134	 stda		%f0, [%o0 + 0x40] %asi
135	stda		%f0, [%o0 + 0x80] %asi
1362:	brz,pn		%o3, 12f
137	 add		%o0, %g5, %o0
13810:	stda		%f0, [%o0 + 0x00] %asi
139	stda		%f0, [%o0 + 0x40] %asi
140	stda		%f0, [%o0 + 0x80] %asi
141	stda		%f0, [%o0 + 0xc0] %asi
14211:	subcc		%o3, 256, %o3
143	bne,pt		%xcc, 10b
144	 add		%o0, 256, %o0
14512:	wr		%g0, FPRS_FEF, %fprs
146	membar		#StoreLoad | #StoreStore
1479:	andcc		%o2, 0x78, %g5
148	be,pn		%xcc, 13f
149	 andcc		%o2, 7, %o2
15014:	rd		%pc, %o4
151	srl		%g5, 1, %o3
152	sub		%o4, %o3, %o4
153	jmpl		%o4 + (13f - 14b), %g0
154	 add		%o0, %g5, %o0
15512:	SET_BLOCKS	(%o0, 0x68, %o1)
156	SET_BLOCKS	(%o0, 0x48, %o1)
157	SET_BLOCKS	(%o0, 0x28, %o1)
158	SET_BLOCKS	(%o0, 0x08, %o1)
15913:	be,pn		%xcc, 8f
160	 andcc		%o2, 4, %g0
161	be,pn		%xcc, 1f
162	 andcc		%o2, 2, %g0
163	stw		%o1, [%o0]
164	add		%o0, 4, %o0
1651:	be,pn		%xcc, 1f
166	 andcc		%o2, 1, %g0
167	sth		%o1, [%o0]
168	add		%o0, 2, %o0
1691:	bne,a,pn	%xcc, 8f
170	 stb		%o1, [%o0]
1718:	retl
172	 mov		%o5, %o0
17317:	brz,pn		%o2, 0f
1748:	 add		%o0, 1, %o0
175	subcc		%o2, 1, %o2
176	bne,pt		%xcc, 8b
177	 stb		%o1, [%o0 - 1]
1780:	retl
179	 mov		%o5, %o0
180
1816:	stx		%o1, [%o0]
182	andncc		%o2, 0x3f, %o3
183	be,pn		%xcc, 9b
184	 nop
185	ba,pt		%xcc, 18b
186	 ldd		[%o0], %f0
187
188#ifndef USE_BPR
189	srl		%o1, 0, %o1
190#endif
191	mov		%o0, %o5
19250:	cmp		%o1, 7
193	bleu,pn		%xcc, 17f
194	 andcc		%o0, 3, %o2
195	be,a,pt		%xcc, 4f
196	 andcc		%o0, 4, %g0
197	cmp		%o2, 3
198	be,pn		%xcc, 2f
199	 stb		%g0, [%o0 + 0x00]
200	cmp		%o2, 2
201	be,pt		%xcc, 2f
202	 stb		%g0, [%o0 + 0x01]
203	stb		%g0, [%o0 + 0x02]
2042:	sub		%o2, 4, %o2
205	sub		%o0, %o2, %o0
206	add		%o1, %o2, %o1
207	andcc		%o0, 4, %g0
2084:	be,pt		%xcc, 2f
209	 cmp		%o1, 128
210	stw		%g0, [%o0]
211	sub		%o1, 4, %o1
212	add		%o0, 4, %o0
2132:	blu,pn		%xcc, 9f
214	 andcc		%o0, 0x38, %o2
215	be,pn		%icc, 6f
216	 mov		64, %o4
217	andcc		%o0, 8, %g0
218	be,pn		%icc, 1f
219	 sub		%o4, %o2, %o4
220	stx		%g0, [%o0]
221	add		%o0, 8, %o0
2221:	andcc		%o4, 16, %g0
223	be,pn		%icc, 1f
224	 sub		%o1, %o4, %o1
225	stx		%g0, [%o0]
226	stx		%g0, [%o0 + 8]
227	add		%o0, 16, %o0
2281:	andcc		%o4, 32, %g0
229	be,pn		%icc, 7f
230	 andncc		%o1, 0x3f, %o3
231	stx		%g0, [%o0]
232	stx		%g0, [%o0 + 8]
233	stx		%g0, [%o0 + 16]
234	stx		%g0, [%o0 + 24]
235	add		%o0, 32, %o0
2366:	andncc		%o1, 0x3f, %o3
2377:	be,pn		%xcc, 9f
238	 wr		%g0, ASI_BLK_P, %asi
239	membar		#StoreLoad | #StoreStore | #LoadStore
240	fzero		%f0
241	andcc		%o3, 0xc0, %o2
242	and		%o1, 0x3f, %o1
243	fzero		%f2
244	andn		%o3, 0xff, %o3
245	faddd		%f0, %f2, %f4
246	fmuld		%f0, %f2, %f6
247	cmp		%o2, 64
248	faddd		%f0, %f2, %f8
249	fmuld		%f0, %f2, %f10
250	faddd		%f0, %f2, %f12
251	brz,pn		%o2, 10f
252	 fmuld		%f0, %f2, %f14
253	be,pn		%icc, 2f
254	 stda		%f0, [%o0 + 0x00] %asi
255	cmp		%o2, 128
256	be,pn		%icc, 2f
257	 stda		%f0, [%o0 + 0x40] %asi
258	stda		%f0, [%o0 + 0x80] %asi
2592:	brz,pn		%o3, 12f
260	 add		%o0, %o2, %o0
26110:	stda		%f0, [%o0 + 0x00] %asi
262	stda		%f0, [%o0 + 0x40] %asi
263	stda		%f0, [%o0 + 0x80] %asi
264	stda		%f0, [%o0 + 0xc0] %asi
26511:	subcc		%o3, 256, %o3
266	bne,pt		%xcc, 10b
267	 add		%o0, 256, %o0
26812:	wr		%g0, FPRS_FEF, %fprs
269	membar		#StoreLoad | #StoreStore
2709:	andcc		%o1, 0xf8, %o2
271	be,pn		%xcc, 13f
272	 andcc		%o1, 7, %o1
27314:	rd		%pc, %o4
274	srl		%o2, 1, %o3
275	sub		%o4, %o3, %o4
276	jmpl		%o4 + (13f - 14b), %g0
277	 add		%o0, %o2, %o0
27812:	ZERO_BLOCKS	(%o0, 0xc8, %g0)
279	ZERO_BLOCKS	(%o0, 0x88, %g0)
280	ZERO_BLOCKS	(%o0, 0x48, %g0)
281	ZERO_BLOCKS	(%o0, 0x08, %g0)
28213:	be,pn		%xcc, 8f
283	 andcc		%o1, 4, %g0
284	be,pn		%xcc, 1f
285	 andcc		%o1, 2, %g0
286	stw		%g0, [%o0]
287	add		%o0, 4, %o0
2881:	be,pn		%xcc, 1f
289	 andcc		%o1, 1, %g0
290	sth		%g0, [%o0]
291	add		%o0, 2, %o0
2921:	bne,a,pn	%xcc, 8f
293	 stb		%g0, [%o0]
2948:	retl
295	 mov		%o5, %o0
29617:	be,pn		%xcc, 13b
297	 orcc		%o1, 0, %g0
298	be,pn		%xcc, 0f
2998:	 add		%o0, 1, %o0
300	subcc		%o1, 1, %o1
301	bne,pt		%xcc, 8b
302	 stb		%g0, [%o0 - 1]
3030:	retl
304	 mov		%o5, %o0
305END(memset)
306libc_hidden_builtin_def (memset)
307