/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/cprefix.h>
#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

/* Work around cpp -rob
 *
 * The section flags #alloc and #execinstr are spelled through these
 * object-like macros; presumably this keeps a literal '#' (cpp's
 * stringify operator) out of the function-like macros defined below.
 */
#define ALLOC #alloc
#define EXECINSTR #execinstr
/*
 * EX(x,y,a,b): emit the instruction "x,y" (local label 98), which may
 * fault, together with a private fixup stub (local label 99, placed in
 * .fixup).  The stub branches to fixupretl and executes "a, b, %g3" in the
 * branch delay slot, i.e. it computes into %g3 the byte count that
 * fixupretl hands back to the caller.  The __ex_table entry pairs the
 * address of the instruction with the address of its stub.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/*
 * EX2(x,y,c,d,e,a,b): like EX, but the fixup stub needs two instructions.
 * It first executes "c, d, e", then branches to fixupretl with "a, b, %g3"
 * in the delay slot.
 */
#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

/*
 * EXO2(x,y): emit the instruction "x,y" (local label 98) and register the
 * shared fixup at local label 97 for it.  Label 97 lives in the .fixup
 * section at the end of this file and reports %o2 as the not-copied count,
 * so EXO2 may only be used while %o2 still holds the remaining length.
 */
#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

/*
 * EXT(start,end,handler): register a range entry in __ex_table.  The entry
 * is emitted as the four words "start, 0, end, handler"; unlike EX/EXO2 it
 * covers every instruction from start up to end with a single handler.
 * The handlers at the end of this file expect %g2 to hold the index of the
 * faulting instruction within the range (see the note above label 50).
 */
#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please do not change the following macros unless you change the logic
 * used in .fixup at the end of this file as well: the fixup handlers work
 * out how many bytes were copied from the index of the faulting
 * instruction, so the number and order of loads and stores matter.
 */

/* Both these macros have to start with exactly the same insn: the first
 * ldd of MOVE_BIGCHUNK is executed in the delay slot of "be ldd_std + 4",
 * which then skips the first insn of MOVE_BIGALIGNCHUNK.
 *
 * MOVE_BIGCHUNK copies 32 bytes from src + offset to dst + offset using
 * four ldd (8-byte loads into the register pairs t0/t1 .. t6/t7) followed
 * by eight st (4-byte stores): 12 instructions in total.
 */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

/*
 * MOVE_BIGALIGNCHUNK copies 32 bytes from src + offset to dst + offset
 * using four ldd followed by four std (8-byte stores): 8 instructions in
 * total.  It is used when the destination is 8-byte aligned as well.
 * Its first instruction must stay identical to that of MOVE_BIGCHUNK.
 */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

/*
 * MOVE_LASTCHUNK copies the 16 bytes that end "offset" bytes before src
 * to the matching place before dst: two ldd followed by four st, i.e.
 * 6 instructions (24 bytes of code) per 16 bytes of data.  The computed
 * jump into copy_user_table relies on that 3:2 ratio.
 */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

/*
 * MOVE_HALFCHUNK copies 8 bytes from src + offset to dst + offset as four
 * halfwords: four lduh followed by four sth, 8 instructions in total.
 */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

/*
 * MOVE_SHORTCHUNK copies the 2 bytes that end "offset" bytes before src
 * to the matching place before dst: two ldub followed by two stb, i.e.
 * 4 instructions (16 bytes of code) per 2 bytes of data.  byte_chunk
 * passes negative offsets to address forwards from src/dst instead.
 */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

115	.text
116	.align	4
117
118	.globl  __copy_user_begin
119__copy_user_begin:
120
121	.globl	C_LABEL(__copy_user)
122dword_align:
123	andcc	%o1, 1, %g0
124	be	4f
125	 andcc	%o1, 2, %g0
126
127	EXO2(ldub [%o1], %g2)
128	add	%o1, 1, %o1
129	EXO2(stb %g2, [%o0])
130	sub	%o2, 1, %o2
131	bne	3f
132	 add	%o0, 1, %o0
133
134	EXO2(lduh [%o1], %g2)
135	add	%o1, 2, %o1
136	EXO2(sth %g2, [%o0])
137	sub	%o2, 2, %o2
138	b	3f
139	 add	%o0, 2, %o0
1404:
141	EXO2(lduh [%o1], %g2)
142	add	%o1, 2, %o1
143	EXO2(sth %g2, [%o0])
144	sub	%o2, 2, %o2
145	b	3f
146	 add	%o0, 2, %o0
147
/*
 * __copy_user: %o0 = dst, %o1 = src, %o2 = len.
 * Returns 0 in %o0 on success, otherwise (via the fixup code) the number
 * of bytes not copied.
 *
 * Dispatch:
 *   (dst ^ src) & 3 != 0          -> cannot_optimize
 *   len <= 15                     -> short_aligned_end
 *   src not 4-byte aligned        -> dword_align, which returns to label 3
 * From label 3 on, one word is copied if needed to make src 8-byte
 * aligned, the remaining length moves to %g1 (the big-chunk macros use
 * %o2-%o5 and %g2-%g5 as scratch), and the data is copied in 128-byte
 * rounds with %g7 counting the bytes left in whole rounds.  The leftover
 * 16-byte pieces are handled by a computed jump into copy_user_table.
 */
C_LABEL(__copy_user):	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5		/* %o5 = (dst ^ src) & 3 */
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0		/* also the re-entry point for dword_align */

	be	2f
	 mov	%o2, %g1		/* %g1 = remaining length from here on */

	/* src is word but not doubleword aligned: move one word. */
	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7	/* %g7 = bytes in whole 128-byte rounds */
	be	3f
	 andcc	%o0, 4, %g0

	/* dst doubleword aligned too: use the std loop.  The ldd in the
	 * delay slot below replaces the first insn of ldd_std. */
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7		/* %g7 = bytes in leftover 16-byte pieces */
	be	copy_user_table_end
	 andcc	%g1, 8, %g0		/* flags consumed at copy_user_table_end */

	/* Jump back from copy_user_table_end by 24 bytes of code for each
	 * 16 bytes of data, i.e. by %g7 + %g7 / 2, after advancing both
	 * pointers past the region the table will fill. */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

/*
 * copy_user_table: seven 16-byte moves, entered by a computed jump so that
 * exactly (%g1 & 0x70) bytes are copied.  %o0/%o1 already point past the
 * region, which is why MOVE_LASTCHUNK addresses backwards.  Faults inside
 * the table are resolved by fixup 51.
 */
copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	/* Flags here come from "andcc %g1, 8, %g0": no 8-byte piece left? */
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	/* Move 8 bytes.  At most (%g1 & 0xf) bytes are outstanding here;
	 * once the first word has been stored, 4 fewer. */
	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/*
 * copy_user_last7: copy the final (%g1 & 7) bytes as an optional word,
 * halfword and byte, then return 0.  On entry the condition codes must
 * reflect "%g1 & 4"; each step tests the next lower bit in the delay slot
 * of its branch.
 */
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0			/* success: nothing left uncopied */

/*
 * ldd_std: 128-byte copy loop for the case where dst is 8-byte aligned as
 * well, using ldd/std pairs.  It is first entered at ldd_std + 4 from
 * __copy_user (the first ldd having run in that branch's delay slot) and
 * loops while %g7 is non-zero.  Faults are resolved by fixup 52.
 * Afterwards it dispatches into copy_user_table exactly like the
 * MOVE_BIGCHUNK loop does.
 */
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7		/* %g7 = bytes in leftover 16-byte pieces */
	be	copy_user_table_end
	 andcc	%g1, 8, %g0		/* flags consumed at copy_user_table_end */

	/* Computed jump: 24 bytes of code per 16 bytes of data. */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

/*
 * cannot_optimize: dst and src differ in their low two address bits
 * (%o5 = (dst ^ src) & 3, non-zero).  On entry the condition codes hold
 * "cmp %o2, 15".
 *   len <= 15  -> short_end (byte table only)
 *   %o5 != 2   -> byte_chunk (16 bytes per round, byte by byte)
 *   %o5 == 2   -> halfword loop below, after one byte if src is odd
 * %o3 holds the length rounded down to a multiple of 16.
 */
cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	 nop

	/* Odd src: move one byte so halfword accesses become aligned. */
	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end		/* fewer than 16 bytes left now */
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	/* Finish through the short table; %o3 = even part of the tail. */
	b	2f
	 and	%o2, 0xe, %o3

/*
 * byte_chunk: copy 16 bytes per round one byte at a time, looping while
 * %o3 (length rounded down to a multiple of 16) is non-zero.  The negative
 * offsets make MOVE_SHORTCHUNK address forwards from %o1/%o0.  Faults are
 * resolved by fixup 54.  Falls through into short_end for the tail.
 */
byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

/*
 * short_end: copy the last (%o2 & 0xf) bytes.  The even part
 * (%o3 = %o2 & 0xe) is copied by a computed jump into a table of seven
 * 2-byte moves: each move is 16 bytes of code, hence the jump distance of
 * %o3 * 8.  %o0/%o1 are advanced past the region first.  The jmpl delay
 * slot tests bit 0 of the length for the final odd byte.  Faults inside
 * the table are resolved by fixup 55.
 */
short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f			/* even length: done */
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0			/* success: nothing left uncopied */

/*
 * short_aligned_end: len <= 15 and dst/src agree in their low two address
 * bits.  On entry the condition codes hold "%o1 & 3": if src is not word
 * aligned the byte table in short_end is used.  Otherwise an optional
 * 8-byte piece is copied as two words and copy_user_last7 finishes the
 * remaining (len & 7) bytes, with the flags for "len & 4" already set.
 */
short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	/* The first word is already stored if this one faults. */
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1		/* copy_user_last7 reads the length from %g1 */

360	.section .fixup,#alloc,#execinstr
361	.align	4
36297:
363	mov	%o2, %g3
364fixupretl:
365	sethi   %hi(PAGE_OFFSET), %g1
366	cmp	%o0, %g1
367	blu	1f
368	 cmp	%o1, %g1
369	bgeu	1f
370	 nop
371	save	%sp, -64, %sp
372	mov	%i0, %o0
373	call	__bzero
374	 mov	%g3, %o1
375	restore
3761:	retl
377	 mov	%g3, %o0
378
/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* Fault inside the MOVE_BIGCHUNK loop.  Each 32-byte chunk is 12 insns:
 * 4 ldd followed by 8 st, so within a chunk nothing has been stored while
 * the insn index x is below 4, and (x - 4) words have been stored after.
 * %g7 holds the bytes left in whole 128-byte rounds at the top of the
 * current round, %g1 the length at loop entry.
 *
 * x = g2 % 12;
 * g3 = (g1 & 0x7f) + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 *
 * Label 60 is the shared tail, also used by fixup 52.  It expects
 * %g2 = bytes stored in the current chunk, %g7 reduced by 32 per
 * completed chunk and %o0 pre-incremented by the original %g7.
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f			/* fault in a load: nothing stored yet */
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* Fault inside copy_user_table (7 chunks of 6 insns: 2 ldd, then 4 st;
 * 42 insns in all).  %o0 already points past the region the table fills.
 *
 * i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= g3 - (g1 & 15);
 *
 * i / 6 is the number of whole chunks after the faulting one.  j >= 4
 * means the fault was in one of the two loads, so all 16 bytes of the
 * chunk are outstanding; otherwise (j + 1) words are still unwritten.
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
/* Fault inside the ldd_std loop (8 insns per 32-byte chunk: 4 ldd, then
 * 4 std).  Bit 2 of g2 set means the fault was in a store, after
 * (g2 & 3) doublewords of the chunk had been written.
 *
 * g3 = (g1 & 0x7f) + g7 - (g2 / 8) * 32 - ((g2 & 4) ? (g2 & 3) * 8 : 0);
 * o0 += (g2 / 8) * 32;
 *
 * Finishes through the shared tail at label 60 of fixup 50.
 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2			/* fault in a load: nothing stored yet */
53:
/* Fault inside the halfword loop (2 chunks of 8 insns: 4 lduh, then
 * 4 sth).  Bit 3 of g2 selects the second chunk; bit 2 set means the
 * fault was in a store, after (g2 & 3) halfwords had been written.
 *
 * g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0);
 * o0 += (g2 & 8);
 */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
54:
/* Fault inside byte_chunk (8 chunks of 4 insns: 2 ldub, then 2 stb).
 * One byte of the current chunk has been written only when the fault is
 * in the second stb, i.e. when bits 0 and 1 of g2 are both set.
 *
 * g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (((g2 & 2) && (g2 & 1)) ? 1 : 0);
 * o0 += (g2 / 4) * 2;
 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
55:
/* Fault inside the short table (7 chunks of 4 insns: 2 ldub, then 2 stb;
 * 28 insns in all).  %o0 already points past the region the table fills.
 *
 * i = 27 - g2;
 * left = (i / 4) * 2 + ((i & 3) ? 2 : 1);
 * o0 -= left;
 * g3 = left + (o2 & 1);
 *
 * i / 4 is the number of whole 2-byte chunks after the faulting one.
 * (i & 3) == 0 only for the second stb of a chunk, when one byte of the
 * chunk is still unwritten; for the two loads and the first stb both
 * bytes are.  (o2 & 1) accounts for the trailing odd byte, which is
 * copied after the table.
 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	2, %g2			/* both bytes of the chunk outstanding */
	add	%o5, %o5, %o5
	be,a	1f
	 mov	1, %g2			/* second stb: only one byte outstanding */
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3

/* Marks the end of the copy routines; paired with __copy_user_begin. */
	.globl  __copy_user_end
__copy_user_end:
