/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

	#include <linux/config.h>

/* #define FIX_ALIGNMENT 1 */

	#include <asm/current.h>
	#include <asm/offset.h>

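/*
 * Both entry points below validate the user range before copying: the
 * addq sets the carry flag if (pointer + count) wraps around, and the
 * cmpq against current->addr_limit (the tsk_addr_limit offset into the
 * task struct) rejects ranges that reach into kernel space.
 */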
/* Standard copy_to_user with segment limit checking */
	.globl copy_to_user
	.p2align 4
copy_to_user:
	GET_CURRENT(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc  bad_to_user
	cmpq tsk_addr_limit(%rax),%rcx
	jae bad_to_user
	jmp copy_user_generic

/* Standard copy_from_user with segment limit checking */
	.globl copy_from_user
	.p2align 4
copy_from_user:
	GET_CURRENT(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc  bad_from_user
	cmpq tsk_addr_limit(%rax),%rcx
	jae  bad_from_user
	/* FALL THROUGH to copy_user_generic */

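/*
 * Fixup code for a failed range check.  copy_from_user must clear the
 * whole destination buffer so callers that ignore the return value never
 * see uninitialized kernel memory; both paths then report the full count
 * as "bytes not copied" in %eax.
 */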
	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	.previous

/*
 * copy_user_generic - memory copy with exception handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
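/*
 * On a fault in mid-copy the exception table below redirects execution to
 * the recovery code at the end of this file, which zeroes the rest of the
 * destination and returns the number of bytes that were not copied.
 */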
	.globl copy_user_generic
	.p2align 4
copy_user_generic:
	prefetcht0 (%rsi)
#ifdef CONFIG_MK8
	prefetchw (%rdi)
#endif
	pushq %rbx
	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

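	/*
	 * Loop setup: %rcx keeps the full remaining count for the tail code,
	 * %rdx becomes the number of 64-byte blocks minus one (so js below
	 * skips the unrolled loop entirely for copies shorter than 64 bytes),
	 * and %rbx holds the 64-byte block size for the fault recovery code.
	 */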
	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js   .Lhandle_tail

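	/*
	 * Unrolled main loop: each iteration moves one 64-byte block as eight
	 * quadwords through %r8-%r11.  Every load and store carries a .LsN or
	 * .LdN label so the __ex_table section below can map a fault on it to
	 * the recovery code that works out how much was left uncopied.
	 */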
	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

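	/*
	 * Tail handling: copy the remaining (count mod 64) bytes, first as
	 * whole quadwords, then byte by byte for the last 0-7 bytes.
	 */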
	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	ret

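	/*
	 * Optional destination-alignment prologue (FIX_ALIGNMENT is commented
	 * out above, so this is normally not assembled): copy single bytes
	 * until %rdi is 8-byte aligned, or hand everything to the byte tail
	 * when the total count is too small to be worth aligning.
	 */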
#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	subq %r9,%rdx
	jz   .Lsmall_align
	js   .Lsmall_align
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	jmp .Lafter_bad_alignment
.Lsmall_align:
	addq %r9,%rdx
	jmp .Lhandle_7
#endif

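	/*
	 * Each entry pairs the address of a faulting instruction with the
	 * address of its fixup handler; the page fault handler searches this
	 * table when a fault hits one of the labelled instructions above.
	 */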
	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Le_byte
	.quad .Ld11,.Le_byte
#endif
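	/* fault while zeroing the rest (the rep stosb below): just give up */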
	.quad .Le5,.Le_zero
	.previous

	/* Compute the offset into the 64-byte main loop block.  Accurate to
	   8 bytes, with the error on the pessimistic side.  This is gross;
	   it would be better to fix the interface. */
	/* eax: zero, ebx: 64 */
.Ls1e: 	addl $8,%eax
.Ls2e: 	addl $8,%eax
.Ls3e: 	addl $8,%eax
.Ls4e: 	addl $8,%eax
.Ls5e: 	addl $8,%eax
.Ls6e: 	addl $8,%eax
.Ls7e: 	addl $8,%eax
.Ls8e: 	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi  /* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest

	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest, just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende