/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id: entry.S,v 1.99 2003/10/24 17:48:32 ak Exp $
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for PT_TRACESYS, signals, or fork/exec et al.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/hw_irq.h>

	.code64

#define PDAREF(field) %gs:field

/*
 * C code is not supposed to know about partial frames. Every time a C
 * function that looks at the pt_regs is called, these two macros are
 * executed around it. RESTORE_TOP_OF_STACK syncs the syscall state after
 * any possible pt_regs manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	PDAREF(pda_oldrsp),\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)	/* contains return address, already in RIP */
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,PDAREF(pda_oldrsp)
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm


/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
	movq %rax,%rdi		/* return value of __switch_to -> prev task */
	call schedule_tail
	GET_CURRENT(%rcx)
	testb $PT_TRACESYS,tsk_ptrace(%rcx)
	jnz 2f
1:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	jz   int_ret_from_sys_call
	testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
2:
	movq %rsp,%rdi
	call syscall_trace
	GET_CURRENT(%rcx)
	jmp 1b

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer. The entry code gets the per-CPU area from the hidden GS
 * MSR and finds the current kernel stack there.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 */

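/*
 * Roughly, the fast path below is equivalent to this C sketch
 * (illustrative only; the real path also handles ptrace and bad
 * syscall numbers):
 *
 *	if (rax <= __NR_syscall_max)
 *		regs->rax = sys_call_table[rax](rdi, rsi, rdx, r10, r8, r9);
 *	else
 *		regs->rax = -ENOSYS;
 *
 * r10 is copied into rcx first because the C ABI expects arg3 in rcx,
 * which SYSCALL has already clobbered with the return address.
 */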
ENTRY(system_call)
	swapgs
	movq	%rsp,PDAREF(pda_oldrsp)
	movq	PDAREF(pda_kernelstack),%rsp
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	GET_CURRENT(%rcx)
	testl $PT_TRACESYS,tsk_ptrace(%rcx)
	jne tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
	.globl ret_from_sys_call
ret_from_sys_call:
sysret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	jne sysret_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne sysret_signal
sysret_restore_args:
	movq	RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq	PDAREF(pda_oldrsp),%rsp
	swapgs
	sysretq

sysret_signal:
	sti
	xorl %esi,%esi			# oldset
	leaq -ARGOFFSET(%rsp),%rdi	# regs
	leaq do_signal(%rip),%rax
	call ptregscall_common
sysret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   sysret_restore_args
	sti
	call schedule
	jmp sysret_signal_test

sysret_reschedule:
	sti
	call schedule
	jmp sysret_with_reschedule

tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  tracesys_done
tracesys_call:		/* backtrace marker */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
tracesys_done:		/* backtrace marker */
	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call

badsys:
	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * This can be either 64bit calls that require restoring of all registers
 * (impossible with sysret) or 32bit calls.
 */
ENTRY(int_ret_from_sys_call)
intret_test_kernel:
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
intret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	jne intret_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne intret_signal
	jmp retint_restore_args_swapgs

intret_reschedule:
	sti
	call schedule
	jmp intret_with_reschedule

intret_signal:
	sti
	SAVE_REST
	xorq %rsi,%rsi		# oldset -> arg2
	movq %rsp,%rdi		# &pt_regs -> arg1
	call do_signal
	RESTORE_REST
intret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   retint_restore_args_swapgs
	sti
	call schedule
	# RED-PEN: can we lose signals here?
	jmp  intret_signal_test

/*
 * Certain special system calls need to save a complete stack frame.
 */

	.macro PTREGSCALL label,func
	.globl \label
\label:
	leaq	\func(%rip),%rax
	jmp	ptregscall_common
	.endm
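
/*
 * For example, "PTREGSCALL stub_clone, sys_clone" expands to:
 *
 *	.globl stub_clone
 * stub_clone:
 *	leaq	sys_clone(%rip),%rax
 *	jmp	ptregscall_common
 */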

	PTREGSCALL stub_clone, sys_clone
	PTREGSCALL stub_fork, sys_fork
	PTREGSCALL stub_vfork, sys_vfork
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
	PTREGSCALL stub_sigaltstack, sys_sigaltstack

	.macro PTREGSCALL3 label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm

	PTREGSCALL3 stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	ret

ENTRY(stub_execve)
	popq %r11
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_CURRENT(%rcx)
	testl $ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	ret

exec_32bit:
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	addq $8, %rsp
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only the callee-clobbered registers; as
 * with syscalls, a full frame is only built for signals.
 *
 * Entry runs with interrupts off.
 */
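
/*
 * Concretely, SAVE_ARGS and IRQ_ENTER below save only rdi, rsi, rdx,
 * rcx, rax and r8-r11 (the registers the C ABI lets a called function
 * clobber); rbx, rbp and r12-r15 are added by SAVE_REST only when a
 * full pt_regs frame is needed.
 */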

/* 0(%rsp): interrupt number */
ENTRY(common_interrupt)
	testl $3,16(%rsp)	# from kernel?
	je   1f
	swapgs
1:	cld
#ifdef CONFIG_X86_REMOTE_DEBUG
	SAVE_ALL
	movq %rsp,%rdi
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	addl $1,PDAREF(pda_irqcount)	# XXX: should be merged with irq.c irqcount
	movq PDAREF(pda_irqstackptr),%rax
	cmoveq %rax,%rsp
	pushq %rdi			# save old stack
	call do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ENTRY(ret_from_intr)
	cli
	popq  %rdi
	subl $1,PDAREF(pda_irqcount)
	leaq ARGOFFSET(%rdi),%rsp
	testl $3,CS(%rdi)	# from kernel?
	je	retint_restore_args
	/* Interrupt came from user space */
retint_with_reschedule:
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	jne retint_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne retint_signal
retint_restore_args_swapgs:
	swapgs
retint_restore_args:
	RESTORE_ARGS 0,8
iret_label:
	iretq
	.section __ex_table,"a"
	.align 8
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
bad_iret:
	/* runs with kernelgs again */
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous


retint_signal:
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorq %rsi,%rsi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_signal
	RESTORE_REST
retint_signal_test:
	cli
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	je   retint_restore_args_swapgs
	sti
	call schedule
	jmp retint_signal_test

retint_reschedule:
	sti
	call schedule
	cli
	jmp retint_with_reschedule

/* IF: off; stack contains the irq number in the orig_rax slot */
	.macro IRQ_ENTER
	cld
	pushq %rdi
	pushq %rsi
	pushq %rdx
	pushq %rcx
	pushq %rax
	pushq %r8
	pushq %r9
	pushq %r10
	pushq %r11
	leaq -48(%rsp),%rdi
	testl $3,136(%rdi)
	je 1f
	swapgs
1:	addl $1,%gs:pda_irqcount
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp
	pushq %rdi
	.endm

	.macro BUILD_SMP_INTERRUPT x,v
ENTRY(\x)
	push $\v-256
	IRQ_ENTER
	call smp_\x
	jmp ret_from_intr
	.endm

#ifdef CONFIG_SMP
	BUILD_SMP_INTERRUPT reschedule_interrupt,RESCHEDULE_VECTOR
	BUILD_SMP_INTERRUPT invalidate_interrupt,INVALIDATE_TLB_VECTOR
	BUILD_SMP_INTERRUPT call_function_interrupt,CALL_FUNCTION_VECTOR
#endif
#ifdef CONFIG_X86_LOCAL_APIC
	BUILD_SMP_INTERRUPT apic_timer_interrupt,LOCAL_TIMER_VECTOR
	BUILD_SMP_INTERRUPT error_interrupt,ERROR_APIC_VECTOR
	BUILD_SMP_INTERRUPT spurious_interrupt,SPURIOUS_APIC_VECTOR
#endif


/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq $0	/* push error code/oldrax */
	pushq %rax	/* push real oldrax to the rdi slot */
	leaq  \sym(%rip),%rax
	jmp error_entry
	.endm

	.macro errorentry sym
	pushq %rax
	leaq  \sym(%rip),%rax
	jmp error_entry
	.endm

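/*
 * For example, "zeroentry do_debug" expands to:
 *
 *	pushq $0			# fake error code
 *	pushq %rax
 *	leaq  do_debug(%rip),%rax
 *	jmp   error_entry
 *
 * errorentry is the same minus the fake error code push, for exceptions
 * where the CPU pushes a real error code itself.
 */
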
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
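
/*
 * Stack layout on entry to error_entry (as set up by the stubs above):
 *
 *	16(%rsp)  hardware frame: rip, cs, eflags, rsp, ss
 *	 8(%rsp)  error code (or 0), later recycled as orig_rax
 *	 0(%rsp)  saved rax, sitting in what becomes the rdi slot
 */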
	ALIGN
error_entry:
	/* rdi slot contains rax, oldrax contains error code */
	pushq %rsi
	movq  8(%rsp),%rsi	/* load rax */
	pushq %rdx
	pushq %rcx
	pushq %rsi	/* store rax */
	pushq %r8
	pushq %r9
	pushq %r10
	pushq %r11
	cld
	SAVE_REST
	xorl %r15d,%r15d
	testl $3,CS(%rsp)
	je error_kernelspace
	swapgs
error_action:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* r15d: swapgs flag */
error_exit:
	testl %r15d,%r15d
	jnz   error_restore
error_test:
	cli
	GET_CURRENT(%rcx)
	cmpq $0,tsk_need_resched(%rcx)
	jne  error_reschedule
	cmpl $0,tsk_sigpending(%rcx)
	jne  error_signal
error_restore_swapgs:
	swapgs
error_restore:
	RESTORE_REST
	jmp retint_restore_args

error_reschedule:
	sti
	call schedule
	jmp  error_test

error_signal:
	sti
	xorq %rsi,%rsi
	movq %rsp,%rdi
	call do_signal
error_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq $0,tsk_need_resched(%rcx)
	je   error_restore_swapgs
	sti
	call schedule
	jmp  error_signal_test

error_kernelspace:
	incl %r15d
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. */
	leaq iret_label(%rip),%rdx
	cmpq %rdx,RIP(%rsp)
	je 1f
	/* check the truncated address too; this works around a CPU issue */
	movl %edx,%edx	/* zero extend */
	cmpq %rdx,RIP(%rsp)
	je   1f
	cmpq $gs_change,RIP(%rsp)
	jne  error_action
	/* iret_label and gs_change are handled by exception handlers
	   and the exit points run with kernelgs again */
1:	swapgs
	jmp error_action

	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(load_gs_index)
	pushf
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround for Opteron erratum #88 */
	swapgs
	popf
	ret

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous

bad_gs:
	swapgs
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
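
/*
 * A minimal, purely illustrative C caller (names hypothetical):
 *
 *	static int worker(void *arg)
 *	{
 *		return do_work(arg);
 *	}
 *
 *	pid = arch_kernel_thread(worker, &data, CLONE_FS | CLONE_FILES);
 *
 * The child starts in child_rip below, which calls fn(arg) and then
 * do_exit(0).
 */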
ENTRY(arch_kernel_thread)
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# do_fork wants: rdi: flags, rsi: usp, rdx: &pt_regs
	movq %rdx,%rdi
	orq  $CLONE_VM, %rdi

	movq $-1, %rsi

	movq %rsp, %rdx

	# clone now
	call do_fork
	# store the retval in the fake frame so RESTORE_ALL reloads it into %rax
	movq %rax, RAX(%rsp)

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning. This avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit with 0
	xorq %rdi, %rdi
	call do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
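
/*
 * Illustrative in-kernel use (argument values hypothetical):
 *
 *	static char *argv[] = { "/sbin/init", NULL };
 *	error = execve("/sbin/init", argv, envp);
 *
 * On success sys_execve returns 0 and we leave through the IRET path so
 * that the complete register state of the new program gets restored.
 */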
ENTRY(execve)
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret

ENTRY(page_fault)
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	pushq $-1
	SAVE_ALL
	xorl %r15d,%r15d
	testl $3,CS(%rsp)
	jz 1f
	swapgs
2:	movq %cr0,%rax
	leaq math_state_restore(%rip),%rcx
	leaq math_emulate(%rip),%rbx
	testl $0x4,%eax		# CR0.EM set?
	cmoveq %rcx,%rbx	# no: restore FPU state instead of emulating
	call *%rbx
	jmp error_exit
1:	incl %r15d
	jmp 2b


ENTRY(debug)
	zeroentry do_debug

ENTRY(nmi)
	pushq $-1
	SAVE_ALL
	/* An NMI can hit inside the critical section of a swapgs, so we
	   have to use this expensive check. Rely on arch_prctl forbidding
	   user space from setting a negative GS base. Only the kernel
	   value is negative. */
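	/* In effect: if ((long)MSR_GS_BASE >= 0) { swapgs; ebx = 1; }
	   i.e. swap only while the (non-negative) user GS base is still
	   loaded; %ebx remembers whether to swap back on exit. */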
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	xorl  %ebx,%ebx
	testl %edx,%edx
	js    1f
	swapgs
	movl  $1,%ebx
1:	movq %rsp,%rdi
	call do_nmi
	cli
	testl %ebx,%ebx
	jz error_restore
	swapgs
	jmp error_restore

ENTRY(int3)
	zeroentry do_int3

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

ENTRY(double_fault)
	errorentry do_double_fault

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

ENTRY(stack_segment)
	errorentry do_stack_segment

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

ENTRY(machine_check)
	zeroentry do_machine_check

ENTRY(call_debug)
	zeroentry do_call_debug