/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id: entry.S,v 1.99 2003/10/24 17:48:32 ak Exp $
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for PT_TRACESYS, signals, or fork/exec et al.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/current.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/hw_irq.h>

	.code64

#define PDAREF(field) %gs:field

/*
 * C code is not supposed to know about partial frames. Every time a C
 * function that looks at the pt_regs is called, these two macros are
 * executed around it. RESTORE_TOP_OF_STACK syncs the syscall state after
 * any possible ptregs manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	PDAREF(pda_oldrsp),\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)	/* contains return address, already in RIP */
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,PDAREF(pda_oldrsp)
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
	movq	%rax,%rdi	/* return value of __switch_to -> prev task */
	call	schedule_tail
	GET_CURRENT(%rcx)
	testb	$PT_TRACESYS,tsk_ptrace(%rcx)
	jnz	2f
1:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	jz	int_ret_from_sys_call
	testl	$ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
2:
	movq	%rsp,%rdi
	call	syscall_trace
	GET_CURRENT(%rcx)
	jmp	1b

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer. It fetches the per-CPU area from the hidden GS base MSR
 * and finds the current kernel stack there.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 */
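/*
 * Illustration only (not part of the kernel): a user-space caller that
 * follows the register contract above looks roughly like this; "msg" and
 * "len" are hypothetical symbols.
 *
 *	movq	$__NR_write,%rax	# system call number
 *	movq	$1,%rdi			# arg0: fd
 *	leaq	msg(%rip),%rsi		# arg1: buffer
 *	movq	$len,%rdx		# arg2: count
 *	syscall				# clobbers rcx (RIP) and r11 (eflags)
 */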
ENTRY(system_call)
	swapgs
	movq	%rsp,PDAREF(pda_oldrsp)
	movq	PDAREF(pda_kernelstack),%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	GET_CURRENT(%rcx)
	testl	$PT_TRACESYS,tsk_ptrace(%rcx)
	jne	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
	.globl ret_from_sys_call
ret_from_sys_call:
sysret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	jne	sysret_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	sysret_signal
sysret_restore_args:
	movq	RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq	PDAREF(pda_oldrsp),%rsp
	swapgs
	sysretq

sysret_signal:
	sti
	xorl	%esi,%esi		# oldset
	leaq	-ARGOFFSET(%rsp),%rdi	# regs
	leaq	do_signal(%rip),%rax
	call	ptregscall_common
sysret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	sysret_restore_args
	sti
	call	schedule
	jmp	sysret_signal_test

sysret_reschedule:
	sti
	call	schedule
	jmp	sysret_with_reschedule

tracesys:
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	ja	tracesys_done
tracesys_call:		/* backtrace marker */
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
tracesys_done:		/* backtrace marker */
	SAVE_REST
	movq	%rsp,%rdi
	call	syscall_trace
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp	ret_from_sys_call

badsys:
	movq	$0,ORIG_RAX-ARGOFFSET(%rsp)
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * This handles 64-bit calls that require restoring all registers
 * (impossible with SYSRET) as well as 32-bit calls.
 */
ENTRY(int_ret_from_sys_call)
intret_test_kernel:
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
intret_with_reschedule:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	jne	intret_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	intret_signal
	jmp	retint_restore_args_swapgs

intret_reschedule:
	sti
	call	schedule
	jmp	intret_with_reschedule

intret_signal:
	sti
	SAVE_REST
	xorq	%rsi,%rsi		# oldset -> arg2
	movq	%rsp,%rdi		# &ptregs -> arg1
	call	do_signal
	RESTORE_REST
intret_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	retint_restore_args_swapgs
	sti
	call	schedule
	# RED-PEN: can we lose signals here?
	jmp	intret_signal_test
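/*
 * Why two exit paths (sketch): sysretq reloads only RIP from %rcx and
 * eflags from %r11, so the fast path above can never hand those two
 * registers back. Anything that must restore a complete register set
 * (sigreturn, ptrace, 32-bit emulation) leaves through
 * int_ret_from_sys_call and ends in iretq, which pops the full hardware
 * frame instead:
 *
 *	RIP	0(%rsp)
 *	CS	8(%rsp)
 *	EFLAGS	16(%rsp)
 *	RSP	24(%rsp)
 *	SS	32(%rsp)
 */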
/*
 * Certain special system calls that need to save a complete stack frame.
 */

	.macro PTREGSCALL label,func
	.globl \label
\label:
	leaq	\func(%rip),%rax
	jmp	ptregscall_common
	.endm

	PTREGSCALL stub_clone, sys_clone
	PTREGSCALL stub_fork, sys_fork
	PTREGSCALL stub_vfork, sys_vfork
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
	PTREGSCALL stub_sigaltstack, sys_sigaltstack

	.macro PTREGSCALL3 label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm

	PTREGSCALL3 stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq	%r11
	SAVE_REST
	movq	%r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	RESTORE_REST
	pushq	%r11
	ret

ENTRY(stub_execve)
	popq	%r11
	SAVE_REST
	movq	%r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	GET_CURRENT(%rcx)
	testl	$ASM_THREAD_IA32,tsk_thread+thread_flags(%rcx)
	jnz	exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	RESTORE_REST
	pushq	%r11
	ret

exec_32bit:
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	addq	$8, %rsp
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme: this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers; here too
 * the full frame is only saved for signals.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
ENTRY(common_interrupt)
	testl	$3,16(%rsp)	# from kernel?
	je	1f
	swapgs
1:	cld
#ifdef CONFIG_X86_REMOTE_DEBUG
	SAVE_ALL
	movq	%rsp,%rdi
#else
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	addl	$1,PDAREF(pda_irqcount)	# XXX: should be merged with irq.c irqcount
	movq	PDAREF(pda_irqstackptr),%rax
	cmoveq	%rax,%rsp
	pushq	%rdi			# save old stack
	call	do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ENTRY(ret_from_intr)
	cli
	popq	%rdi
	subl	$1,PDAREF(pda_irqcount)
	leaq	ARGOFFSET(%rdi),%rsp
	testl	$3,CS(%rdi)	# from kernel?
	je	retint_restore_args
	/* Interrupt came from user space */
retint_with_reschedule:
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	jne	retint_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	retint_signal
retint_restore_args_swapgs:
	swapgs
retint_restore_args:
	RESTORE_ARGS 0,8
iret_label:
	iretq
	.section __ex_table,"a"
	.align 8
	.quad	iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
bad_iret:
	/* runs with kernelgs again */
	movq	$-9999,%rdi	/* better code? */
	jmp	do_exit
	.previous
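/*
 * How the iret fixup above works (sketch): each __ex_table record pairs
 * the address of an instruction that may fault with a recovery address:
 *
 *	.quad	iret_label,bad_iret
 *
 * When iretq faults in kernel mode, the fault handler searches the
 * exception table and, on a hit, rewrites the saved RIP so execution
 * resumes at bad_iret instead of oopsing.
 */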
retint_signal:
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorq	%rsi,%rsi		# oldset
	movq	%rsp,%rdi		# &pt_regs
	call	do_signal
	RESTORE_REST
retint_signal_test:
	cli
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	je	retint_restore_args_swapgs
	sti
	call	schedule
	jmp	retint_signal_test

retint_reschedule:
	sti
	call	schedule
	cli
	jmp	retint_with_reschedule

/* IF: off; stack contains the irq number in the orig_rax slot */
	.macro IRQ_ENTER
	cld
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%rax
	pushq	%r8
	pushq	%r9
	pushq	%r10
	pushq	%r11
	leaq	-48(%rsp),%rdi
	testl	$3,136(%rdi)
	je	1f
	swapgs
1:	addl	$1,%gs:pda_irqcount
	movq	%gs:pda_irqstackptr,%rax
	cmoveq	%rax,%rsp
	pushq	%rdi
	.endm

	.macro BUILD_SMP_INTERRUPT x,v
ENTRY(\x)
	push	$\v-256
	IRQ_ENTER
	call	smp_\x
	jmp	ret_from_intr
	.endm

#ifdef CONFIG_SMP
	BUILD_SMP_INTERRUPT reschedule_interrupt,RESCHEDULE_VECTOR
	BUILD_SMP_INTERRUPT invalidate_interrupt,INVALIDATE_TLB_VECTOR
	BUILD_SMP_INTERRUPT call_function_interrupt,CALL_FUNCTION_VECTOR
#endif
#ifdef CONFIG_X86_LOCAL_APIC
	BUILD_SMP_INTERRUPT apic_timer_interrupt,LOCAL_TIMER_VECTOR
	BUILD_SMP_INTERRUPT error_interrupt,ERROR_APIC_VECTOR
	BUILD_SMP_INTERRUPT spurious_interrupt,SPURIOUS_APIC_VECTOR
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq	$0	/* push error code/oldrax */
	pushq	%rax	/* push real oldrax to the rdi slot */
	leaq	\sym(%rip),%rax
	jmp	error_entry
	.endm

	.macro errorentry sym
	pushq	%rax
	leaq	\sym(%rip),%rax
	jmp	error_entry
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
	ALIGN
error_entry:
	/* rdi slot contains rax, oldrax contains error code */
	pushq	%rsi
	movq	8(%rsp),%rsi	/* load rax */
	pushq	%rdx
	pushq	%rcx
	pushq	%rsi	/* store rax */
	pushq	%r8
	pushq	%r9
	pushq	%r10
	pushq	%r11
	cld
	SAVE_REST
	xorl	%r15d,%r15d
	testl	$3,CS(%rsp)
	je	error_kernelspace
	swapgs
error_action:
	movq	%rdi,RDI(%rsp)
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* r15d: swapgs flag */
error_exit:
	testl	%r15d,%r15d
	jnz	error_restore
error_test:
	cli
	GET_CURRENT(%rcx)
	cmpq	$0,tsk_need_resched(%rcx)
	jne	error_reschedule
	cmpl	$0,tsk_sigpending(%rcx)
	jne	error_signal
error_restore_swapgs:
	swapgs
error_restore:
	RESTORE_REST
	jmp	retint_restore_args

error_reschedule:
	sti
	call	schedule
	jmp	error_test

error_signal:
	sti
	xorq	%rsi,%rsi
	movq	%rsp,%rdi
	call	do_signal
error_signal_test:
	GET_CURRENT(%rcx)
	cli
	cmpq	$0,tsk_need_resched(%rcx)
	je	error_restore_swapgs
	sti
	call	schedule
	jmp	error_signal_test
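/*
 * For reference (sketch): "zeroentry do_debug" above expands to
 *
 *	pushq	$0			# fake error code in the orig_rax slot
 *	pushq	%rax			# real rax, parked in the rdi slot
 *	leaq	do_debug(%rip),%rax	# handler for error_entry to call
 *	jmp	error_entry
 *
 * so every handler is invoked as handler(struct pt_regs *regs, long error_code).
 * %r15d carries the swapgs flag through this path: 0 means we came from
 * user mode and swapped GS on entry; nonzero means kernel GS is already
 * loaded, so error_exit must skip the final swapgs.
 */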
error_kernelspace:
	incl	%r15d
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. */
	leaq	iret_label(%rip),%rdx
	cmpq	%rdx,RIP(%rsp)
	je	1f
	/* Check the truncated address too; this works around a CPU issue. */
	movl	%edx,%edx	/* zero extend */
	cmpq	%rdx,RIP(%rsp)
	je	1f
	cmpq	$gs_change,RIP(%rsp)
	jne	error_action
	/* iret_label and gs_change are handled by exception handlers
	   and the exit points run with kernelgs again */
1:	swapgs
	jmp	error_action

	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(load_gs_index)
	pushf
	cli
	swapgs
gs_change:
	movl	%edi,%gs
2:	mfence		/* workaround for Opteron erratum #88 */
	swapgs
	popf
	ret

	.section __ex_table,"a"
	.align 8
	.quad	gs_change,bad_gs
	.previous

bad_gs:
	swapgs
	xorl	%eax,%eax
	movl	%eax,%gs
	jmp	2b

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long arch_kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(arch_kernel_thread)
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq	%rdx,%rdi
	orq	$CLONE_VM, %rdi

	movq	$-1, %rsi

	movq	%rsp, %rdx

	# clone now
	call	do_fork
	# save retval on the stack so it's popped before `ret`
	movq	%rax, RAX(%rsp)

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning. This avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq	%rdi, %rax
	movq	%rsi, %rdi
	call	*%rax
	# exit
	xorq	%rdi, %rdi
	call	do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret

ENTRY(page_fault)
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	pushq	$-1
	SAVE_ALL
	xorl	%r15d,%r15d
	testl	$3,CS(%rsp)
	jz	1f
	swapgs
2:	movq	%cr0,%rax
	leaq	math_state_restore(%rip),%rcx
	leaq	math_emulate(%rip),%rbx
	testl	$0x4,%eax
	cmoveq	%rcx,%rbx
	call	*%rbx
	jmp	error_exit
1:	incl	%r15d
	jmp	2b
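/*
 * Note on the #NM handler above (sketch): the context switch code sets
 * CR0.TS instead of eagerly saving FPU state, so the next user's first
 * FPU/SSE instruction traps here. If CR0.EM (0x4) is clear, the cmoveq
 * picks math_state_restore, which reloads the lazily saved state; with
 * EM set there is no FPU and math_emulate runs instead.
 */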
ENTRY(debug)
	zeroentry do_debug

ENTRY(nmi)
	pushq	$-1
	SAVE_ALL
	/* An NMI can arrive inside the critical section around a swapgs,
	   so we have to use this expensive check. Rely on arch_prctl
	   forbidding user space from setting a negative GS base; only the
	   kernel value is negative. */
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	xorl	%ebx,%ebx
	testl	%edx,%edx
	js	1f
	swapgs
	movl	$1,%ebx
1:	movq	%rsp,%rdi
	call	do_nmi
	cli
	testl	%ebx,%ebx
	jz	error_restore
	swapgs
	jmp	error_restore

ENTRY(int3)
	zeroentry do_int3

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

ENTRY(double_fault)
	errorentry do_double_fault

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

ENTRY(stack_segment)
	errorentry do_stack_segment

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

ENTRY(machine_check)
	zeroentry do_machine_check

ENTRY(call_debug)
	zeroentry do_call_debug
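/*
 * Illustration only (hypothetical, not part of this file): the ENTRY
 * stubs above are installed in the IDT by the trap initialization code,
 * along the lines of
 *
 *	set_intr_gate(0,&divide_error);
 *	set_intr_gate(14,&page_fault);
 *
 * so each vector lands on its stub, which funnels into error_entry with
 * the handler address in %rax.
 */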