 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 * at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 * backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 * There are unfortunately lots of special cases where some registers are
 * not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 * Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 * frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
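
/*
 * An illustrative sketch of the pt_regs frame on the kernel stack
 * (field order assumed from asm-offsets; highest address first):
 *
 *      SS, RSP, EFLAGS, CS, RIP        <- top of stack: hardware frame
 *      ORIG_RAX                        <- syscall number / error code slot
 *      RDI, RSI, RDX, RCX, RAX, R8..R11 <- partial stack frame (SAVE_ARGS)
 *      RBX, RBP, R12..R15              <- added by SAVE_REST (full frame)
 */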
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
        cmpq $ftrace_stub, ftrace_trace_function
        /* taken from glibc */
        call *ftrace_trace_function
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_irq_enable_syscall_ret)
        movq %gs:pda_oldrsp,%rsp
#endif /* CONFIG_PARAVIRT */

        .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
        bt $9,EFLAGS-\offset(%rsp)      /* interrupts off? */

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

        /* %rsp: at FRAMEEND */
        .macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq R11(%rsp),\tmp     /* get eflags */
        movq \tmp,EFLAGS(%rsp)
        .endm

        .macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
        .endm
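
/*
 * A sketch of the usage pattern these macros imply when the fast path
 * calls into C with a pt_regs argument (sys_foo is a hypothetical handler):
 *
 *      FIXUP_TOP_OF_STACK %r11
 *      call sys_foo
 *      RESTORE_TOP_OF_STACK %r11
 *
 * See ptregscall_common and the stubs below for the real instances.
 */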
        .macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET ss,0*/
        CFI_ADJUST_CFA_OFFSET 8
        pushq $(1<<9)   /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET rflags,0*/
        pushq $__KERNEL_CS      /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET cs,0*/
        pushq \child_rip        /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax      /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
        .endm

        .macro UNFAKE_STACK_FRAME
        CFI_ADJUST_CFA_OFFSET -(6*8)
        .endm
        .macro CFI_DEFAULT_STACK start=1
        CFI_DEF_CFA_OFFSET SS+8
        CFI_REL_OFFSET r15,R15
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET rcx,RCX
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rsi,RSI
        CFI_REL_OFFSET rdi,RDI
        CFI_REL_OFFSET rip,RIP
        /*CFI_REL_OFFSET cs,CS*/
        /*CFI_REL_OFFSET rflags,EFLAGS*/
        CFI_REL_OFFSET rsp,RSP
        /*CFI_REL_OFFSET ss,SS*/
        .endm

/*
 * A newly forked process directly context switches into this.
 */
        push kernel_eflags(%rip)
        CFI_ADJUST_CFA_OFFSET 8
        popf                            # reset kernel eflags
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 *
 * Register setup:
 * rax  system call number
 * rcx  return address for syscall/sysret, C arg3
 * r10  arg3    (--> moved to rcx for C)
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx      saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX  if we had a free scratch register we could save the RSP into the stack frame
 *      and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change frames, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
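
/*
 * For reference, a sketch of the register mapping the fast path implies
 * for a hypothetical 6-argument handler sys_foo:
 *
 *      user:   rax = nr, args in rdi, rsi, rdx, r10, r8, r9
 *      entry:  movq %r10,%rcx ; call *sys_call_table(,%rax,8)
 *      C:      long sys_foo(rdi, rsi, rdx, rcx, r8, r9)
 *
 * Only r10 needs renaming because SYSCALL itself uses rcx (return
 * address) and r11 (eflags).
 */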
        CFI_DEF_CFA rsp,PDA_STACKOFFSET
        /*CFI_REGISTER rflags,r11*/

        /*
         * A hypervisor implementation might want to use a label
         * after the swapgs, so that it can do the swapgs
         * for the guest and jump here on syscall.
         */
ENTRY(system_call_after_swapgs)

        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        /*
         * No need to follow this irqs off/on section - it's straight
         * and short:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        cmpq $__NR_syscall_max,%rax
        call *sys_call_table(,%rax,8)   # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)

/*
 * Syscall return path ending with SYSRET (fast path).
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
        GET_THREAD_INFO(%rcx)
        DISABLE_INTERRUPTS(CLBR_NONE)
        movl threadinfo_flags(%rcx),%edx
        /*
         * sysretq will re-enable interrupts:
         */
        movq RIP-ARGOFFSET(%rsp),%rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER rflags,r11*/
        ENABLE_INTERRUPTS_SYSCALL_RET

        /* Handle reschedules */
        /* edx: work, edi: workmask */
        bt $TIF_NEED_RESCHED,%edx
        ENABLE_INTERRUPTS(CLBR_NONE)
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8

        /* Handle a signal */
        ENABLE_INTERRUPTS(CLBR_NONE)
        testl $_TIF_DO_NOTIFY_MASK,%edx
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi      # &pt_regs -> arg1
        xorl %esi,%esi                  # oldset -> arg2
        call ptregscall_common
1:      movl $_TIF_NEED_RESCHED,%edi
        /* Use IRET because user could have changed frame. This
           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
        DISABLE_INTERRUPTS(CLBR_NONE)
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call

        /* Do syscall tracing */
        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
        FIXUP_TOP_OF_STACK %rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed them */
        cmpq $__NR_syscall_max,%rax
        ja int_ret_from_sys_call        /* RAX(%rsp) set to -ENOSYS above */
        movq %r10,%rcx  /* fixup for C */
        call *sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
        /* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
        .globl int_ret_from_sys_call
int_ret_from_sys_call:
        DISABLE_INTERRUPTS(CLBR_NONE)
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl $~TS_COMPAT,threadinfo_status(%rcx)

        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
        bt $TIF_NEED_RESCHED,%edx
        ENABLE_INTERRUPTS(CLBR_NONE)
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8
        DISABLE_INTERRUPTS(CLBR_NONE)

        /* handle signals and tracing -- both require a full stack frame */
        ENABLE_INTERRUPTS(CLBR_NONE)
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        CFI_ADJUST_CFA_OFFSET 8
        leaq 8(%rsp),%rdi       # &ptregs -> arg1
        call syscall_trace_leave
        CFI_ADJUST_CFA_OFFSET -8
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        testl $_TIF_DO_NOTIFY_MASK,%edx
        movq %rsp,%rdi          # &ptregs -> arg1
        xorl %esi,%esi          # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
        DISABLE_INTERRUPTS(CLBR_NONE)

/*
 * Certain special system calls need to save a complete stack frame.
 */

        .macro PTREGSCALL label,func,arg
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp ptregscall_common
        .endm

        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        CFI_REGISTER rip, r15
        FIXUP_TOP_OF_STACK %r11
        RESTORE_TOP_OF_STACK %r11
        CFI_REGISTER rip, r11
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip, 0
END(ptregscall_common)

ENTRY(stub_execve)
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        FIXUP_TOP_OF_STACK %r11
        RESTORE_TOP_OF_STACK %r11
        jmp int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_ADJUST_CFA_OFFSET -8
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp)     # fixme, this could be done at the higher layer
        jmp int_ret_from_sys_call
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
        CFI_DEF_CFA rsp,SS+8-\ref
        /*CFI_REL_OFFSET ss,SS-\ref*/
        CFI_REL_OFFSET rsp,RSP-\ref
        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
        /*CFI_REL_OFFSET cs,CS-\ref*/
        CFI_REL_OFFSET rip,RIP-\ref

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

        /* 0(%rsp): interrupt number */
        .macro interrupt func
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp, 0
        CFI_DEF_CFA_REGISTER rbp
        /* irqcount is used to check if a CPU is already on an interrupt
           stack or not. While this is essentially redundant with preempt_count,
           it is a little cheaper to use a separate counter in the PDA
           (short of moving irq_enter into assembly, which would be too
           much work) */
1:      incl %gs:pda_irqcount
        cmoveq %gs:pda_irqstackptr,%rsp
        push %rbp                       # backlink for old unwinder
        /*
         * We entered an interrupt context - irqs are off:
         */

ENTRY(common_interrupt)
        /* 0(%rsp): oldrsp-ARGOFFSET */
        DISABLE_INTERRUPTS(CLBR_NONE)
        decl %gs:pda_irqcount
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)

        /* Interrupt came from user space */
        /*
         * Has a correct top of stack, but a partial stack frame.
         * %rcx: thread info. Interrupts are off.
         */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
        movl threadinfo_flags(%rcx),%edx

retint_swapgs:          /* return to user-space */
        /*
         * The iretq could re-enable interrupts:
         */
        DISABLE_INTERRUPTS(CLBR_ANY)

retint_restore_args:    /* return to kernel space */
        DISABLE_INTERRUPTS(CLBR_ANY)
        /*
         * The iretq could re-enable interrupts:
         */

        .section __ex_table, "a"
        .quad irq_return, bad_iret

#ifdef CONFIG_PARAVIRT
        .section __ex_table,"a"
        .quad native_iret, bad_iret

bad_iret:
        /*
         * The iret traps when the %cs or %ss being restored is bogus.
         * We've lost the original trap vector and error code.
         * #GPF is the most likely one to get for an invalid selector.
         * So pretend we completed the iret and took the #GPF in user mode.
         *
         * We are now running with the kernel GS after exception recovery.
         * But error_entry expects us to have user GS to match the user %cs,
         * so swap back.
         */
        jmp general_protection

        /* edi: workmask, edx: work */
        bt $TIF_NEED_RESCHED,%edx
        ENABLE_INTERRUPTS(CLBR_NONE)
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        DISABLE_INTERRUPTS(CLBR_NONE)

        testl $_TIF_DO_NOTIFY_MASK,%edx
        ENABLE_INTERRUPTS(CLBR_NONE)
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        DISABLE_INTERRUPTS(CLBR_NONE)
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption */
        /* rcx: threadinfo. interrupts off. */
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp)    /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
END(common_interrupt)

/*
 * APIC interrupts.
 */
        .macro apicinterrupt num,func
        CFI_ADJUST_CFA_OFFSET 8

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

        .macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
        .endm

ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(irq_move_cleanup_interrupt)
        apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)

ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
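
/*
 * A hypothetical additional vector would follow the same pattern
 * (EXAMPLE_VECTOR and smp_example_interrupt are illustrative names only):
 *
 *      ENTRY(example_interrupt)
 *              apicinterrupt EXAMPLE_VECTOR,smp_example_interrupt
 *      END(example_interrupt)
 */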

/*
 * Exception entry points.
 */
        pushq $0        /* push error code/oldrax */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax      /* push real oldrax to the rdi slot */
        CFI_ADJUST_CFA_OFFSET 8

        .macro errorentry sym
        CFI_ADJUST_CFA_OFFSET 8

        /* error code is on the stack already */
        /* handle NMI-like exceptions that can happen everywhere */
        .macro paranoidentry sym, ist=0, irqtrace=1
        movl $MSR_GS_BASE,%ecx
        movq %gs:pda_data_offset, %rbp
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        DISABLE_INTERRUPTS(CLBR_NONE)

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
        .macro paranoidexit trace=1
        /* ebx: no swapgs flag */
        testl %ebx,%ebx                 /* swapgs needed? */
        jnz paranoid_restore\trace
        testl $3,CS(%rsp)
        jnz paranoid_userspace\trace
paranoid_swapgs\trace:
paranoid_restore\trace:
paranoid_userspace\trace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs\trace
        movq %rsp,%rdi                  /* &pt_regs */
        movq %rax,%rsp                  /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule\trace
        movl %ebx,%edx                  /* arg3: thread flags */
        ENABLE_INTERRUPTS(CLBR_NONE)
        xorl %esi,%esi                  /* arg2: oldset */
        movq %rsp,%rdi                  /* arg1: &pt_regs */
        call do_notify_resume
        DISABLE_INTERRUPTS(CLBR_NONE)
        jmp paranoid_userspace\trace
paranoid_schedule\trace:
        ENABLE_INTERRUPTS(CLBR_ANY)
        DISABLE_INTERRUPTS(CLBR_ANY)
        jmp paranoid_userspace\trace

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
        /* rdi slot contains rax, oldrax contains error code */
        CFI_ADJUST_CFA_OFFSET (14*8)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp)    /* store rax */
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r15,R15
        CFI_REL_OFFSET rdi,RDI
        movq ORIG_RAX(%rsp),%rsi        /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
        DISABLE_INTERRUPTS(CLBR_NONE)
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi

        /* There are two places in the kernel that can potentially fault with
           usergs. Handle them here. The exception handlers after
           iret run with kernel gs again, so don't set the user space flag.
           B-stepping K8s sometimes report a truncated RIP for IRET
           exceptions returning to compat mode. Check for these here too. */
        leaq irq_return(%rip),%rbp
        movl %ebp,%ebp                  /* zero extend */
        cmpq $gs_change,RIP(%rsp)
KPROBE_END(error_entry)

        /* Reload gs selector with exception handling */
        /* edi: new selector */
        CFI_ADJUST_CFA_OFFSET 8
        DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
2:      mfence                          /* workaround */
        CFI_ADJUST_CFA_OFFSET -8
ENDPROC(load_gs_index)

        .section __ex_table,"a"
        .quad gs_change,bad_gs

        /* running with kernelgs */
bad_gs:
        SWAPGS                          /* switch back to user gs */

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
        FAKE_STACK_FRAME $child_rip
        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        orq kernel_thread_flags(%rip),%rdi
        /*
         * It isn't worth checking for reschedule here,
         * so internally to the x86_64 port you can rely on kernel_thread()
         * not to reschedule the child before returning; this avoids the need
         * for hacks, for example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
ENDPROC(kernel_thread)
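
/*
 * A sketch of typical C-side usage (example_fn is a hypothetical thread
 * function):
 *
 *      static int example_fn(void *arg) { return 0; }
 *      kernel_thread(example_fn, NULL, CLONE_FS | CLONE_FILES);
 *
 * The child begins in child_rip below, which calls fn(arg) and then
 * exits via do_exit() with its return value.
 */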
        pushq $0                # fake return address
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
        movq %rax, RAX(%rsp)
        je int_ret_from_sys_call
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
        zeroentry math_state_restore
END(device_not_available)

        /* runs on exception stack */
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug, DEBUG_STACK

        /* runs on exception stack */
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_int3, DEBUG_STACK
        zeroentry do_overflow
        zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

        zeroentry do_reserved

        /* runs on exception stack */
        paranoidentry do_double_fault
        errorentry do_invalid_TSS

ENTRY(segment_not_present)
        errorentry do_segment_not_present
END(segment_not_present)

        /* runs on exception stack */
ENTRY(stack_segment)
        paranoidentry do_stack_segment

KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
        errorentry do_alignment_check
END(alignment_check)

        zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
        /* runs on exception stack */
ENTRY(machine_check)
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check

        /* Call softirq on interrupt stack. Interrupts are off. */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp,0
        CFI_DEF_CFA_REGISTER rbp
        incl %gs:pda_irqcount
        cmove %gs:pda_irqstackptr,%rsp
        push %rbp                       # backlink for old unwinder
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        decl %gs:pda_irqcount
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
ENDPROC(ignore_sysret)