1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame; this is
16 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
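/*
 * Editor's sketch (not authoritative -- see ptrace.h/asm-offsets.h for the
 * real definitions): the stack frame built by the macros above mirrors
 * struct pt_regs, roughly:
 *
 *	struct pt_regs {
 *		unsigned long r15, r14, r13, r12, rbp, rbx;	-- SAVE_REST
 *		unsigned long r11, r10, r9, r8;			-- SAVE_ARGS ...
 *		unsigned long rax, rcx, rdx, rsi, rdi;		-- ... SAVE_ARGS
 *		unsigned long orig_rax;				-- syscall nr / error code
 *		unsigned long rip, cs, eflags, rsp, ss;		-- hardware frame
 *	};
 */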
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54 #include <asm/ftrace.h>
55
56 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57 #include <linux/elf-em.h>
58 #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
59 #define __AUDIT_ARCH_64BIT 0x80000000
60 #define __AUDIT_ARCH_LE 0x40000000
61
62 .code64
63
64 #ifdef CONFIG_FUNCTION_TRACER
65 #ifdef CONFIG_DYNAMIC_FTRACE
66 ENTRY(mcount)
67 retq
68 END(mcount)
69
70 ENTRY(ftrace_caller)
71 cmpl $0, function_trace_stop
72 jne ftrace_stub
73
74 MCOUNT_SAVE_FRAME
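	/*
	 * Editor's note (assumed from MCOUNT_SAVE_FRAME in <asm/ftrace.h>):
	 * 0x38(%rsp) now holds the return address of the mcount call inside
	 * the traced function, and 8(%rbp) holds the traced function's own
	 * return address (its caller). After the MCOUNT_INSN_SIZE adjustment
	 * these become the ip/parent_ip arguments passed below.
	 */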
75
76 movq 0x38(%rsp), %rdi
77 movq 8(%rbp), %rsi
78 subq $MCOUNT_INSN_SIZE, %rdi
79
80 .globl ftrace_call
81 ftrace_call:
82 call ftrace_stub
83
84 MCOUNT_RESTORE_FRAME
85
86 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
87 .globl ftrace_graph_call
88 ftrace_graph_call:
89 jmp ftrace_stub
90 #endif
91
92 .globl ftrace_stub
93 ftrace_stub:
94 retq
95 END(ftrace_caller)
96
97 #else /* ! CONFIG_DYNAMIC_FTRACE */
98 ENTRY(mcount)
99 cmpl $0, function_trace_stop
100 jne ftrace_stub
101
102 cmpq $ftrace_stub, ftrace_trace_function
103 jnz trace
104
105 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
106 cmpq $ftrace_stub, ftrace_graph_return
107 jnz ftrace_graph_caller
108
109 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
110 jnz ftrace_graph_caller
111 #endif
112
113 .globl ftrace_stub
114 ftrace_stub:
115 retq
116
117 trace:
118 MCOUNT_SAVE_FRAME
119
120 movq 0x38(%rsp), %rdi
121 movq 8(%rbp), %rsi
122 subq $MCOUNT_INSN_SIZE, %rdi
123
124 call *ftrace_trace_function
125
126 MCOUNT_RESTORE_FRAME
127
128 jmp ftrace_stub
129 END(mcount)
130 #endif /* CONFIG_DYNAMIC_FTRACE */
131 #endif /* CONFIG_FUNCTION_TRACER */
132
133 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
134 ENTRY(ftrace_graph_caller)
135 cmpl $0, function_trace_stop
136 jne ftrace_stub
137
138 MCOUNT_SAVE_FRAME
139
140 leaq 8(%rbp), %rdi
141 movq 0x38(%rsp), %rsi
142 subq $MCOUNT_INSN_SIZE, %rsi
143
144 call prepare_ftrace_return
145
146 MCOUNT_RESTORE_FRAME
147
148 retq
149 END(ftrace_graph_caller)
150
151
152 .globl return_to_handler
153 return_to_handler:
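	/*
	 * Editor's note: ftrace_return_to_handler() returns (in %rax) the
	 * original return address that the graph tracer hooked. It is stored
	 * at 72(%rsp) below so that, after the saved registers are restored
	 * and 72 bytes are popped, the final retq jumps to it.
	 */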
154 subq $80, %rsp
155
156 movq %rax, (%rsp)
157 movq %rcx, 8(%rsp)
158 movq %rdx, 16(%rsp)
159 movq %rsi, 24(%rsp)
160 movq %rdi, 32(%rsp)
161 movq %r8, 40(%rsp)
162 movq %r9, 48(%rsp)
163 movq %r10, 56(%rsp)
164 movq %r11, 64(%rsp)
165
166 call ftrace_return_to_handler
167
168 movq %rax, 72(%rsp)
169 movq 64(%rsp), %r11
170 movq 56(%rsp), %r10
171 movq 48(%rsp), %r9
172 movq 40(%rsp), %r8
173 movq 32(%rsp), %rdi
174 movq 24(%rsp), %rsi
175 movq 16(%rsp), %rdx
176 movq 8(%rsp), %rcx
177 movq (%rsp), %rax
178 addq $72, %rsp
179 retq
180 #endif
181
182
183 #ifndef CONFIG_PREEMPT
184 #define retint_kernel retint_restore_args
185 #endif
186
187 #ifdef CONFIG_PARAVIRT
188 ENTRY(native_usergs_sysret64)
189 swapgs
190 sysretq
191 #endif /* CONFIG_PARAVIRT */
192
193
194 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
195 #ifdef CONFIG_TRACE_IRQFLAGS
196 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
197 jnc 1f
198 TRACE_IRQS_ON
199 1:
200 #endif
201 .endm
202
203 /*
204 * C code is not supposed to know about the undefined top of stack. Every time
205 * a C function with a pt_regs argument is called from the SYSCALL based
206 * fast path, FIXUP_TOP_OF_STACK is needed.
207 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
208 * manipulation.
209 */
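/*
 * For example, the PTREGSCALL stubs and stub_execve below run
 * FIXUP_TOP_OF_STACK before calling into C and RESTORE_TOP_OF_STACK
 * afterwards, so the C code sees a fully defined pt_regs.
 */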
210
211 /* %rsp:at FRAMEEND */
212 .macro FIXUP_TOP_OF_STACK tmp
213 movq %gs:pda_oldrsp,\tmp
214 movq \tmp,RSP(%rsp)
215 movq $__USER_DS,SS(%rsp)
216 movq $__USER_CS,CS(%rsp)
217 movq $-1,RCX(%rsp)
218 movq R11(%rsp),\tmp /* get eflags */
219 movq \tmp,EFLAGS(%rsp)
220 .endm
221
222 .macro RESTORE_TOP_OF_STACK tmp,offset=0
223 movq RSP-\offset(%rsp),\tmp
224 movq \tmp,%gs:pda_oldrsp
225 movq EFLAGS-\offset(%rsp),\tmp
226 movq \tmp,R11-\offset(%rsp)
227 .endm
228
229 .macro FAKE_STACK_FRAME child_rip
230 /* push in order ss, rsp, eflags, cs, rip */
231 xorl %eax, %eax
232 pushq $__KERNEL_DS /* ss */
233 CFI_ADJUST_CFA_OFFSET 8
234 /*CFI_REL_OFFSET ss,0*/
235 pushq %rax /* rsp */
236 CFI_ADJUST_CFA_OFFSET 8
237 CFI_REL_OFFSET rsp,0
238 pushq $(1<<9) /* eflags - interrupts on */
239 CFI_ADJUST_CFA_OFFSET 8
240 /*CFI_REL_OFFSET rflags,0*/
241 pushq $__KERNEL_CS /* cs */
242 CFI_ADJUST_CFA_OFFSET 8
243 /*CFI_REL_OFFSET cs,0*/
244 pushq \child_rip /* rip */
245 CFI_ADJUST_CFA_OFFSET 8
246 CFI_REL_OFFSET rip,0
247 pushq %rax /* orig rax */
248 CFI_ADJUST_CFA_OFFSET 8
249 .endm
250
251 .macro UNFAKE_STACK_FRAME
252 addq $8*6, %rsp
253 CFI_ADJUST_CFA_OFFSET -(6*8)
254 .endm
255
256 .macro CFI_DEFAULT_STACK start=1
257 .if \start
258 CFI_STARTPROC simple
259 CFI_SIGNAL_FRAME
260 CFI_DEF_CFA rsp,SS+8
261 .else
262 CFI_DEF_CFA_OFFSET SS+8
263 .endif
264 CFI_REL_OFFSET r15,R15
265 CFI_REL_OFFSET r14,R14
266 CFI_REL_OFFSET r13,R13
267 CFI_REL_OFFSET r12,R12
268 CFI_REL_OFFSET rbp,RBP
269 CFI_REL_OFFSET rbx,RBX
270 CFI_REL_OFFSET r11,R11
271 CFI_REL_OFFSET r10,R10
272 CFI_REL_OFFSET r9,R9
273 CFI_REL_OFFSET r8,R8
274 CFI_REL_OFFSET rax,RAX
275 CFI_REL_OFFSET rcx,RCX
276 CFI_REL_OFFSET rdx,RDX
277 CFI_REL_OFFSET rsi,RSI
278 CFI_REL_OFFSET rdi,RDI
279 CFI_REL_OFFSET rip,RIP
280 /*CFI_REL_OFFSET cs,CS*/
281 /*CFI_REL_OFFSET rflags,EFLAGS*/
282 CFI_REL_OFFSET rsp,RSP
283 /*CFI_REL_OFFSET ss,SS*/
284 .endm
285 /*
286 * A newly forked process directly context switches into this.
287 */
288 /* rdi: prev */
289 ENTRY(ret_from_fork)
290 CFI_DEFAULT_STACK
291 push kernel_eflags(%rip)
292 CFI_ADJUST_CFA_OFFSET 8
293 popf # reset kernel eflags
294 CFI_ADJUST_CFA_OFFSET -8
295 call schedule_tail
296 GET_THREAD_INFO(%rcx)
297 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
298 jnz rff_trace
299 rff_action:
300 RESTORE_REST
301 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
302 je int_ret_from_sys_call
303 testl $_TIF_IA32,TI_flags(%rcx)
304 jnz int_ret_from_sys_call
305 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
306 jmp ret_from_sys_call
307 rff_trace:
308 movq %rsp,%rdi
309 call syscall_trace_leave
310 GET_THREAD_INFO(%rcx)
311 jmp rff_action
312 CFI_ENDPROC
313 END(ret_from_fork)
314
315 /*
316 * System call entry. Up to 6 arguments in registers are supported.
317 *
318 * SYSCALL does not save anything on the stack and does not change the
319 * stack pointer.
320 */
321
322 /*
323 * Register setup:
324 * rax system call number
325 * rdi arg0
326 * rcx return address for syscall/sysret, C arg3
327 * rsi arg1
328 * rdx arg2
329 * r10 arg3 (--> moved to rcx for C)
330 * r8 arg4
331 * r9 arg5
332 * r11 eflags for syscall/sysret, temporary for C
333 * r12-r15,rbp,rbx saved by C code, not touched.
334 *
335 * Interrupts are off on entry.
336 * Only called from user space.
337 *
338 * XXX if we had a free scratch register we could save the RSP into the stack frame
339 * and report it properly in ps. Unfortunately we don't have one.
340 *
341 * When the user can change the frame, always force IRET. That is because
342 * IRET deals with non-canonical addresses better; SYSRET has trouble
343 * with them due to bugs in both AMD and Intel CPUs.
344 */
345
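/*
 * Illustrative example (editor's addition): a user-space
 * write(fd, buf, count) arrives here with roughly
 *
 *	rax = __NR_write, rdi = fd, rsi = buf, rdx = count,
 *	rcx = return address, r11 = saved eflags (set by SYSCALL),
 *
 * and the fast path below dispatches it via sys_call_table[rax].
 */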
346 ENTRY(system_call)
347 CFI_STARTPROC simple
348 CFI_SIGNAL_FRAME
349 CFI_DEF_CFA rsp,PDA_STACKOFFSET
350 CFI_REGISTER rip,rcx
351 /*CFI_REGISTER rflags,r11*/
352 SWAPGS_UNSAFE_STACK
353 /*
354 * A hypervisor implementation might want to use a label
355 * after the swapgs, so that it can do the swapgs
356 * for the guest and jump here on syscall.
357 */
358 ENTRY(system_call_after_swapgs)
359
360 movq %rsp,%gs:pda_oldrsp
361 movq %gs:pda_kernelstack,%rsp
362 /*
363 * No need to follow this irqs off/on section - it's straight
364 * and short:
365 */
366 ENABLE_INTERRUPTS(CLBR_NONE)
367 SAVE_ARGS 8,1
368 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
369 movq %rcx,RIP-ARGOFFSET(%rsp)
370 CFI_REL_OFFSET rip,RIP-ARGOFFSET
371 GET_THREAD_INFO(%rcx)
372 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
373 jnz tracesys
374 system_call_fastpath:
375 cmpq $__NR_syscall_max,%rax
376 ja badsys
377 movq %r10,%rcx
378 call *sys_call_table(,%rax,8) # XXX: rip relative
379 movq %rax,RAX-ARGOFFSET(%rsp)
380 /*
381 * Syscall return path ending with SYSRET (fast path)
382 * Has incomplete stack frame and undefined top of stack.
383 */
384 ret_from_sys_call:
385 movl $_TIF_ALLWORK_MASK,%edi
386 /* edi: flagmask */
387 sysret_check:
388 LOCKDEP_SYS_EXIT
389 GET_THREAD_INFO(%rcx)
390 DISABLE_INTERRUPTS(CLBR_NONE)
391 TRACE_IRQS_OFF
392 movl TI_flags(%rcx),%edx
393 andl %edi,%edx
394 jnz sysret_careful
395 CFI_REMEMBER_STATE
396 /*
397 * sysretq will re-enable interrupts:
398 */
399 TRACE_IRQS_ON
400 movq RIP-ARGOFFSET(%rsp),%rcx
401 CFI_REGISTER rip,rcx
402 RESTORE_ARGS 0,-ARG_SKIP,1
403 /*CFI_REGISTER rflags,r11*/
404 movq %gs:pda_oldrsp, %rsp
405 USERGS_SYSRET64
406
407 CFI_RESTORE_STATE
408 /* Handle reschedules */
409 /* edx: work, edi: workmask */
410 sysret_careful:
411 bt $TIF_NEED_RESCHED,%edx
412 jnc sysret_signal
413 TRACE_IRQS_ON
414 ENABLE_INTERRUPTS(CLBR_NONE)
415 pushq %rdi
416 CFI_ADJUST_CFA_OFFSET 8
417 call schedule
418 popq %rdi
419 CFI_ADJUST_CFA_OFFSET -8
420 jmp sysret_check
421
422 /* Handle a signal */
423 sysret_signal:
424 TRACE_IRQS_ON
425 ENABLE_INTERRUPTS(CLBR_NONE)
426 #ifdef CONFIG_AUDITSYSCALL
427 bt $TIF_SYSCALL_AUDIT,%edx
428 jc sysret_audit
429 #endif
430 /* edx: work flags (arg3) */
431 leaq do_notify_resume(%rip),%rax
432 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
433 xorl %esi,%esi # oldset -> arg2
434 call ptregscall_common
435 movl $_TIF_WORK_MASK,%edi
436 /* Use IRET because the user could have changed the frame. This
437 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
438 DISABLE_INTERRUPTS(CLBR_NONE)
439 TRACE_IRQS_OFF
440 jmp int_with_check
441
442 badsys:
443 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
444 jmp ret_from_sys_call
445
446 #ifdef CONFIG_AUDITSYSCALL
447 /*
448 * Fast path for syscall audit without full syscall trace.
449 * We just call audit_syscall_entry() directly, and then
450 * jump back to the normal fast path.
451 */
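/*
 * The register shuffle below lines up with the C prototype, presumably
 * (check <linux/audit.h>):
 *
 *	void audit_syscall_entry(int arch, int major, unsigned long a1,
 *				 unsigned long a2, unsigned long a3,
 *				 unsigned long a4);
 */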
452 auditsys:
453 movq %r10,%r9 /* 6th arg: 4th syscall arg */
454 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
455 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
456 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
457 movq %rax,%rsi /* 2nd arg: syscall number */
458 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
459 call audit_syscall_entry
460 LOAD_ARGS 0 /* reload call-clobbered registers */
461 jmp system_call_fastpath
462
463 /*
464 * Return fast path for syscall audit. Call audit_syscall_exit()
465 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
466 * masked off.
467 */
468 sysret_audit:
469 movq %rax,%rsi /* second arg, syscall return value */
470 cmpq $0,%rax /* is it < 0? */
471 setl %al /* 1 if so, 0 if not */
472 movzbl %al,%edi /* zero-extend that into %edi */
473 inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
474 call audit_syscall_exit
475 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
476 jmp sysret_check
477 #endif /* CONFIG_AUDITSYSCALL */
478
479 /* Do syscall tracing */
480 tracesys:
481 #ifdef CONFIG_AUDITSYSCALL
482 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
483 jz auditsys
484 #endif
485 SAVE_REST
486 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
487 FIXUP_TOP_OF_STACK %rdi
488 movq %rsp,%rdi
489 call syscall_trace_enter
490 /*
491 * Reload arg registers from stack in case ptrace changed them.
492 * We don't reload %rax because syscall_trace_enter() returned
493 * the value it wants us to use in the table lookup.
494 */
495 LOAD_ARGS ARGOFFSET, 1
496 RESTORE_REST
497 cmpq $__NR_syscall_max,%rax
498 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
499 movq %r10,%rcx /* fixup for C */
500 call *sys_call_table(,%rax,8)
501 movq %rax,RAX-ARGOFFSET(%rsp)
502 /* Use IRET because the user could have changed the frame */
503
504 /*
505 * Syscall return path ending with IRET.
506 * Has correct top of stack, but partial stack frame.
507 */
508 .globl int_ret_from_sys_call
509 .globl int_with_check
510 int_ret_from_sys_call:
511 DISABLE_INTERRUPTS(CLBR_NONE)
512 TRACE_IRQS_OFF
513 testl $3,CS-ARGOFFSET(%rsp)
514 je retint_restore_args
515 movl $_TIF_ALLWORK_MASK,%edi
516 /* edi: mask to check */
517 int_with_check:
518 LOCKDEP_SYS_EXIT_IRQ
519 GET_THREAD_INFO(%rcx)
520 movl TI_flags(%rcx),%edx
521 andl %edi,%edx
522 jnz int_careful
523 andl $~TS_COMPAT,TI_status(%rcx)
524 jmp retint_swapgs
525
526 /* Either reschedule or signal or syscall exit tracking needed. */
527 /* First do a reschedule test. */
528 /* edx: work, edi: workmask */
529 int_careful:
530 bt $TIF_NEED_RESCHED,%edx
531 jnc int_very_careful
532 TRACE_IRQS_ON
533 ENABLE_INTERRUPTS(CLBR_NONE)
534 pushq %rdi
535 CFI_ADJUST_CFA_OFFSET 8
536 call schedule
537 popq %rdi
538 CFI_ADJUST_CFA_OFFSET -8
539 DISABLE_INTERRUPTS(CLBR_NONE)
540 TRACE_IRQS_OFF
541 jmp int_with_check
542
543 /* handle signals and tracing -- both require a full stack frame */
544 int_very_careful:
545 TRACE_IRQS_ON
546 ENABLE_INTERRUPTS(CLBR_NONE)
547 SAVE_REST
548 /* Check for syscall exit trace */
549 testl $_TIF_WORK_SYSCALL_EXIT,%edx
550 jz int_signal
551 pushq %rdi
552 CFI_ADJUST_CFA_OFFSET 8
553 leaq 8(%rsp),%rdi # &ptregs -> arg1
554 call syscall_trace_leave
555 popq %rdi
556 CFI_ADJUST_CFA_OFFSET -8
557 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
558 jmp int_restore_rest
559
560 int_signal:
561 testl $_TIF_DO_NOTIFY_MASK,%edx
562 jz 1f
563 movq %rsp,%rdi # &ptregs -> arg1
564 xorl %esi,%esi # oldset -> arg2
565 call do_notify_resume
566 1: movl $_TIF_WORK_MASK,%edi
567 int_restore_rest:
568 RESTORE_REST
569 DISABLE_INTERRUPTS(CLBR_NONE)
570 TRACE_IRQS_OFF
571 jmp int_with_check
572 CFI_ENDPROC
573 END(system_call)
574
575 /*
576 * Certain special system calls need to save a complete full stack frame.
577 */
578
579 .macro PTREGSCALL label,func,arg
580 .globl \label
581 \label:
582 leaq \func(%rip),%rax
583 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
584 jmp ptregscall_common
585 END(\label)
586 .endm
587
588 CFI_STARTPROC
589
590 PTREGSCALL stub_clone, sys_clone, %r8
591 PTREGSCALL stub_fork, sys_fork, %rdi
592 PTREGSCALL stub_vfork, sys_vfork, %rdi
593 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
594 PTREGSCALL stub_iopl, sys_iopl, %rsi
595
596 ENTRY(ptregscall_common)
597 popq %r11
598 CFI_ADJUST_CFA_OFFSET -8
599 CFI_REGISTER rip, r11
600 SAVE_REST
601 movq %r11, %r15
602 CFI_REGISTER rip, r15
603 FIXUP_TOP_OF_STACK %r11
604 call *%rax
605 RESTORE_TOP_OF_STACK %r11
606 movq %r15, %r11
607 CFI_REGISTER rip, r11
608 RESTORE_REST
609 pushq %r11
610 CFI_ADJUST_CFA_OFFSET 8
611 CFI_REL_OFFSET rip, 0
612 ret
613 CFI_ENDPROC
614 END(ptregscall_common)
615
616 ENTRY(stub_execve)
617 CFI_STARTPROC
618 popq %r11
619 CFI_ADJUST_CFA_OFFSET -8
620 CFI_REGISTER rip, r11
621 SAVE_REST
622 FIXUP_TOP_OF_STACK %r11
623 movq %rsp, %rcx
624 call sys_execve
625 RESTORE_TOP_OF_STACK %r11
626 movq %rax,RAX(%rsp)
627 RESTORE_REST
628 jmp int_ret_from_sys_call
629 CFI_ENDPROC
630 END(stub_execve)
631
632 /*
633 * sigreturn is special because it needs to restore all registers on return.
634 * This cannot be done with SYSRET, so use the IRET return path instead.
635 */
636 ENTRY(stub_rt_sigreturn)
637 CFI_STARTPROC
638 addq $8, %rsp
639 CFI_ADJUST_CFA_OFFSET -8
640 SAVE_REST
641 movq %rsp,%rdi
642 FIXUP_TOP_OF_STACK %r11
643 call sys_rt_sigreturn
644 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
645 RESTORE_REST
646 jmp int_ret_from_sys_call
647 CFI_ENDPROC
648 END(stub_rt_sigreturn)
649
650 /*
651 * initial frame state for interrupts and exceptions
652 */
653 .macro _frame ref
654 CFI_STARTPROC simple
655 CFI_SIGNAL_FRAME
656 CFI_DEF_CFA rsp,SS+8-\ref
657 /*CFI_REL_OFFSET ss,SS-\ref*/
658 CFI_REL_OFFSET rsp,RSP-\ref
659 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
660 /*CFI_REL_OFFSET cs,CS-\ref*/
661 CFI_REL_OFFSET rip,RIP-\ref
662 .endm
663
664 /* initial frame state for interrupts (and exceptions without error code) */
665 #define INTR_FRAME _frame RIP
666 /* initial frame state for exceptions with error code (and interrupts with
667 vector already pushed) */
668 #define XCPT_FRAME _frame ORIG_RAX
669
670 /*
671 * Interrupt entry/exit.
672 *
673 * Interrupt entry points save only callee-clobbered registers in the fast path.
674 *
675 * Entry runs with interrupts off.
676 */
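/*
 * Editor's summary of the "interrupt" macro below: build a partial
 * pt_regs frame with SAVE_ARGS, pass its address as arg1 in %rdi,
 * swapgs if we came from user space, switch to the per-CPU interrupt
 * stack (pda_irqstackptr) unless we are already on it, and call \func.
 */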
677
678 /* 0(%rsp): interrupt number */
679 .macro interrupt func
680 cld
681 SAVE_ARGS
682 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
683 pushq %rbp
684 /*
685 * Save rbp twice: once to mark the stack frame, as usual, and once to
686 * fill pt_regs properly. This is needed because bx, not bp, comes right
687 * before the last saved register in that structure. If the base
688 * pointer were in the place bx is today, this would not be needed.
689 */
690 movq %rbp, -8(%rsp)
691 CFI_ADJUST_CFA_OFFSET 8
692 CFI_REL_OFFSET rbp, 0
693 movq %rsp,%rbp
694 CFI_DEF_CFA_REGISTER rbp
695 testl $3,CS(%rdi)
696 je 1f
697 SWAPGS
698 /* irqcount is used to check if a CPU is already on an interrupt
699 stack or not. While this is essentially redundant with preempt_count
700 it is a little cheaper to use a separate counter in the PDA
701 (short of moving irq_enter into assembly, which would be too
702 much work) */
703 1: incl %gs:pda_irqcount
704 cmoveq %gs:pda_irqstackptr,%rsp
705 push %rbp # backlink for old unwinder
706 /*
707 * We entered an interrupt context - irqs are off:
708 */
709 TRACE_IRQS_OFF
710 call \func
711 .endm
712
713 ENTRY(common_interrupt)
714 XCPT_FRAME
715 interrupt do_IRQ
716 /* 0(%rsp): oldrsp-ARGOFFSET */
717 ret_from_intr:
718 DISABLE_INTERRUPTS(CLBR_NONE)
719 TRACE_IRQS_OFF
720 decl %gs:pda_irqcount
721 leaveq
722 CFI_DEF_CFA_REGISTER rsp
723 CFI_ADJUST_CFA_OFFSET -8
724 exit_intr:
725 GET_THREAD_INFO(%rcx)
726 testl $3,CS-ARGOFFSET(%rsp)
727 je retint_kernel
728
729 /* Interrupt came from user space */
730 /*
731 * Has a correct top of stack, but a partial stack frame
732 * %rcx: thread info. Interrupts off.
733 */
734 retint_with_reschedule:
735 movl $_TIF_WORK_MASK,%edi
736 retint_check:
737 LOCKDEP_SYS_EXIT_IRQ
738 movl TI_flags(%rcx),%edx
739 andl %edi,%edx
740 CFI_REMEMBER_STATE
741 jnz retint_careful
742
743 retint_swapgs: /* return to user-space */
744 /*
745 * The iretq could re-enable interrupts:
746 */
747 DISABLE_INTERRUPTS(CLBR_ANY)
748 TRACE_IRQS_IRETQ
749 SWAPGS
750 jmp restore_args
751
752 retint_restore_args: /* return to kernel space */
753 DISABLE_INTERRUPTS(CLBR_ANY)
754 /*
755 * The iretq could re-enable interrupts:
756 */
757 TRACE_IRQS_IRETQ
758 restore_args:
759 RESTORE_ARGS 0,8,0
760
761 irq_return:
762 INTERRUPT_RETURN
763
764 .section __ex_table, "a"
765 .quad irq_return, bad_iret
766 .previous
767
768 #ifdef CONFIG_PARAVIRT
769 ENTRY(native_iret)
770 iretq
771
772 .section __ex_table,"a"
773 .quad native_iret, bad_iret
774 .previous
775 #endif
776
777 .section .fixup,"ax"
778 bad_iret:
779 /*
780 * The iret traps when the %cs or %ss being restored is bogus.
781 * We've lost the original trap vector and error code.
782 * #GPF is the most likely one to get for an invalid selector.
783 * So pretend we completed the iret and took the #GPF in user mode.
784 *
785 * We are now running with the kernel GS after exception recovery.
786 * But error_entry expects us to have user GS to match the user %cs,
787 * so swap back.
788 */
789 pushq $0
790
791 SWAPGS
792 jmp general_protection
793
794 .previous
795
796 /* edi: workmask, edx: work */
797 retint_careful:
798 CFI_RESTORE_STATE
799 bt $TIF_NEED_RESCHED,%edx
800 jnc retint_signal
801 TRACE_IRQS_ON
802 ENABLE_INTERRUPTS(CLBR_NONE)
803 pushq %rdi
804 CFI_ADJUST_CFA_OFFSET 8
805 call schedule
806 popq %rdi
807 CFI_ADJUST_CFA_OFFSET -8
808 GET_THREAD_INFO(%rcx)
809 DISABLE_INTERRUPTS(CLBR_NONE)
810 TRACE_IRQS_OFF
811 jmp retint_check
812
813 retint_signal:
814 testl $_TIF_DO_NOTIFY_MASK,%edx
815 jz retint_swapgs
816 TRACE_IRQS_ON
817 ENABLE_INTERRUPTS(CLBR_NONE)
818 SAVE_REST
819 movq $-1,ORIG_RAX(%rsp)
820 xorl %esi,%esi # oldset
821 movq %rsp,%rdi # &pt_regs
822 call do_notify_resume
823 RESTORE_REST
824 DISABLE_INTERRUPTS(CLBR_NONE)
825 TRACE_IRQS_OFF
826 GET_THREAD_INFO(%rcx)
827 jmp retint_with_reschedule
828
829 #ifdef CONFIG_PREEMPT
830 /* Returning to kernel space. Check if we need preemption */
831 /* rcx: threadinfo. interrupts off. */
832 ENTRY(retint_kernel)
833 cmpl $0,TI_preempt_count(%rcx)
834 jnz retint_restore_args
835 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
836 jnc retint_restore_args
837 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
838 jnc retint_restore_args
839 call preempt_schedule_irq
840 jmp exit_intr
841 #endif
842
843 CFI_ENDPROC
844 END(common_interrupt)
845
846 /*
847 * APIC interrupts.
848 */
849 .macro apicinterrupt num,func
850 INTR_FRAME
851 pushq $~(\num)
852 CFI_ADJUST_CFA_OFFSET 8
853 interrupt \func
854 jmp ret_from_intr
855 CFI_ENDPROC
856 .endm
857
858 ENTRY(thermal_interrupt)
859 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
860 END(thermal_interrupt)
861
862 ENTRY(threshold_interrupt)
863 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
864 END(threshold_interrupt)
865
866 #ifdef CONFIG_SMP
867 ENTRY(reschedule_interrupt)
868 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
869 END(reschedule_interrupt)
870
871 .macro INVALIDATE_ENTRY num
872 ENTRY(invalidate_interrupt\num)
873 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
874 END(invalidate_interrupt\num)
875 .endm
876
877 INVALIDATE_ENTRY 0
878 INVALIDATE_ENTRY 1
879 INVALIDATE_ENTRY 2
880 INVALIDATE_ENTRY 3
881 INVALIDATE_ENTRY 4
882 INVALIDATE_ENTRY 5
883 INVALIDATE_ENTRY 6
884 INVALIDATE_ENTRY 7
885
886 ENTRY(call_function_interrupt)
887 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
888 END(call_function_interrupt)
889 ENTRY(call_function_single_interrupt)
890 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
891 END(call_function_single_interrupt)
892 ENTRY(irq_move_cleanup_interrupt)
893 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
894 END(irq_move_cleanup_interrupt)
895 #endif
896
897 ENTRY(apic_timer_interrupt)
898 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
899 END(apic_timer_interrupt)
900
901 ENTRY(uv_bau_message_intr1)
902 apicinterrupt 220,uv_bau_message_interrupt
903 END(uv_bau_message_intr1)
904
905 ENTRY(error_interrupt)
906 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
907 END(error_interrupt)
908
909 ENTRY(spurious_interrupt)
910 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
911 END(spurious_interrupt)
912
913 /*
914 * Exception entry points.
915 */
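/*
 * Quick guide to the entry macros that follow (summarizing their bodies):
 *	zeroentry sym	- exception with no CPU-pushed error code; a fake 0
 *			  is pushed, e.g. "zeroentry do_divide_error".
 *	errorentry sym	- exception where the CPU already pushed an error
 *			  code, e.g. "errorentry do_page_fault".
 *	paranoidentry sym - NMI-like exceptions that can arrive anywhere,
 *			  possibly with the user GS base still loaded.
 */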
916 .macro zeroentry sym
917 INTR_FRAME
918 PARAVIRT_ADJUST_EXCEPTION_FRAME
919 pushq $0 /* push error code/oldrax */
920 CFI_ADJUST_CFA_OFFSET 8
921 pushq %rax /* push real oldrax to the rdi slot */
922 CFI_ADJUST_CFA_OFFSET 8
923 CFI_REL_OFFSET rax,0
924 leaq \sym(%rip),%rax
925 jmp error_entry
926 CFI_ENDPROC
927 .endm
928
929 .macro errorentry sym
930 XCPT_FRAME
931 PARAVIRT_ADJUST_EXCEPTION_FRAME
932 pushq %rax
933 CFI_ADJUST_CFA_OFFSET 8
934 CFI_REL_OFFSET rax,0
935 leaq \sym(%rip),%rax
936 jmp error_entry
937 CFI_ENDPROC
938 .endm
939
940 /* error code is on the stack already */
941 /* handle NMI-like exceptions that can happen anywhere */
942 .macro paranoidentry sym, ist=0, irqtrace=1
943 SAVE_ALL
944 cld
945 movl $1,%ebx
946 movl $MSR_GS_BASE,%ecx
947 rdmsr
948 testl %edx,%edx
949 js 1f
950 SWAPGS
951 xorl %ebx,%ebx
952 1:
953 .if \ist
954 movq %gs:pda_data_offset, %rbp
955 .endif
956 .if \irqtrace
957 TRACE_IRQS_OFF
958 .endif
959 movq %rsp,%rdi
960 movq ORIG_RAX(%rsp),%rsi
961 movq $-1,ORIG_RAX(%rsp)
962 .if \ist
963 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
964 .endif
965 call \sym
966 .if \ist
967 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
968 .endif
969 DISABLE_INTERRUPTS(CLBR_NONE)
970 .if \irqtrace
971 TRACE_IRQS_OFF
972 .endif
973 .endm
974
975 /*
976 * "Paranoid" exit path from exception stack.
977 * Paranoid because this is used by NMIs and cannot take
978 * any kernel state for granted.
979 * We don't do kernel preemption checks here, because only the
980 * NMI case should be common, and NMIs do not enable IRQs and
981 * cannot get reschedule ticks.
982 *
983 * "trace" is 0 for the NMI handler only, because irq-tracing
984 * is fundamentally NMI-unsafe (we cannot change the soft and
985 * hard irq flags at once, atomically).
986 */
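/*
 * Convention used by the paranoid and error paths (see also error_entry):
 * %ebx == 1 means no swapgs is needed on exit, %ebx == 0 means it is.
 */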
987 .macro paranoidexit trace=1
988 /* ebx: no swapgs flag */
989 paranoid_exit\trace:
990 testl %ebx,%ebx /* swapgs needed? */
991 jnz paranoid_restore\trace
992 testl $3,CS(%rsp)
993 jnz paranoid_userspace\trace
994 paranoid_swapgs\trace:
995 .if \trace
996 TRACE_IRQS_IRETQ 0
997 .endif
998 SWAPGS_UNSAFE_STACK
999 paranoid_restore\trace:
1000 RESTORE_ALL 8
1001 jmp irq_return
1002 paranoid_userspace\trace:
1003 GET_THREAD_INFO(%rcx)
1004 movl TI_flags(%rcx),%ebx
1005 andl $_TIF_WORK_MASK,%ebx
1006 jz paranoid_swapgs\trace
1007 movq %rsp,%rdi /* &pt_regs */
1008 call sync_regs
1009 movq %rax,%rsp /* switch stack for scheduling */
1010 testl $_TIF_NEED_RESCHED,%ebx
1011 jnz paranoid_schedule\trace
1012 movl %ebx,%edx /* arg3: thread flags */
1013 .if \trace
1014 TRACE_IRQS_ON
1015 .endif
1016 ENABLE_INTERRUPTS(CLBR_NONE)
1017 xorl %esi,%esi /* arg2: oldset */
1018 movq %rsp,%rdi /* arg1: &pt_regs */
1019 call do_notify_resume
1020 DISABLE_INTERRUPTS(CLBR_NONE)
1021 .if \trace
1022 TRACE_IRQS_OFF
1023 .endif
1024 jmp paranoid_userspace\trace
1025 paranoid_schedule\trace:
1026 .if \trace
1027 TRACE_IRQS_ON
1028 .endif
1029 ENABLE_INTERRUPTS(CLBR_ANY)
1030 call schedule
1031 DISABLE_INTERRUPTS(CLBR_ANY)
1032 .if \trace
1033 TRACE_IRQS_OFF
1034 .endif
1035 jmp paranoid_userspace\trace
1036 CFI_ENDPROC
1037 .endm
1038
1039 /*
1040 * Exception entry point. This expects an error code/orig_rax on the stack
1041 * and the exception handler in %rax.
1042 */
1043 KPROBE_ENTRY(error_entry)
1044 _frame RDI
1045 CFI_REL_OFFSET rax,0
1046 /* rdi slot contains rax, oldrax contains error code */
1047 cld
1048 subq $14*8,%rsp
1049 CFI_ADJUST_CFA_OFFSET (14*8)
1050 movq %rsi,13*8(%rsp)
1051 CFI_REL_OFFSET rsi,RSI
1052 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
1053 CFI_REGISTER rax,rsi
1054 movq %rdx,12*8(%rsp)
1055 CFI_REL_OFFSET rdx,RDX
1056 movq %rcx,11*8(%rsp)
1057 CFI_REL_OFFSET rcx,RCX
1058 movq %rsi,10*8(%rsp) /* store rax */
1059 CFI_REL_OFFSET rax,RAX
1060 movq %r8, 9*8(%rsp)
1061 CFI_REL_OFFSET r8,R8
1062 movq %r9, 8*8(%rsp)
1063 CFI_REL_OFFSET r9,R9
1064 movq %r10,7*8(%rsp)
1065 CFI_REL_OFFSET r10,R10
1066 movq %r11,6*8(%rsp)
1067 CFI_REL_OFFSET r11,R11
1068 movq %rbx,5*8(%rsp)
1069 CFI_REL_OFFSET rbx,RBX
1070 movq %rbp,4*8(%rsp)
1071 CFI_REL_OFFSET rbp,RBP
1072 movq %r12,3*8(%rsp)
1073 CFI_REL_OFFSET r12,R12
1074 movq %r13,2*8(%rsp)
1075 CFI_REL_OFFSET r13,R13
1076 movq %r14,1*8(%rsp)
1077 CFI_REL_OFFSET r14,R14
1078 movq %r15,(%rsp)
1079 CFI_REL_OFFSET r15,R15
1080 xorl %ebx,%ebx
1081 testl $3,CS(%rsp)
1082 je error_kernelspace
1083 error_swapgs:
1084 SWAPGS
1085 error_sti:
1086 TRACE_IRQS_OFF
1087 movq %rdi,RDI(%rsp)
1088 CFI_REL_OFFSET rdi,RDI
1089 movq %rsp,%rdi
1090 movq ORIG_RAX(%rsp),%rsi /* get error code */
1091 movq $-1,ORIG_RAX(%rsp)
1092 call *%rax
1093 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1094 error_exit:
1095 movl %ebx,%eax
1096 RESTORE_REST
1097 DISABLE_INTERRUPTS(CLBR_NONE)
1098 TRACE_IRQS_OFF
1099 GET_THREAD_INFO(%rcx)
1100 testl %eax,%eax
1101 jne retint_kernel
1102 LOCKDEP_SYS_EXIT_IRQ
1103 movl TI_flags(%rcx),%edx
1104 movl $_TIF_WORK_MASK,%edi
1105 andl %edi,%edx
1106 jnz retint_careful
1107 jmp retint_swapgs
1108 CFI_ENDPROC
1109
1110 error_kernelspace:
1111 incl %ebx
1112 /* There are two places in the kernel that can potentially fault with
1113 usergs. Handle them here. The exception handlers after
1114 iret run with kernel gs again, so don't set the user space flag.
1115 B-stepping K8s sometimes report a truncated RIP for IRET
1116 exceptions returning to compat mode. Check for these here too. */
1117 leaq irq_return(%rip),%rcx
1118 cmpq %rcx,RIP(%rsp)
1119 je error_swapgs
1120 movl %ecx,%ecx /* zero extend */
1121 cmpq %rcx,RIP(%rsp)
1122 je error_swapgs
1123 cmpq $gs_change,RIP(%rsp)
1124 je error_swapgs
1125 jmp error_sti
1126 KPROBE_END(error_entry)
1127
1128 /* Reload gs selector with exception handling */
1129 /* edi: new selector */
1130 ENTRY(native_load_gs_index)
1131 CFI_STARTPROC
1132 pushf
1133 CFI_ADJUST_CFA_OFFSET 8
1134 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1135 SWAPGS
1136 gs_change:
1137 movl %edi,%gs
1138 2: mfence /* workaround */
1139 SWAPGS
1140 popf
1141 CFI_ADJUST_CFA_OFFSET -8
1142 ret
1143 CFI_ENDPROC
1144 ENDPROC(native_load_gs_index)
1145
1146 .section __ex_table,"a"
1147 .align 8
1148 .quad gs_change,bad_gs
1149 .previous
1150 .section .fixup,"ax"
1151 /* running with kernelgs */
1152 bad_gs:
1153 SWAPGS /* switch back to user gs */
1154 xorl %eax,%eax
1155 movl %eax,%gs
1156 jmp 2b
1157 .previous
1158
1159 /*
1160 * Create a kernel thread.
1161 *
1162 * C extern interface:
1163 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1164 *
1165 * asm input arguments:
1166 * rdi: fn, rsi: arg, rdx: flags
1167 */
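/*
 * Illustrative call (hypothetical caller; flags chosen for the example):
 *
 *	kernel_thread(worker_fn, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * kernel_thread_flags is OR'd into the caller's flags below before
 * do_fork() is invoked.
 */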
1168 ENTRY(kernel_thread)
1169 CFI_STARTPROC
1170 FAKE_STACK_FRAME $child_rip
1171 SAVE_ALL
1172
1173 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1174 movq %rdx,%rdi
1175 orq kernel_thread_flags(%rip),%rdi
1176 movq $-1, %rsi
1177 movq %rsp, %rdx
1178
1179 xorl %r8d,%r8d
1180 xorl %r9d,%r9d
1181
1182 # clone now
1183 call do_fork
1184 movq %rax,RAX(%rsp)
1185 xorl %edi,%edi
1186
1187 /*
1188 * It isn't worth checking for a reschedule here,
1189 * so internally to the x86_64 port you can rely on kernel_thread()
1190 * not rescheduling the child before returning; this avoids the need
1191 * for hacks, for example to fork off the per-CPU idle tasks.
1192 * [Hopefully no generic code relies on the reschedule -AK]
1193 */
1194 RESTORE_ALL
1195 UNFAKE_STACK_FRAME
1196 ret
1197 CFI_ENDPROC
1198 ENDPROC(kernel_thread)
1199
1200 child_rip:
1201 pushq $0 # fake return address
1202 CFI_STARTPROC
1203 /*
1204 * Here we are in the child and the registers are set as they were
1205 * at kernel_thread() invocation in the parent.
1206 */
1207 movq %rdi, %rax
1208 movq %rsi, %rdi
1209 call *%rax
1210 # exit
1211 mov %eax, %edi
1212 call do_exit
1213 CFI_ENDPROC
1214 ENDPROC(child_rip)
1215
1216 /*
1217 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1218 *
1219 * C extern interface:
1220 * extern long execve(char *name, char **argv, char **envp)
1221 *
1222 * asm input arguments:
1223 * rdi: name, rsi: argv, rdx: envp
1224 *
1225 * We want to fall back into:
1226 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1227 *
1228 * do_sys_execve asm fallback arguments:
1229 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1230 */
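/*
 * Editor's note: on success sys_execve() returns 0 and the code below
 * takes the int_ret_from_sys_call path, since the new program's state in
 * pt_regs must be restored with IRET rather than by returning to the caller.
 */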
1231 ENTRY(kernel_execve)
1232 CFI_STARTPROC
1233 FAKE_STACK_FRAME $0
1234 SAVE_ALL
1235 movq %rsp,%rcx
1236 call sys_execve
1237 movq %rax, RAX(%rsp)
1238 RESTORE_REST
1239 testq %rax,%rax
1240 je int_ret_from_sys_call
1241 RESTORE_ARGS
1242 UNFAKE_STACK_FRAME
1243 ret
1244 CFI_ENDPROC
1245 ENDPROC(kernel_execve)
1246
1247 KPROBE_ENTRY(page_fault)
1248 errorentry do_page_fault
1249 KPROBE_END(page_fault)
1250
1251 ENTRY(coprocessor_error)
1252 zeroentry do_coprocessor_error
1253 END(coprocessor_error)
1254
1255 ENTRY(simd_coprocessor_error)
1256 zeroentry do_simd_coprocessor_error
1257 END(simd_coprocessor_error)
1258
1259 ENTRY(device_not_available)
1260 zeroentry do_device_not_available
1261 END(device_not_available)
1262
1263 /* runs on exception stack */
1264 KPROBE_ENTRY(debug)
1265 INTR_FRAME
1266 PARAVIRT_ADJUST_EXCEPTION_FRAME
1267 pushq $0
1268 CFI_ADJUST_CFA_OFFSET 8
1269 paranoidentry do_debug, DEBUG_STACK
1270 paranoidexit
1271 KPROBE_END(debug)
1272
1273 /* runs on exception stack */
1274 KPROBE_ENTRY(nmi)
1275 INTR_FRAME
1276 PARAVIRT_ADJUST_EXCEPTION_FRAME
1277 pushq $-1
1278 CFI_ADJUST_CFA_OFFSET 8
1279 paranoidentry do_nmi, 0, 0
1280 #ifdef CONFIG_TRACE_IRQFLAGS
1281 paranoidexit 0
1282 #else
1283 jmp paranoid_exit1
1284 CFI_ENDPROC
1285 #endif
1286 KPROBE_END(nmi)
1287
1288 KPROBE_ENTRY(int3)
1289 INTR_FRAME
1290 PARAVIRT_ADJUST_EXCEPTION_FRAME
1291 pushq $0
1292 CFI_ADJUST_CFA_OFFSET 8
1293 paranoidentry do_int3, DEBUG_STACK
1294 jmp paranoid_exit1
1295 CFI_ENDPROC
1296 KPROBE_END(int3)
1297
1298 ENTRY(overflow)
1299 zeroentry do_overflow
1300 END(overflow)
1301
1302 ENTRY(bounds)
1303 zeroentry do_bounds
1304 END(bounds)
1305
1306 ENTRY(invalid_op)
1307 zeroentry do_invalid_op
1308 END(invalid_op)
1309
1310 ENTRY(coprocessor_segment_overrun)
1311 zeroentry do_coprocessor_segment_overrun
1312 END(coprocessor_segment_overrun)
1313
1314 /* runs on exception stack */
1315 ENTRY(double_fault)
1316 XCPT_FRAME
1317 PARAVIRT_ADJUST_EXCEPTION_FRAME
1318 paranoidentry do_double_fault
1319 jmp paranoid_exit1
1320 CFI_ENDPROC
1321 END(double_fault)
1322
1323 ENTRY(invalid_TSS)
1324 errorentry do_invalid_TSS
1325 END(invalid_TSS)
1326
1327 ENTRY(segment_not_present)
1328 errorentry do_segment_not_present
1329 END(segment_not_present)
1330
1331 /* runs on exception stack */
1332 ENTRY(stack_segment)
1333 XCPT_FRAME
1334 PARAVIRT_ADJUST_EXCEPTION_FRAME
1335 paranoidentry do_stack_segment
1336 jmp paranoid_exit1
1337 CFI_ENDPROC
1338 END(stack_segment)
1339
1340 KPROBE_ENTRY(general_protection)
1341 errorentry do_general_protection
1342 KPROBE_END(general_protection)
1343
1344 ENTRY(alignment_check)
1345 errorentry do_alignment_check
1346 END(alignment_check)
1347
1348 ENTRY(divide_error)
1349 zeroentry do_divide_error
1350 END(divide_error)
1351
1352 ENTRY(spurious_interrupt_bug)
1353 zeroentry do_spurious_interrupt_bug
1354 END(spurious_interrupt_bug)
1355
1356 #ifdef CONFIG_X86_MCE
1357 /* runs on exception stack */
1358 ENTRY(machine_check)
1359 INTR_FRAME
1360 PARAVIRT_ADJUST_EXCEPTION_FRAME
1361 pushq $0
1362 CFI_ADJUST_CFA_OFFSET 8
1363 paranoidentry do_machine_check
1364 jmp paranoid_exit1
1365 CFI_ENDPROC
1366 END(machine_check)
1367 #endif
1368
1369 /* Call softirq on interrupt stack. Interrupts are off. */
1370 ENTRY(call_softirq)
1371 CFI_STARTPROC
1372 push %rbp
1373 CFI_ADJUST_CFA_OFFSET 8
1374 CFI_REL_OFFSET rbp,0
1375 mov %rsp,%rbp
1376 CFI_DEF_CFA_REGISTER rbp
1377 incl %gs:pda_irqcount
1378 cmove %gs:pda_irqstackptr,%rsp
1379 push %rbp # backlink for old unwinder
1380 call __do_softirq
1381 leaveq
1382 CFI_DEF_CFA_REGISTER rsp
1383 CFI_ADJUST_CFA_OFFSET -8
1384 decl %gs:pda_irqcount
1385 ret
1386 CFI_ENDPROC
1387 ENDPROC(call_softirq)
1388
1389 KPROBE_ENTRY(ignore_sysret)
1390 CFI_STARTPROC
1391 mov $-ENOSYS,%eax
1392 sysret
1393 CFI_ENDPROC
1394 ENDPROC(ignore_sysret)
1395
1396 #ifdef CONFIG_XEN
1397 ENTRY(xen_hypervisor_callback)
1398 zeroentry xen_do_hypervisor_callback
1399 END(xen_hypervisor_callback)
1400
1401 /*
1402 # A note on the "critical region" in our callback handler.
1403 # We want to avoid stacking callback handlers due to events occurring
1404 # during handling of the last event. To do this, we keep events disabled
1405 # until we've done all processing. HOWEVER, we must enable events before
1406 # popping the stack frame (can't be done atomically) and so it would still
1407 # be possible to get enough handler activations to overflow the stack.
1408 # Although unlikely, bugs of that kind are hard to track down, so we'd
1409 # like to avoid the possibility.
1410 # So, on entry to the handler we detect whether we interrupted an
1411 # existing activation in its critical region -- if so, we pop the current
1412 # activation and restart the handler using the previous one.
1413 */
1414 ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
1415 CFI_STARTPROC
1416 /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
1417 see the correct pointer to the pt_regs */
1418 movq %rdi, %rsp # we don't return, adjust the stack frame
1419 CFI_ENDPROC
1420 CFI_DEFAULT_STACK
1421 11: incl %gs:pda_irqcount
1422 movq %rsp,%rbp
1423 CFI_DEF_CFA_REGISTER rbp
1424 cmovzq %gs:pda_irqstackptr,%rsp
1425 pushq %rbp # backlink for old unwinder
1426 call xen_evtchn_do_upcall
1427 popq %rsp
1428 CFI_DEF_CFA_REGISTER rsp
1429 decl %gs:pda_irqcount
1430 jmp error_exit
1431 CFI_ENDPROC
1432 END(xen_do_hypervisor_callback)
1433
1434 /*
1435 # Hypervisor uses this for application faults while it executes.
1436 # We get here for two reasons:
1437 # 1. Fault while reloading DS, ES, FS or GS
1438 # 2. Fault while executing IRET
1439 # Category 1 we do not need to fix up as Xen has already reloaded all segment
1440 # registers that could be reloaded and zeroed the others.
1441 # Category 2 we fix up by killing the current process. We cannot use the
1442 # normal Linux return path in this case because if we use the IRET hypercall
1443 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1444 # We distinguish between categories by comparing each saved segment register
1445 # with its current contents: any discrepancy means we are in category 1.
1446 */
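/*
 * Assumed frame layout at this point (derived from the checks below):
 *	0x00(%rsp) = rcx, 0x08 = r11, 0x10 = ds, 0x18 = es,
 *	0x20 = fs, 0x28 = gs, 0x30 onwards = hardware iret frame.
 */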
1447 ENTRY(xen_failsafe_callback)
1448 framesz = (RIP-0x30) /* workaround buggy gas */
1449 _frame framesz
1450 CFI_REL_OFFSET rcx, 0
1451 CFI_REL_OFFSET r11, 8
1452 movw %ds,%cx
1453 cmpw %cx,0x10(%rsp)
1454 CFI_REMEMBER_STATE
1455 jne 1f
1456 movw %es,%cx
1457 cmpw %cx,0x18(%rsp)
1458 jne 1f
1459 movw %fs,%cx
1460 cmpw %cx,0x20(%rsp)
1461 jne 1f
1462 movw %gs,%cx
1463 cmpw %cx,0x28(%rsp)
1464 jne 1f
1465 /* All segments match their saved values => Category 2 (Bad IRET). */
1466 movq (%rsp),%rcx
1467 CFI_RESTORE rcx
1468 movq 8(%rsp),%r11
1469 CFI_RESTORE r11
1470 addq $0x30,%rsp
1471 CFI_ADJUST_CFA_OFFSET -0x30
1472 pushq $0
1473 CFI_ADJUST_CFA_OFFSET 8
1474 pushq %r11
1475 CFI_ADJUST_CFA_OFFSET 8
1476 pushq %rcx
1477 CFI_ADJUST_CFA_OFFSET 8
1478 jmp general_protection
1479 CFI_RESTORE_STATE
1480 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1481 movq (%rsp),%rcx
1482 CFI_RESTORE rcx
1483 movq 8(%rsp),%r11
1484 CFI_RESTORE r11
1485 addq $0x30,%rsp
1486 CFI_ADJUST_CFA_OFFSET -0x30
1487 pushq $0
1488 CFI_ADJUST_CFA_OFFSET 8
1489 SAVE_ALL
1490 jmp error_exit
1491 CFI_ENDPROC
1492 END(xen_failsafe_callback)
1493
1494 #endif /* CONFIG_XEN */