/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame, this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
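/*
 * For reference, the architecture defined interrupt frame is what the
 * CPU pushes on a trap from user mode, highest address first:
 *
 *	SS, RSP, RFLAGS, CS, RIP	(+ error code for some exceptions)
 *
 * so "top of stack" above means exactly these five (or six) slots.
 */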

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
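/*
 * Background: SYSCALL leaves the return RIP in %rcx and RFLAGS in %r11
 * and pushes nothing, so the SS/RSP/EFLAGS/CS/RCX slots of the partial
 * frame start out undefined and the user RSP lives only in
 * %gs:pda_oldrsp.  FIXUP_TOP_OF_STACK below fills those slots in (RCX
 * becomes -1 because the real RCX was clobbered by SYSCALL itself), and
 * RESTORE_TOP_OF_STACK copies possibly ptrace-modified values back out.
 */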

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we haven't.
 *
 * When the user can change the frame, always force IRET. That is because
 * IRET deals better with non-canonical addresses; SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
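/*
 * Illustrative example (not from the original source): a user-space
 * write(1, buf, count) arrives here as roughly
 *
 *	movl	$__NR_write,%eax	# rax = system call number
 *	movl	$1,%edi			# arg0: fd
 *	leaq	buf(%rip),%rsi		# arg1: buffer
 *	movl	$count,%edx		# arg2: length
 *	syscall				# rcx := return RIP, r11 := rflags
 *
 * matching the register table above.
 */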

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs				/* switch to the kernel (per-CPU PDA) gs base */
	movq	%rsp,%gs:pda_oldrsp	/* stash the user stack pointer */
	movq	%gs:pda_kernelstack,%rsp /* and switch to the kernel stack */
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp) /* SYSCALL left the return RIP in %rcx */
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx			/* arg3 moves from r10 to the C ABI's rcx */
	call *sys_call_table(,%rax,8)	# XXX:	rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx		/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(system_call)

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,threadinfo_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
END(int_ret_from_sys_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
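/*
 * For example, PTREGSCALL stub_clone, sys_clone, %r8 expands to a
 * stub_clone that loads the address of sys_clone into %rax, points %r8
 * (the extra pt_regs argument) at the frame, and jumps to
 * ptregscall_common below, which saves the remaining registers around
 * the actual call.
 */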

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	cmoveq %gs:pda_irqstackptr,%rsp
	call \func
	.endm
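/*
 * Note on the stack switch above: pda_irqcount is -1 while no interrupt
 * is in progress, so the incl sets ZF exactly for the outermost
 * interrupt, and cmoveq then moves %rsp onto the per-CPU IRQ stack only
 * in that case; nested interrupts keep running on the stack they
 * arrived on.
 */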

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	sti
	jmp do_exit
	.previous

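/*
 * Note: the __ex_table entry above pairs the address of the iretq with
 * bad_iret, so if the iretq itself faults (e.g. on a corrupted user
 * frame) the exception fixup redirects execution to bad_iret, which
 * terminates the task via do_exit with code SIGSEGV.
 */
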
	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
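/*
 * Note: the vector is pushed as \num-256 so the ORIG_RAX slot is always
 * negative for hardware interrupts, keeping it distinguishable from a
 * (non-negative) system call number.
 */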

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
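/*
 * Note: zeroentry is for exceptions where the CPU pushes no error code,
 * so a fake 0 is pushed to keep the frame layout uniform; errorentry is
 * for exceptions whose error code is already on the stack.  Both park
 * the C handler's address in %rax for error_entry.
 */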

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
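/*
 * Note on the rdmsr above: it checks whether swapgs has already
 * happened.  The kernel GS base is an address in the upper canonical
 * half, so bit 63 is set and %edx tests negative; a positive value
 * means we entered with the user GS base and must swapgs, remembering
 * that in %ebx for the matching swapgs on exit.
 */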

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl  threadinfo_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
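/*
 * Illustrative use: kernel_thread(fn, arg, CLONE_FS | CLONE_FILES)
 * builds the fake interrupt frame below, ORs in kernel_thread_flags,
 * and lets do_fork() create the child; the child resumes at child_rip,
 * which calls fn(arg) and then do_exit().
 */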
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(execve)

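/*
 * Note on the testq above: on success sys_execve returns 0 and every
 * register must come from the new program image, hence the jump to the
 * full IRET return path; on failure the saved arguments are restored
 * and the error is returned through the normal ret.
 */
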
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
END(page_fault)
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
END(debug)
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
END(nmi)
	.previous .text

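/*
 * Note: sync_regs, used above, is expected to copy the pt_regs off the
 * per-CPU exception stack onto the process stack and return the new
 * location; switching %rsp there keeps schedule()/do_notify_resume()
 * off the IST stack, which the next exception would reuse.
 */
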
KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
END(int3)
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
END(general_protection)
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
END(machine_check)
#endif

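/*
 * call_softirq: run __do_softirq on the per-CPU IRQ stack.  The
 * incl/cmove pair mirrors the interrupt macro above: the stack is
 * switched only when not already on the IRQ stack, and the old %rsp is
 * pushed there so the later popq %rsp restores it.
 */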
ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

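/*
 * arch_unwind_init_running (reading from the code below; the C
 * prototype is not shown here): snapshot the callee-saved registers and
 * the caller's RIP/RSP into the pt_regs area at %rdi so the unwinder
 * can start from the current frame, zero the caller-clobbered slots,
 * then tail-call the continuation that arrived in %rsi (moved to %rdx
 * by the xchg), with the original third argument passed along in %rsi.
 */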
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
	CFI_STARTPROC
	movq	%r15, R15(%rdi)
	movq	%r14, R14(%rdi)
	xchgq	%rsi, %rdx
	movq	%r13, R13(%rdi)
	movq	%r12, R12(%rdi)
	xorl	%eax, %eax
	movq	%rbp, RBP(%rdi)
	movq	%rbx, RBX(%rdi)
	movq	(%rsp), %rcx
	movq	%rax, R11(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, R9(%rdi)
	movq	%rax, R8(%rdi)
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	movq	%rcx, RIP(%rdi)
	leaq	8(%rsp), %rcx
	movq	$__KERNEL_CS, CS(%rdi)
	movq	%rax, EFLAGS(%rdi)
	movq	%rcx, RSP(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)
	jmpq	*%rdx
	CFI_ENDPROC
ENDPROC(arch_unwind_init_running)
#endif