/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/offset.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
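
/*
 * Background (architectural facts): the SYSCALL instruction stores the
 * return RIP in %rcx and RFLAGS in %r11, and does not save SS/RSP or
 * switch stacks. That is why the user RSP lives in %gs:pda_oldrsp and
 * the frame must be patched up before C code may look at it.
 */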

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax /* ss */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_OFFSET rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET 8
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_OFFSET rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
	.endm

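/*
 * Annotate a full pt_regs frame for the DWARF unwinder; all offsets
 * below are taken relative to SS, the highest slot in the frame.
 */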
	.macro CFI_DEFAULT_STACK
	CFI_ADJUST_CFA_OFFSET (SS)
	CFI_OFFSET r15,R15-SS
	CFI_OFFSET r14,R14-SS
	CFI_OFFSET r13,R13-SS
	CFI_OFFSET r12,R12-SS
	CFI_OFFSET rbp,RBP-SS
	CFI_OFFSET rbx,RBX-SS
	CFI_OFFSET r11,R11-SS
	CFI_OFFSET r10,R10-SS
	CFI_OFFSET r9,R9-SS
	CFI_OFFSET r8,R8-SS
	CFI_OFFSET rax,RAX-SS
	CFI_OFFSET rcx,RCX-SS
	CFI_OFFSET rdx,RDX-SS
	CFI_OFFSET rsi,RSI-SS
	CFI_OFFSET rdi,RDI-SS
	CFI_OFFSET rsp,RSP-SS
	CFI_OFFSET rip,RIP-SS
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we don't have one.
 */
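
/*
 * Illustrative only (not part of this file): a user-space write(2)
 * following the convention above would look like
 *	movq $__NR_write,%rax	# system call number (1 on x86-64)
 *	movq $1,%rdi		# arg0: fd (stdout)
 *	leaq msg(%rip),%rsi	# arg1: buf (hypothetical symbol)
 *	movq $len,%rdx		# arg2: count (hypothetical symbol)
 *	syscall
 * with the result (byte count or -errno) returned in %rax.
 */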

ENTRY(system_call)
	CFI_STARTPROC
	swapgs
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
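	/*
	 * Fast exit: sysretq resumes user mode with RIP taken from %rcx
	 * and RFLAGS from %r11, so %rcx is reloaded by hand below and
	 * RESTORE_ARGS (which skips %rcx) brings back %r11 before the
	 * switch to the user stack and gs.
	 */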
	movq RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	call schedule
	popq %rdi
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	call schedule
	popq %rdi
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC

/*
 * Certain special system calls need to save a full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
	.endm
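
	/*
	 * For example, "PTREGSCALL stub_clone, sys_clone, %r8" below
	 * expands to a stub_clone that loads the address of sys_clone
	 * into %rax, points %r8 (sys_clone's pt_regs argument) at the
	 * frame, and jumps to ptregscall_common.
	 */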

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	CFI_STARTPROC
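	/* Pop our caller's return address and keep it in callee-saved %r15
	   across the C call; it is pushed back just before returning. */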
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	push %r11
	ret

exec_32bit:
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only the callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,(SS-RDI)
	CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
	CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
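	/* pda_irqcount is -1 when no interrupt is being serviced, so the
	   incl below sets ZF only for the outermost interrupt and the
	   cmoveq switches to the per-CPU irq stack exactly once; nested
	   interrupts stay on the stack they arrived on. */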
1:	incl %gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp
	pushq %rdi		# save old stack
	call \func
	.endm

ENTRY(common_interrupt)
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
#endif
	leaq ARGOFFSET(%rdi),%rsp
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

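	/* If the iretq above faults (e.g. on a stale user %cs or %ss),
	   the exception table entry below diverts the fault to bad_iret
	   instead of crashing the kernel. */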
	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	call schedule
	popq %rdi
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
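	/* The vector number is pushed biased by -256 so that the orig_rax
	   slot is negative and can never be mistaken for a syscall number. */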
	pushq $\num-256
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

ENTRY(invalidate_interrupt)
	apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq $0	/* push error code/oldrax */
	pushq %rax	/* push real oldrax to the rdi slot */
	leaq \sym(%rip),%rax
	jmp error_entry
	.endm

	.macro errorentry sym
	pushq %rax
	leaq \sym(%rip),%rax
	jmp error_entry
	.endm
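
	/* zeroentry is for exceptions where the CPU pushes no error code
	   (a dummy 0 is pushed instead); errorentry is for those where
	   the CPU has already pushed one. */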

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym
	SAVE_ALL
	cld
	movl $1,%ebx
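	/* Decide whether swapgs is needed without trusting the saved CS:
	   the kernel GS base is a negative (kernel-half) address, so if
	   the high half returned by rdmsr has its sign bit set we already
	   run with kernel gs. %ebx records the choice for the exit path. */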
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	call \sym
	cli
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,(SS-RDI)
	CFI_REL_OFFSET rsp,(RSP-RDI)
	CFI_REL_OFFSET rip,(RIP-RDI)
	/* rdi slot contains rax, oldrax contains error code */
	cld
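	/* Make room for the 14 remaining pt_regs slots; the rdi slot was
	   already claimed by the entry stub's pushq %rax. */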
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	iretq
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	pushf
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	ret

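	/* If the segment load at gs_change faults (invalid selector), the
	   fixup below reloads %gs with the null selector and resumes at 2:. */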
	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
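/*
 * Illustrative only (not from this file): a typical caller might do
 *	kernel_thread(worker_fn, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 * where worker_fn is a hypothetical thread function; the child then
 * enters it via child_rip below.
 */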
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not rescheduling the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC


child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

ENTRY(page_fault)
	errorentry do_page_fault

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
ENTRY(debug)
	CFI_STARTPROC
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug
	jmp paranoid_exit
	CFI_ENDPROC

	/* runs on exception stack */
ENTRY(nmi)
	CFI_STARTPROC
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because among
	 * these users only the NMI should be common, and it does not
	 * enable IRQs and cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx		/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi		/* &pt_regs */
	call sync_regs
	movq %rax,%rsp		/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx		/* arg3: thread flags */
	sti
	xorl %esi,%esi		/* arg2: oldset */
	movq %rsp,%rdi		/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC

ENTRY(int3)
	zeroentry do_int3

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	CFI_STARTPROC
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	CFI_STARTPROC
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(general_protection)
	errorentry do_general_protection

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	CFI_STARTPROC
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_debug)
	zeroentry do_call_debug