]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - arch/x86/entry/common.c
x86/entry: Move C entry and exit code to arch/x86/entry/common.c
[mirror_ubuntu-zesty-kernel.git] / arch / x86 / entry / common.c
CommitLineData
1f484aa6
AL
/*
 * common.c - C code for kernel entry and exit
 * Copyright (c) 2015 Andrew Lutomirski
 * GPL v2
 *
 * Based on asm and ptrace code by many authors.  The code here originated
 * in ptrace.c and signal.c.
 */
9
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/mm.h>
13#include <linux/smp.h>
14#include <linux/errno.h>
15#include <linux/ptrace.h>
16#include <linux/tracehook.h>
17#include <linux/audit.h>
18#include <linux/seccomp.h>
19#include <linux/signal.h>
20#include <linux/export.h>
21#include <linux/context_tracking.h>
22#include <linux/user-return-notifier.h>
23#include <linux/uprobes.h>
24
25#include <asm/desc.h>
26#include <asm/traps.h>
27
28#define CREATE_TRACE_POINTS
29#include <trace/events/syscalls.h>
30
31static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
32{
33#ifdef CONFIG_X86_64
34 if (arch == AUDIT_ARCH_X86_64) {
35 audit_syscall_entry(regs->orig_ax, regs->di,
36 regs->si, regs->dx, regs->r10);
37 } else
38#endif
39 {
40 audit_syscall_entry(regs->orig_ax, regs->bx,
41 regs->cx, regs->dx, regs->si);
42 }
43}
44
45/*
46 * We can return 0 to resume the syscall or anything else to go to phase
47 * 2. If we resume the syscall, we need to put something appropriate in
48 * regs->orig_ax.
49 *
50 * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
51 * are fully functional.
52 *
53 * For phase 2's benefit, our return value is:
54 * 0: resume the syscall
55 * 1: go to phase 2; no seccomp phase 2 needed
56 * anything else: go to phase 2; pass return value to seccomp
57 */
58unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
59{
60 unsigned long ret = 0;
61 u32 work;
62
63 BUG_ON(regs != task_pt_regs(current));
64
65 work = ACCESS_ONCE(current_thread_info()->flags) &
66 _TIF_WORK_SYSCALL_ENTRY;
67
68 /*
69 * If TIF_NOHZ is set, we are required to call user_exit() before
70 * doing anything that could touch RCU.
71 */
72 if (work & _TIF_NOHZ) {
73 user_exit();
74 work &= ~_TIF_NOHZ;
75 }
76
77#ifdef CONFIG_SECCOMP
78 /*
79 * Do seccomp first -- it should minimize exposure of other
80 * code, and keeping seccomp fast is probably more valuable
81 * than the rest of this.
82 */
83 if (work & _TIF_SECCOMP) {
84 struct seccomp_data sd;
85
86 sd.arch = arch;
87 sd.nr = regs->orig_ax;
88 sd.instruction_pointer = regs->ip;
89#ifdef CONFIG_X86_64
90 if (arch == AUDIT_ARCH_X86_64) {
91 sd.args[0] = regs->di;
92 sd.args[1] = regs->si;
93 sd.args[2] = regs->dx;
94 sd.args[3] = regs->r10;
95 sd.args[4] = regs->r8;
96 sd.args[5] = regs->r9;
97 } else
98#endif
99 {
100 sd.args[0] = regs->bx;
101 sd.args[1] = regs->cx;
102 sd.args[2] = regs->dx;
103 sd.args[3] = regs->si;
104 sd.args[4] = regs->di;
105 sd.args[5] = regs->bp;
106 }
107
108 BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
109 BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
110
111 ret = seccomp_phase1(&sd);
112 if (ret == SECCOMP_PHASE1_SKIP) {
113 regs->orig_ax = -1;
114 ret = 0;
115 } else if (ret != SECCOMP_PHASE1_OK) {
116 return ret; /* Go directly to phase 2 */
117 }
118
119 work &= ~_TIF_SECCOMP;
120 }
121#endif
122
123 /* Do our best to finish without phase 2. */
124 if (work == 0)
125 return ret; /* seccomp and/or nohz only (ret == 0 here) */
126
127#ifdef CONFIG_AUDITSYSCALL
128 if (work == _TIF_SYSCALL_AUDIT) {
129 /*
130 * If there is no more work to be done except auditing,
131 * then audit in phase 1. Phase 2 always audits, so, if
132 * we audit here, then we can't go on to phase 2.
133 */
134 do_audit_syscall_entry(regs, arch);
135 return 0;
136 }
137#endif
138
139 return 1; /* Something is enabled that we can't handle in phase 1 */
140}
141
142/* Returns the syscall nr to run (which should match regs->orig_ax). */
143long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
144 unsigned long phase1_result)
145{
146 long ret = 0;
147 u32 work = ACCESS_ONCE(current_thread_info()->flags) &
148 _TIF_WORK_SYSCALL_ENTRY;
149
150 BUG_ON(regs != task_pt_regs(current));
151
152 /*
153 * If we stepped into a sysenter/syscall insn, it trapped in
154 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
155 * If user-mode had set TF itself, then it's still clear from
156 * do_debug() and we need to set it again to restore the user
157 * state. If we entered on the slow path, TF was already set.
158 */
159 if (work & _TIF_SINGLESTEP)
160 regs->flags |= X86_EFLAGS_TF;
161
162#ifdef CONFIG_SECCOMP
163 /*
164 * Call seccomp_phase2 before running the other hooks so that
165 * they can see any changes made by a seccomp tracer.
166 */
167 if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
168 /* seccomp failures shouldn't expose any additional code. */
169 return -1;
170 }
171#endif
172
173 if (unlikely(work & _TIF_SYSCALL_EMU))
174 ret = -1L;
175
176 if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
177 tracehook_report_syscall_entry(regs))
178 ret = -1L;
179
180 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
181 trace_sys_enter(regs, regs->orig_ax);
182
183 do_audit_syscall_entry(regs, arch);
184
185 return ret ?: regs->orig_ax;
186}
187
188long syscall_trace_enter(struct pt_regs *regs)
189{
190 u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
191 unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
192
193 if (phase1_result == 0)
194 return regs->orig_ax;
195 else
196 return syscall_trace_enter_phase2(regs, arch, phase1_result);
197}
198
199void syscall_trace_leave(struct pt_regs *regs)
200{
201 bool step;
202
203 /*
204 * We may come here right after calling schedule_user()
205 * or do_notify_resume(), in which case we can be in RCU
206 * user mode.
207 */
208 user_exit();
209
210 audit_syscall_exit(regs);
211
212 if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
213 trace_sys_exit(regs, regs->ax);
214
215 /*
216 * If TIF_SYSCALL_EMU is set, we only get here because of
217 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
218 * We already reported this syscall instruction in
219 * syscall_trace_enter().
220 */
221 step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
222 !test_thread_flag(TIF_SYSCALL_EMU);
223 if (step || test_thread_flag(TIF_SYSCALL_TRACE))
224 tracehook_report_syscall_exit(regs, step);
225
226 user_enter();
227}
228
229/*
230 * notification of userspace execution resumption
231 * - triggered by the TIF_WORK_MASK flags
232 */
233__visible void
234do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
235{
236 user_exit();
237
238 if (thread_info_flags & _TIF_UPROBE)
239 uprobe_notify_resume(regs);
240
241 /* deal with pending signal delivery */
242 if (thread_info_flags & _TIF_SIGPENDING)
243 do_signal(regs);
244
245 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
246 clear_thread_flag(TIF_NOTIFY_RESUME);
247 tracehook_notify_resume(regs);
248 }
249 if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
250 fire_user_return_notifiers();
251
252 user_enter();
253}