/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/intel_rdt.h>
#include <asm/unistd.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif
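
/* Per-CPU scratch slot: the 64-bit SYSCALL entry code stashes the user RSP
 * here while it switches to the kernel stack. */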
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in pt_regs. */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
		(void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
		regs->sp, regs->flags);
	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
	       es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
	       cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

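/*
 * A dead task's mm should have been torn down before we get here; an LDT
 * that survives to this point has been leaked, hence the BUG() below.
 */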
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->nr_entries);
			BUG();
		}
#endif
	}
}

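/*
 * Set up the kernel stack frame and register state for a new child.
 * Both kernel and user threads first run ret_from_fork(); for kernel
 * threads frame->bx carries the function to call (see below), while
 * user threads continue with a copy of the parent's pt_regs.
 */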
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

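	/*
	 * Inherit the FS/GS state from the parent. A nonzero selector
	 * means the base will be reloaded from the GDT/LDT on the next
	 * context switch, so the saved MSR base is only meaningful when
	 * the selector is zero (see the invariant in __switch_to()).
	 */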
	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

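/*
 * Reset the segment registers and pt_regs so that the first return to
 * user mode after execve() starts at new_ip/new_sp. force_iret() makes
 * the return go through the full IRET path, so the new CS/SS/flags
 * actually take effect.
 */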
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported here either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;

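	/*
	 * Save the outgoing task's FPU register state; the matching
	 * restore for the incoming task happens in switch_fpu_finish()
	 * below.
	 */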
	switch_fpu_prepare(prev_fpu, cpu);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, prev_fsindex);
	savesegment(gs, prev_gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here. This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before fpu__restore(), so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT. The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl. The bases
	 * don't necessarily match the selectors, as user code can do
	 * any number of things to cause them to be inconsistent.
	 *
	 * We don't promise to preserve the bases if the selectors are
	 * nonzero. We also don't promise to preserve the base if the
	 * selector is zero and the base doesn't match whatever was
	 * most recently passed to ARCH_SET_FS/GS. (If/when the
	 * FSGSBASE instructions are enabled, we'll need to offer
	 * stronger guarantees.)
	 *
	 * As an invariant,
	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
	 * impossible.
	 */
	if (next->fsindex) {
		/* Loading a nonzero value into FS sets the index and base. */
		loadsegment(fs, next->fsindex);
	} else {
		if (next->fsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_fsindex)
				loadsegment(fs, 0);
			wrmsrl(MSR_FS_BASE, next->fsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 */
				loadsegment(fs, __USER_DS);
				loadsegment(fs, 0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_FS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->fsbase || prev_fsindex)
					loadsegment(fs, 0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_fsindex)
		prev->fsbase = 0;
	prev->fsindex = prev_fsindex;

	if (next->gsindex) {
		/* Loading a nonzero value into GS sets the index and base. */
		load_gs_index(next->gsindex);
	} else {
		if (next->gsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_gsindex)
				load_gs_index(0);
			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 *
				 * This contains a pointless SWAPGS pair.
				 * Fixing it would involve an explicit check
				 * for Xen or a new pvop.
				 */
				load_gs_index(__USER_DS);
				load_gs_index(0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_GS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->gsbase || prev_gsindex)
					load_gs_index(0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_gsindex)
		prev->gsbase = 0;
	prev->gsindex = prev_gsindex;

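	/*
	 * The outgoing state is saved, so restore the incoming task's
	 * FPU registers (paired with switch_fpu_prepare() above).
	 */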
	switch_fpu_finish(next_fpu, cpu);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1. This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN_PV
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL. We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor. As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths. Instead, we ensure that SS is never NULL in
		 * system call context. We do this by replacing NULL SS
		 * selectors at every context switch. SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt. Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes. Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	intel_rdt_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/*
	 * TBD: overwrites user setup. Should have two bits.
	 * But 64bit processes have always behaved this way,
	 * so it's not too bad. The main problem is just that
	 * 32bit children are affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * in_compat_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status. The x86 mmap() code relies on
	 * the syscall bitness, so set the x32 syscall bit right here to
	 * make in_compat_syscall() work during exec().
	 *
	 * Pretend to come from an x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current->thread.status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;
	/* Prepare the first "return" to user space */
	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current->thread.status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

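/*
 * Handle the 64-bit-only arch_prctl() options (FS/GS bases, vDSO
 * mapping). Options shared with 32-bit, such as ARCH_GET_CPUID and
 * ARCH_SET_CPUID, are handled by do_arch_prctl_common() instead; see
 * the syscall glue below.
 */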
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (option) {
	case ARCH_SET_GS:
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = arg2;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/*
		 * Not strictly needed for fs, but do it for symmetry
		 * with gs.
		 */
		if (arg2 >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = arg2;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;

		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

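/*
 * The compat (IA32) arch_prctl() supports only the common options;
 * the FS/GS base options above are 64-bit-only.
 */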
#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

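/* Report the task's user-mode stack pointer, e.g. for /proc/<pid>/stat. */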
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}