/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/intel_rdt.h>

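/*
 * Per-CPU scratch slot. This file only defines it; as far as I can
 * tell, the 64-bit SYSCALL entry path (entry_64.S) uses it to stash
 * the user stack pointer while it switches to the kernel stack.
 */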
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs, int all)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
                (void *)regs->ip);
        printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
                regs->sp, regs->flags);
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
                pr_cont("\n");

        printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

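        /*
         * Segment register reads yield only the 16-bit selectors; the
         * 64-bit FS/GS base addresses live in MSRs and are fetched
         * below with rdmsrl().
         */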
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        if (!all)
                return;

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = __read_cr4();

        printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
               es, cr0);
        printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
               cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);

        /*
         * Only print out the debug registers if they are in their
         * non-default state: the architectural reset values are 0 for
         * DR0-DR3, DR6_RESERVED for DR6 and 0x400 for DR7.
         */
        if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
            (d6 == DR6_RESERVED) && (d7 == 0x400))) {
                printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
                       d0, d1, d2);
                printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
                       d3, d6, d7);
        }

        if (boot_cpu_has(X86_FEATURE_OSPKE))
                printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
                if (dead_task->mm->context.ldt) {
                        pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt->entries,
                                dead_task->mm->context.ldt->size);
                        BUG();
                }
#endif
        }
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                unsigned long arg, struct task_struct *p, unsigned long tls)
{
        int err;
        struct pt_regs *childregs;
        struct fork_frame *fork_frame;
        struct inactive_task_frame *frame;
        struct task_struct *me = current;

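        /*
         * Lay out the child's kernel stack: struct fork_frame places an
         * inactive_task_frame (callee-saved registers plus a return
         * address) directly below the child's pt_regs, so the first
         * switch to the child pops that frame and "returns" to
         * ret_from_fork.
         */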
        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
        childregs = task_pt_regs(p);
        fork_frame = container_of(childregs, struct fork_frame, regs);
        frame = &fork_frame->frame;
        frame->bp = 0;
        frame->ret_addr = (unsigned long) ret_from_fork;
        p->thread.sp = (unsigned long) fork_frame;
        p->thread.io_bitmap_ptr = NULL;

        savesegment(gs, p->thread.gsindex);
        p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
        savesegment(fs, p->thread.fsindex);
        p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);
        memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

        if (unlikely(p->flags & PF_KTHREAD)) {
                /* kernel thread */
                memset(childregs, 0, sizeof(struct pt_regs));
                frame->bx = sp;         /* function */
                frame->r12 = arg;
                return 0;
        }
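        /*
         * ret_from_fork inspects the %rbx slot saved above: a nonzero
         * value marks a kernel thread, and it is called as a function
         * with %r12 as the argument. For a user child, %rbx is zeroed
         * below and the child returns to user mode through the copied
         * pt_regs.
         */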
        frame->bx = 0;
        *childregs = *current_pt_regs();

        childregs->ax = 0;
        if (sp)
                childregs->sp = sp;

        err = -ENOMEM;
        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
                                                  IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (in_ia32_syscall())
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)tls, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, tls);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }

        return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
                    unsigned long new_sp,
                    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
        loadsegment(fs, 0);
        loadsegment(es, _ds);
        loadsegment(ds, _ds);
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        regs->cs = _cs;
        regs->ss = _ss;
        regs->flags = X86_EFLAGS_IF;
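        /*
         * Force the return to user mode through the IRET path so that
         * the ip/sp/segment state set above is actually reloaded,
         * rather than whatever a SYSRET fast path would assume.
         */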
        force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        start_thread_common(regs, new_ip, new_sp,
                            __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
        start_thread_common(regs, new_ip, new_sp,
                            test_thread_flag(TIF_X32)
                            ? __USER_CS : __USER32_CS,
                            __USER_DS, __USER_DS);
}
#endif

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here. Set the probe on schedule() instead.
 * The function graph tracer is not supported here either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        unsigned prev_fsindex, prev_gsindex;

        switch_fpu_prepare(prev_fpu, cpu);

        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
         * (e.g. xen_load_tls())
         */
        savesegment(fs, prev_fsindex);
        savesegment(gs, prev_gsindex);

        /*
         * Load TLS before restoring any segments so that segment loads
         * reference the correct GDT entries.
         */
        load_TLS(next, cpu);

        /*
         * Leave lazy mode, flushing any hypercalls made here. This
         * must be done after loading TLS entries in the GDT but before
         * loading segments that might reference them, and it must
         * be done before fpu__restore(), so the TS bit is up to
         * date.
         */
        arch_end_context_switch(next_p);

        /* Switch DS and ES.
         *
         * Reading them only returns the selectors, but writing them (if
         * nonzero) loads the full descriptor from the GDT or LDT. The
         * LDT for next is loaded in switch_mm, and the GDT is loaded
         * above.
         *
         * We therefore need to write new values to the segment
         * registers on every context switch unless both the new and old
         * values are zero.
         *
         * Note that we don't need to do anything for CS and SS, as
         * those are saved and restored as part of pt_regs.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        /*
         * Switch FS and GS.
         *
         * These are even more complicated than DS and ES: they have
         * 64-bit bases that are controlled by arch_prctl. The bases
         * don't necessarily match the selectors, as user code can do
         * any number of things to cause them to be inconsistent.
         *
         * We don't promise to preserve the bases if the selectors are
         * nonzero. We also don't promise to preserve the base if the
         * selector is zero and the base doesn't match whatever was
         * most recently passed to ARCH_SET_FS/GS. (If/when the
         * FSGSBASE instructions are enabled, we'll need to offer
         * stronger guarantees.)
         *
         * As an invariant,
         * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
         * impossible.
         */
        if (next->fsindex) {
                /* Loading a nonzero value into FS sets the index and base. */
                loadsegment(fs, next->fsindex);
        } else {
                if (next->fsbase) {
                        /* Next index is zero but next base is nonzero. */
                        if (prev_fsindex)
                                loadsegment(fs, 0);
                        wrmsrl(MSR_FS_BASE, next->fsbase);
                } else {
                        /* Next base and index are both zero. */
                        if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
                                /*
                                 * We don't know the previous base and can't
                                 * find out without RDMSR. Forcibly clear it.
                                 */
                                loadsegment(fs, __USER_DS);
                                loadsegment(fs, 0);
                        } else {
                                /*
                                 * If the previous index is zero and ARCH_SET_FS
                                 * didn't change the base, then the base is
                                 * also zero and we don't need to do anything.
                                 */
                                if (prev->fsbase || prev_fsindex)
                                        loadsegment(fs, 0);
                        }
                }
        }
        /*
         * Save the old state and preserve the invariant.
         * NB: if prev_fsindex == 0, then we can't reliably learn the base
         * without RDMSR because Intel user code can zero it without telling
         * us and AMD user code can program any 32-bit value without telling
         * us.
         */
        if (prev_fsindex)
                prev->fsbase = 0;
        prev->fsindex = prev_fsindex;

        if (next->gsindex) {
                /* Loading a nonzero value into GS sets the index and base. */
                load_gs_index(next->gsindex);
        } else {
                if (next->gsbase) {
                        /* Next index is zero but next base is nonzero. */
                        if (prev_gsindex)
                                load_gs_index(0);
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
                } else {
                        /* Next base and index are both zero. */
                        if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
                                /*
                                 * We don't know the previous base and can't
                                 * find out without RDMSR. Forcibly clear it.
                                 *
                                 * This contains a pointless SWAPGS pair.
                                 * Fixing it would involve an explicit check
                                 * for Xen or a new pvop.
                                 */
                                load_gs_index(__USER_DS);
                                load_gs_index(0);
                        } else {
                                /*
                                 * If the previous index is zero and ARCH_SET_GS
                                 * didn't change the base, then the base is
                                 * also zero and we don't need to do anything.
                                 */
                                if (prev->gsbase || prev_gsindex)
                                        load_gs_index(0);
                        }
                }
        }
        /*
         * Save the old state and preserve the invariant.
         * NB: if prev_gsindex == 0, then we can't reliably learn the base
         * without RDMSR because Intel user code can zero it without telling
         * us and AMD user code can program any 32-bit value without telling
         * us.
         */
        if (prev_gsindex)
                prev->gsbase = 0;
        prev->gsindex = prev_gsindex;

        switch_fpu_finish(next_fpu, cpu);

        /*
         * Switch the per-CPU current task pointer.
         */
        this_cpu_write(current_task, next_p);

        /* Reload esp0 and ss1. This changes current_thread_info(). */
        load_sp0(tss, next);

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
        /*
         * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
         * current_pt_regs()->flags may not match the current task's
         * intended IOPL. We need to switch it manually.
         */
        if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
                     prev->iopl != next->iopl))
                xen_set_iopl_mask(next->iopl);
#endif

        if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
                /*
                 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
                 * does not update the cached descriptor. As a result, if we
                 * do SYSRET while SS is NULL, we'll end up in user mode with
                 * SS apparently equal to __USER_DS but actually unusable.
                 *
                 * The straightforward workaround would be to fix it up just
                 * before SYSRET, but that would slow down the system call
                 * fast paths. Instead, we ensure that SS is never NULL in
                 * system call context. We do this by replacing NULL SS
                 * selectors at every context switch. SYSCALL sets up a valid
                 * SS, so the only way to get NULL is to re-enter the kernel
                 * from CPL 3 through an interrupt. Since that can't happen
                 * in the same task as a running syscall, we are guaranteed to
                 * context switch between every interrupt vector entry and a
                 * subsequent SYSRET.
                 *
                 * We read SS first because SS reads are much faster than
                 * writes. Out of caution, we force SS to __KERNEL_DS even if
                 * it previously had a different non-NULL value.
                 */
                unsigned short ss_sel;
                savesegment(ss, ss_sel);
                if (ss_sel != __KERNEL_DS)
                        loadsegment(ss, __KERNEL_DS);
        }

        /* Load the Intel cache allocation PQR MSR. */
        intel_rdt_sched_in();

        return prev_p;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64-bit mode */
        clear_thread_flag(TIF_IA32);
        clear_thread_flag(TIF_ADDR32);
        clear_thread_flag(TIF_X32);

        /* Ensure the corresponding mm is not marked. */
        if (current->mm)
                current->mm->context.ia32_compat = 0;

        /*
         * TBD: this overwrites the user's setup. We should have two bits.
         * But 64-bit processes have always behaved this way, so it's not
         * too bad. The main problem is just that 32-bit children are
         * affected again.
         */
        current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
        /* inherit personality from parent */

        /* Make sure to be in 32-bit mode */
        set_thread_flag(TIF_ADDR32);

        /* Mark the associated mm as containing 32-bit tasks. */
        if (x32) {
                clear_thread_flag(TIF_IA32);
                set_thread_flag(TIF_X32);
                if (current->mm)
                        current->mm->context.ia32_compat = TIF_X32;
                current->personality &= ~READ_IMPLIES_EXEC;
                /*
                 * in_compat_syscall() uses the presence of the x32 syscall
                 * bit flag to determine compat status.
                 */
                current->thread.status &= ~TS_COMPAT;
        } else {
                set_thread_flag(TIF_IA32);
                clear_thread_flag(TIF_X32);
                if (current->mm)
                        current->mm->context.ia32_compat = TIF_IA32;
                current->personality |= force_personality32;
                /* Prepare the first "return" to user space */
                current->thread.status |= TS_COMPAT;
        }
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
        int ret;

        ret = map_vdso_once(image, addr);
        if (ret)
                return ret;

        return (long)image->size;
}
#endif

long do_arch_prctl(struct task_struct *task, int option, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (option) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.gsindex = 0;
                task->thread.gsbase = addr;
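                /*
                 * While we are in the kernel, the user's GS base is
                 * parked in MSR_KERNEL_GS_BASE; SWAPGS exchanges it
                 * with MSR_GS_BASE on the return to user mode, which
                 * is why the write below targets the "kernel" MSR.
                 */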
                if (doit) {
                        load_gs_index(0);
                        ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.fsindex = 0;
                task->thread.fsbase = addr;
                if (doit) {
                        /* set the selector to 0 to not confuse __switch_to */
                        loadsegment(fs, 0);
                        ret = wrmsrl_safe(MSR_FS_BASE, addr);
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fsbase;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                if (doit)
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                else
                        base = task->thread.gsbase;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
        case ARCH_MAP_VDSO_X32:
                return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
        case ARCH_MAP_VDSO_32:
                return prctl_map_vdso(&vdso_image_32, addr);
# endif
        case ARCH_MAP_VDSO_64:
                return prctl_map_vdso(&vdso_image_64, addr);
#endif

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

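/*
 * Userspace reaches do_arch_prctl() through the arch_prctl(2) system
 * call. C libraries typically do not provide a wrapper, so a call is
 * usually made via syscall(2); an illustrative sketch:
 *
 *      #include <asm/prctl.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      unsigned long base;
 *      syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 */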
SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, addr)
{
        return do_arch_prctl(current, option, addr);
}

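/*
 * Report the task's saved user stack pointer; used, for example, for
 * the kstkesp field of /proc/<pid>/stat.
 */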
89240ba0
SS
630unsigned long KSTK_ESP(struct task_struct *task)
631{
263042e4 632 return task_pt_regs(task)->sp;
89240ba0 633}