/*
 * Copyright (C) 1995	Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>

asmlinkage extern void ret_from_fork(void);

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))
		return;

	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->size);
			BUG();
		}
#endif
	}
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	p->thread.sp = (unsigned long) childregs;
	set_tsk_thread_flag(p, TIF_FORK);
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->sp = (unsigned long)childregs;
		childregs->ss = __KERNEL_DS;
		childregs->bx = sp; /* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
		return 0;
	}
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}
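
/*
 * Illustrative sketch (not from the original source): copy_thread_tls() is
 * reached from the clone(2) path.  A 64-bit caller that passes CLONE_SETTLS
 * supplies the new FS base as the tls argument; with glibc's clone()
 * wrapper that is roughly:
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *
 *	static char stack[65536];
 *	// child_fn and tls_base are hypothetical names; tls_base would
 *	// point at a TLS block the caller has set up.
 *	clone(child_fn, stack + sizeof(stack), CLONE_VM | CLONE_SETTLS,
 *	      NULL, NULL, (void *)tls_base);
 *
 * Outside IA-32 emulation, that tls value lands in
 * do_arch_prctl(p, ARCH_SET_FS, tls) above.
 */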

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;
	fpu_switch_t fpu_switch;

	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, prev_fsindex);
	savesegment(gs, prev_gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before fpu__restore(), so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl.  The bases
	 * don't necessarily match the selectors, as user code can do
	 * any number of things to cause them to be inconsistent.
	 *
	 * We don't promise to preserve the bases if the selectors are
	 * nonzero.  We also don't promise to preserve the base if the
	 * selector is zero and the base doesn't match whatever was
	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
	 * FSGSBASE instructions are enabled, we'll need to offer
	 * stronger guarantees.)
	 *
	 * As an invariant,
	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
	 * impossible.
	 */
	if (next->fsindex) {
		/* Loading a nonzero value into FS sets the index and base. */
		loadsegment(fs, next->fsindex);
	} else {
		if (next->fsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_fsindex)
				loadsegment(fs, 0);
			wrmsrl(MSR_FS_BASE, next->fsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR.  Forcibly clear it.
				 */
				loadsegment(fs, __USER_DS);
				loadsegment(fs, 0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_FS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->fsbase || prev_fsindex)
					loadsegment(fs, 0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_fsindex)
		prev->fsbase = 0;
	prev->fsindex = prev_fsindex;

	if (next->gsindex) {
		/* Loading a nonzero value into GS sets the index and base. */
		load_gs_index(next->gsindex);
	} else {
		if (next->gsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_gsindex)
				load_gs_index(0);
			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR.  Forcibly clear it.
				 *
				 * This contains a pointless SWAPGS pair.
				 * Fixing it would involve an explicit check
				 * for Xen or a new pvop.
				 */
				load_gs_index(__USER_DS);
				load_gs_index(0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_GS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->gsbase || prev_gsindex)
					load_gs_index(0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_gsindex)
		prev->gsbase = 0;
	prev->gsindex = prev_gsindex;

	switch_fpu_finish(next_fpu, fpu_switch);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1.  This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context.  We do this by replacing NULL SS
		 * selectors at every context switch.  SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt.  Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_X32;
		current->personality &= ~READ_IMPLIES_EXEC;
		/* in_compat_syscall() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = addr;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = addr;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, addr);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, addr);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
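
/*
 * Usage sketch (illustrative; not part of the original file): userspace
 * reaches do_arch_prctl() through the arch_prctl(2) syscall.  Assuming a
 * 64-bit glibc program, something like:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);	// read FS base
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, new_base);	// new_base is a
 *							// hypothetical address
 *
 * ARCH_SET_FS/GS reject addresses at or above TASK_SIZE_MAX with -EPERM,
 * as checked above.
 */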

unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}