arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff,
	       regs->ip, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
	       regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
	       es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
	       cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	      (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

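/*
 * For reference, the dump emitted by __show_regs() above has this shape
 * (the values below are fabricated placeholders derived from the format
 * strings, not captured output):
 *
 *	RIP: 0010:[<ffffffff81000000>] some_function+0x10/0x40
 *	RSP: 0018:ffffc90000000000 EFLAGS: 00000246
 *	RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000002
 *	...
 */
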
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->size);
			BUG();
		}
#endif
	}
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

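/*
 * Illustrative userspace sketch (not part of this file): on x86-64 a raw
 * clone(2) call with CLONE_SETTLS passes the new thread's FS base as the
 * fifth syscall argument, which reaches the do_arch_prctl(p, ARCH_SET_FS,
 * tls) call in copy_thread_tls() above. child_stack_top and tls_base are
 * hypothetical names for this example.
 *
 *	pid_t child = syscall(SYS_clone,
 *			      CLONE_VM | CLONE_FS | CLONE_SETTLS | SIGCHLD,
 *			      child_stack_top, NULL, NULL, tls_base);
 *
 * (argument order on x86-64: flags, new stack, parent tid ptr, child tid
 * ptr, tls)
 */
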
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

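/*
 * Illustrative note (an assumption about the caller, not code in this
 * file): the binfmt loaders invoke start_thread() at the tail of exec to
 * aim the registers at the freshly mapped image; fs/binfmt_elf.c
 * effectively does:
 *
 *	start_thread(regs, elf_entry, bprm->p);
 */
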
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;
	fpu_switch_t fpu_switch;

	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, prev_fsindex);
	savesegment(gs, prev_gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here. This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before fpu__restore(), so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT. The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl. The bases
	 * don't necessarily match the selectors, as user code can do
	 * any number of things to cause them to be inconsistent.
	 *
	 * We don't promise to preserve the bases if the selectors are
	 * nonzero. We also don't promise to preserve the base if the
	 * selector is zero and the base doesn't match whatever was
	 * most recently passed to ARCH_SET_FS/GS. (If/when the
	 * FSGSBASE instructions are enabled, we'll need to offer
	 * stronger guarantees.)
	 *
	 * As an invariant,
	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
	 * impossible.
	 */
	if (next->fsindex) {
		/* Loading a nonzero value into FS sets the index and base. */
		loadsegment(fs, next->fsindex);
	} else {
		if (next->fsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_fsindex)
				loadsegment(fs, 0);
			wrmsrl(MSR_FS_BASE, next->fsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 */
				loadsegment(fs, __USER_DS);
				loadsegment(fs, 0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_FS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->fsbase || prev_fsindex)
					loadsegment(fs, 0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_fsindex)
		prev->fsbase = 0;
	prev->fsindex = prev_fsindex;

	if (next->gsindex) {
		/* Loading a nonzero value into GS sets the index and base. */
		load_gs_index(next->gsindex);
	} else {
		if (next->gsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_gsindex)
				load_gs_index(0);
			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 *
				 * This contains a pointless SWAPGS pair.
				 * Fixing it would involve an explicit check
				 * for Xen or a new pvop.
				 */
				load_gs_index(__USER_DS);
				load_gs_index(0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_GS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->gsbase || prev_gsindex)
					load_gs_index(0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_gsindex)
		prev->gsbase = 0;
	prev->gsindex = prev_gsindex;

	switch_fpu_finish(next_fpu, fpu_switch);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1. This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps.
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL. We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor. As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths. Instead, we ensure that SS is never NULL in
		 * system call context. We do this by replacing NULL SS
		 * selectors at every context switch. SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt. Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes. Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	return prev_p;
}

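/*
 * Worked example of the FS inconsistency that the comments in __switch_to()
 * above describe (illustrative userspace sequence; the base value and the
 * assumption that 'sel' names a valid TLS descriptor are hypothetical):
 *
 *	arch_prctl(ARCH_SET_FS, 0x7f0000000000);     -> fsindex 0, fsbase set
 *	asm volatile("movw %0, %%fs" :: "rm" (sel)); -> nonzero selector: the
 *						       CPU reloads index *and*
 *						       base from the descriptor
 *
 * After the second step the kernel's saved fsbase no longer matches the
 * hardware base, which is why __switch_to() trusts a nonzero selector and
 * falls back to writing MSR_FS_BASE only when the selector is zero.
 */
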
void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_X32;
		current->personality &= ~READ_IMPLIES_EXEC;
		/* in_compat_syscall() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current->thread.status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current->thread.status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

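/*
 * Illustrative sketch (an assumption about the caller, not code in this
 * file): the ARCH_MAP_VDSO_* codes handled below exist for
 * checkpoint/restore tools such as CRIU, which re-map the vDSO at the
 * address a restored task expects:
 *
 *	syscall(SYS_arch_prctl, ARCH_MAP_VDSO_64, addr);
 */
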
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = addr;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = addr;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, addr);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, addr);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

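/*
 * Illustrative userspace sketch (not part of this file; the raw syscall is
 * used on the assumption that libc provides no arch_prctl wrapper, and the
 * base value is an arbitrary example):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x7f0000000000UL);
 *
 * ARCH_SET_FS/GS reject addresses at or above TASK_SIZE_MAX with -EPERM,
 * as the checks in do_arch_prctl() above show.
 */
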
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}