/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/intel_rdt.h>

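/*
 * Per-CPU scratch slot for the 64-bit SYSCALL entry path: the user RSP
 * is stashed here before the entry code switches to the kernel stack
 * (see entry_SYSCALL_64 in entry_64.S).
 */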
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
		(void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
		regs->sp, regs->flags);
	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		if (dead_task->mm->context.ldt) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt->entries,
				dead_task->mm->context.ldt->size);
			BUG();
		}
#endif
	}
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

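	/*
	 * The child inherits a segment base only when the matching selector
	 * is zero: a nonzero selector means the base is defined by the
	 * GDT/LDT descriptor and will be reloaded on the next segment load,
	 * so the cached MSR base would be stale.
	 */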
	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

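	/*
	 * For kernel threads, ret_from_fork picks the thread function out
	 * of the callee-saved bx slot and its argument out of r12, as set
	 * up below; user children instead get a copy of the parent's
	 * pt_regs.
	 */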
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
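	/*
	 * Return to user space via the full IRET path so the CS/SS/flags
	 * written above are actually reloaded; the SYSRET fast path would
	 * skip them. force_iret() marks work pending to make that happen.
	 */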
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;

	switch_fpu_prepare(prev_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, prev_fsindex);
	savesegment(gs, prev_gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here. This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before fpu__restore(), so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT. The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl. The bases
	 * don't necessarily match the selectors, as user code can do
	 * any number of things to cause them to be inconsistent.
	 *
	 * We don't promise to preserve the bases if the selectors are
	 * nonzero. We also don't promise to preserve the base if the
	 * selector is zero and the base doesn't match whatever was
	 * most recently passed to ARCH_SET_FS/GS. (If/when the
	 * FSGSBASE instructions are enabled, we'll need to offer
	 * stronger guarantees.)
	 *
	 * As an invariant,
	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
	 * impossible.
	 */
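	/*
	 * Illustrative sketch of how user code can make base and selector
	 * inconsistent: arch_prctl(ARCH_SET_FS, base) leaves fsindex == 0
	 * with a nonzero base, while a later "mov %sel, %fs" silently
	 * replaces that base with the one from the GDT/LDT descriptor.
	 */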
	if (next->fsindex) {
		/* Loading a nonzero value into FS sets the index and base. */
		loadsegment(fs, next->fsindex);
	} else {
		if (next->fsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_fsindex)
				loadsegment(fs, 0);
			wrmsrl(MSR_FS_BASE, next->fsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 */
				loadsegment(fs, __USER_DS);
				loadsegment(fs, 0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_FS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->fsbase || prev_fsindex)
					loadsegment(fs, 0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_fsindex)
		prev->fsbase = 0;
	prev->fsindex = prev_fsindex;

	if (next->gsindex) {
		/* Loading a nonzero value into GS sets the index and base. */
		load_gs_index(next->gsindex);
	} else {
		if (next->gsbase) {
			/* Next index is zero but next base is nonzero. */
			if (prev_gsindex)
				load_gs_index(0);
			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
		} else {
			/* Next base and index are both zero. */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				/*
				 * We don't know the previous base and can't
				 * find out without RDMSR. Forcibly clear it.
				 *
				 * This contains a pointless SWAPGS pair.
				 * Fixing it would involve an explicit check
				 * for Xen or a new pvop.
				 */
				load_gs_index(__USER_DS);
				load_gs_index(0);
			} else {
				/*
				 * If the previous index is zero and ARCH_SET_GS
				 * didn't change the base, then the base is
				 * also zero and we don't need to do anything.
				 */
				if (prev->gsbase || prev_gsindex)
					load_gs_index(0);
			}
		}
	}
	/*
	 * Save the old state and preserve the invariant.
	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
	 * without RDMSR because Intel user code can zero it without telling
	 * us and AMD user code can program any 32-bit value without telling
	 * us.
	 */
	if (prev_gsindex)
		prev->gsbase = 0;
	prev->gsindex = prev_gsindex;

	switch_fpu_finish(next_fpu, cpu);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);

	/* Reload esp0 and ss1. This changes current_thread_info(). */
	load_sp0(tss, next);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL. We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor. As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths. Instead, we ensure that SS is never NULL in
		 * system call context. We do this by replacing NULL SS
		 * selectors at every context switch. SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt. Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes. Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	intel_rdt_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_X32;
		current->personality &= ~READ_IMPLIES_EXEC;
		/* in_compat_syscall() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current->thread.status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current->thread.status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

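/*
 * Checkpoint/restore helper: callers such as CRIU can install a fresh
 * vDSO image at a chosen address via arch_prctl(ARCH_MAP_VDSO_*, addr);
 * on success the image size is returned, so the caller knows how much
 * address space the mapping consumed.
 */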
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.gsindex = 0;
		task->thread.gsbase = addr;
		if (doit) {
			load_gs_index(0);
			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_MAX)
			return -EPERM;
		cpu = get_cpu();
		task->thread.fsindex = 0;
		task->thread.fsbase = addr;
		if (doit) {
			/* set the selector to 0 to not confuse __switch_to */
			loadsegment(fs, 0);
			ret = wrmsrl_safe(MSR_FS_BASE, addr);
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gsbase;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, addr);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, addr);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

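/*
 * Userspace usage sketch (illustrative only, not part of this file):
 * reading back the current FS base through the raw syscall, assuming
 * glibc's syscall(2) wrapper:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <asm/prctl.h>
 *
 *	unsigned long base;
 *	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &base) == 0)
 *		printf("fsbase = %#lx\n", base);
 */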
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}