/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);

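/*
 * Usage sketch (not part of this file): a subsystem that wants to know
 * when this CPU enters or leaves idle registers a notifier_block on the
 * chain above.  The names my_idle_cb/my_idle_nb below are hypothetical.
 *
 *	static int my_idle_cb(struct notifier_block *nb,
 *			      unsigned long val, void *data)
 *	{
 *		if (val == IDLE_START)
 *			;	// CPU is about to go idle
 *		else if (val == IDLE_END)
 *			;	// CPU has left idle
 *		return NOTIFY_OK;
 *	}
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_cb,
 *	};
 *	idle_notifier_register(&my_idle_nb);
 */
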
void enter_idle(void)
{
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

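/*
 * Note on the enter_idle()/__exit_idle() pairing (descriptive only):
 * enter_idle() sets the per-cpu is_idle byte before firing IDLE_START,
 * and __exit_idle() uses an atomic test-and-clear of bit 0 so that
 * IDLE_END is delivered exactly once per idle period, regardless of
 * whether it is reported first by the interrupt path (exit_idle()) or
 * by the idle loop itself after pm_idle() returns.
 */
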
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;

	/*
	 * If we're the non-boot CPU, nothing set the PDA stack
	 * canary up for us - and if we are the boot CPU we have
	 * a 0 stack canary. This is a good place for updating
	 * it, as we won't ever return from this function (so the
	 * invalid canaries already on the stack won't ever
	 * trigger):
	 */
	boot_init_stack_canary();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/*
			 * In many cases the interrupt that ended idle
			 * has already called exit_idle.  But some idle
			 * loops can be woken up without interrupt.
			 */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

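/*
 * A few notes on the loop above (descriptive only):
 *
 *  - tick_nohz_stop_sched_tick(1) switches off the periodic timer tick
 *    while the CPU idles, so a truly idle CPU takes no timer interrupts.
 *  - TS_POLLING tells the scheduler that this task polls need_resched();
 *    a remote CPU that sets TIF_NEED_RESCHED can then skip sending a
 *    reschedule IPI.  The rmb() orders the flag read within the loop.
 *  - Interrupts are disabled before enter_idle(), and it is up to
 *    pm_idle() (e.g. a sti;hlt based routine) to re-enable them
 *    atomically with going to sleep, so no wakeup can be lost in between.
 */
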
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

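/*
 * Background for the FS/GS lines above (descriptive only): on x86-64 the
 * selector registers hold 16-bit indices, while the real bases live in
 * MSRs.  rdmsrl(MSR_GS_BASE) returns the base that is active right now,
 * and MSR_KERNEL_GS_BASE ("knlGS" in the dump) holds the inactive base
 * that the swapgs instruction will swap in on the next kernel entry or
 * exit.
 */
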
/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}

	ds_exit_thread(current);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

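/*
 * A minimal sketch of what the helpers above produce, assuming the fixed
 * user_desc initializer in set_32bit_tls() (descriptive only):
 *
 *	base  = ud.base_addr;	// split across the descriptor's base bytes
 *	limit = 0xfffff;	// with limit_in_pages set: a 4GB segment
 *	type  = writable data;	// seg_32bit sets the descriptor's D/B bit
 *
 * read_32bit_tls() simply reassembles the scattered base bytes via
 * get_desc_base().
 */
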
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

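/*
 * Stack layout produced by copy_thread() above (descriptive only): the
 * child's pt_regs frame sits at the very top of its THREAD_SIZE stack,
 * with thread.sp pointing at it, so the child comes out of the context
 * switch in ret_from_fork with a register frame that is a copy of the
 * parent's except for ax = 0 - which is why fork() returns 0 in the
 * child:
 *
 *	task_stack_page(p) + THREAD_SIZE -> +------------------+ <- sp0
 *	                                    |  struct pt_regs  |
 *	p->thread.sp -------------------->  +------------------+
 *	                                    | kernel stack ... |
 *	                                    |   (grows down)   |
 */
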
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	percpu_write(old_rsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

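/*
 * Note on start_thread() (descriptive only): this runs at execve() time.
 * All data segment selectors plus fs/gs are zeroed so nothing leaks in
 * from the previous program, user ip/sp are loaded from the new binary,
 * and regs->flags = 0x200 is X86_EFLAGS_IF: the new program starts with
 * interrupts enabled and every other flag clear.
 */
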
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

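/*
 * Userspace reaches get_tsc_mode()/set_tsc_mode() through prctl(2).  A
 * hypothetical test snippet (not part of this file):
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *	// any rdtsc in this thread now raises SIGSEGV, because
 *	// CR4.TSD makes rdtsc a privileged instruction at CPL 3
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE, 0, 0, 0);
 */
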
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

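/*
 * Note on the io bitmap handling above (descriptive only): the hardware
 * consults the TSS copy of the bitmap on every userspace in/out
 * instruction, so on a switch to a task with ioperm() permissions the
 * relevant prefix is copied in; max(prev, next) bytes are touched so that
 * bits the previous task had enabled are overwritten as well.  When
 * switching away to a task without a bitmap, the used prefix is filled
 * with 0xff (all ports denied) instead of copied, which is cheaper and
 * fails safe.
 */
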
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here; set the probe on schedule() instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When the prev process used a 64bit
	 * base, always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear the 64bit base, since an overloaded base is
		 * always mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when the next process has a 64bit base, use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		     (unsigned long)task_stack_page(next_p) +
		     THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

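/*
 * Worked example for the fs switch above (descriptive only).  Suppose
 * prev ran 64-bit code with a base set via MSR_FS_BASE (prev->fs != 0,
 * live fsindex == 0) and next is a 32-bit task using a TLS selector
 * (next->fsindex == FS_TLS_SEL, next->fs == 0):
 *
 *  - (fsindex | next->fsindex | prev->fs) is non-zero, so
 *    loadsegment(fs, next->fsindex) loads the TLS selector, whose
 *    descriptor base replaces the old MSR base;
 *  - fsindex was 0, so prev->fs is kept for restoring on the way back;
 *  - next->fs == 0, so no wrmsrl() is needed.
 *
 * The "information leak" the comment refers to is the previous task's
 * MSR base staying visible if neither the selector nor the MSR were
 * reloaded.
 */
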
/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

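/*
 * How the walk above finds the wait channel (descriptive only): with
 * frame pointers, each frame stores the saved rbp at fp and the return
 * address at fp+8.  Starting from the sleeping task's saved rsp, the
 * loop follows at most 16 frames, staying inside the task's stack, and
 * returns the first return address that is not in scheduler code - i.e.
 * the caller that actually blocked:
 *
 *	fp     -> saved rbp (link to next frame)
 *	fp + 8 -> return address (candidate wchan)
 */
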
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

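/*
 * Userspace view of do_arch_prctl() (illustrative only; tls_block is a
 * hypothetical pointer, not part of this file):
 *
 *	#include <asm/prctl.h>
 *
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *	// bases <= 0xffffffff are served from a GDT TLS slot,
 *	// larger ones via MSR_FS_BASE
 *	unsigned long base;
 *	arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 *
 * Note the double use of the addr argument: a base address for the SET
 * codes, a user pointer to store into for the GET codes.
 */
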
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
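
/*
 * Example of the arithmetic above (descriptive only): with randomization
 * on, sp is lowered by 0..8191 bytes and then rounded down to a 16-byte
 * boundary (the ABI-required stack alignment), so the process start
 * stack lands on one of 8192/16 = 512 possible aligned offsets.
 */
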
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
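
/*
 * Likewise for the brk above (descriptive only): 0x02000000 is a 32MB
 * window, so the start of the heap is placed at a random page inside
 * [mm->brk, mm->brk + 32MB), falling back to the unrandomized mm->brk
 * when randomize_range() returns 0.
 */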