arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

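/*
 * Illustrative sketch only, not part of the original file: a subsystem that
 * wants to know when a CPU enters or leaves the idle loop can hook the
 * atomic notifier chain above.  Assuming a hypothetical callback named
 * my_idle_notify(), the usage would look roughly like:
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *unused)
 *	{
 *		return (action == IDLE_START || action == IDLE_END)
 *			? NOTIFY_OK : NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 *
 * Because this is an atomic notifier chain invoked from the idle path with
 * interrupts disabled, the callback must not sleep.
 */
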
/*
 * We use this if we don't have any better
 * idle routine..
 */
static void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                ktime_t t0, t1;
                u64 t0n, t1n;

                t0 = ktime_get();
                t0n = ktime_to_ns(t0);
                safe_halt();	/* enables interrupts racelessly */
                local_irq_disable();
                t1 = ktime_get();
                t1n = ktime_to_ns(t1);
                sched_clock_idle_wakeup_event(t1n - t0n);
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

static void do_nothing(void *unused)
{
}

void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map, tmp = current->cpus_allowed;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) &&
                                        !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
                /*
                 * We waited 1 sec, if a CPU still did not call idle
                 * it may be because it is in idle and not waking up
                 * because it has nothing to do.
                 * Give all the remaining CPUS a kick.
                 */
                smp_call_function_mask(map, do_nothing, 0, 0);
        } while (!cpus_empty(map));

        set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
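
/*
 * Illustrative sketch only, not part of the original file: the expected
 * caller of cpu_idle_wait() is code that changes pm_idle at run time (for
 * example the ACPI processor driver).  With a hypothetical replacement
 * routine my_new_idle(), the hand-over looks roughly like:
 *
 *	pm_idle = my_new_idle;
 *	wmb();
 *	cpu_idle_wait();
 *
 * Once cpu_idle_wait() returns, every online CPU has passed through the top
 * of the idle loop at least once and re-read pm_idle, so the old idle
 * routine should no longer be running anywhere.
 */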

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        tick_nohz_stop_sched_tick();

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(eax, ecx);
        }
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int printed;
        if (cpu_has(c, X86_FEATURE_MWAIT)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => All CPUs support mwait
                 */
                if (!pm_idle) {
                        if (!printed) {
                                printk(KERN_INFO "using mwait in idle threads.\n");
                                printed = 1;
                        }
                        pm_idle = mwait_idle;
                }
        }
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);
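
/*
 * Corresponding kernel command line usage: booting with "idle=poll" selects
 * the polling idle loop above, "idle=mwait" sets force_mwait, and any other
 * value is rejected by idle_setup().
 */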

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
                regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;

        prev = &prev_p->thread;
        next = &next_p->thread;

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(&next->i387.fxsave);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        tss->rsp0 = next->rsp0;

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
        (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
            || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, rip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
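
/*
 * Illustrative sketch only, not part of the original file: user space
 * normally reaches this through syscall(2), since glibc provides no
 * dedicated arch_prctl() wrapper.  Reading back the current FS base could
 * look roughly like:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long fsbase;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase);
 *
 * Note that for ARCH_GET_FS/ARCH_GET_GS the 'addr' argument is a user
 * pointer that receives the base, while for ARCH_SET_FS/ARCH_SET_GS it is
 * the new base address itself, as handled in do_arch_prctl() above.
 */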

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        pp = task_pt_regs(tsk);

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}