arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
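
/*
 * Illustrative sketch (not part of the original file): a hypothetical driver
 * that wants to react to this CPU entering and leaving idle could hook the
 * chain above with a standard notifier_block.  The callback and variable
 * names below are made up.
 *
 *      static int my_idle_notify(struct notifier_block *nb,
 *                                unsigned long action, void *unused)
 *      {
 *              if (action == IDLE_START)
 *                      printk(KERN_DEBUG "cpu entering idle\n");
 *              else if (action == IDLE_END)
 *                      printk(KERN_DEBUG "cpu leaving idle\n");
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block my_idle_nb = {
 *              .notifier_call = my_idle_notify,
 *      };
 *
 *      idle_notifier_register(&my_idle_nb);
 */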

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better idle routine.
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule).
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /*
                         * In many cases the interrupt that ended idle
                         * has already called exit_idle. But some idle
                         * loops can be woken up without interrupt.
                         */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 * handler on SMP systems.
 *
 * Caller must have changed pm_idle to the new value before the call. Old
 * pm_idle value will not be used by any CPU after the return of this function.
 */
void cpu_idle_wait(void)
{
        smp_mb();
        /* kick all the CPUs so that they exit out of pm_idle */
        smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
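
/*
 * Illustrative sketch (not part of the original file): per the comment above,
 * code that installs a different idle routine publishes the new pointer first
 * and then calls cpu_idle_wait() so that no CPU keeps running the old one.
 * The routine name below is hypothetical.
 *
 *      pm_idle = my_new_idle_routine;
 *      cpu_idle_wait();
 */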

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}

static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int selected;

        if (selected)
                return;
#ifdef CONFIG_X86_SMP
        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
        }
#endif
        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait.
                 */
                if (!pm_idle) {
                        printk(KERN_INFO "using mwait in idle threads.\n");
                        pm_idle = mwait_idle;
                }
        }
        selected = 1;
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);
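
/*
 * Illustrative usage (kernel command line, not code): idle_setup() above makes
 * "idle=poll" select poll_idle() and "idle=mwait" force the MWAIT-based
 * routine, e.g. booting with:
 *
 *      ... idle=poll
 */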

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}
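
/*
 * Illustrative sketch (not part of the original file): get_tsc_mode() and
 * set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl() options, so a
 * userspace process that wants RDTSC to fault (e.g. for deterministic timing)
 * could do roughly:
 *
 *      #include <sys/prctl.h>
 *      #include <linux/prctl.h>
 *
 *      prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *
 * and read the current mode back with:
 *
 *      int mode;
 *      prctl(PR_GET_TSC, &mode, 0, 0, 0);
 */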

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /*
                 * we clear debugctl to make sure DS
                 * is not in use when we change it
                 */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /*
                 * A segment register != 0 always requires a reload.
                 * Also reload when it has changed.
                 * When the prev process used a 64bit base, always reload
                 * to avoid an information leak.
                 */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /*
                         * Check if the user used a selector != 0; if yes
                         * clear the 64bit base, since an overloaded base
                         * is always mapped to the Null selector.
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /*
         * If the task has used the FPU in the last 5 timeslices, just do a
         * full restore of the math state immediately to avoid the trap; the
         * chances of needing the FPU soon are obviously high now.
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /*
         * TBD: this overwrites the user's setup.  Should have two bits.
         * But 64bit processes have always behaved this way, so it's not
         * too bad.  The main problem is just that 32bit children are
         * affected again.
         */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack + THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack + THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp + 8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}
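
/*
 * Illustrative note (not part of the original file): get_wchan() walks the
 * sleeping task's saved frame pointers until it finds a return address
 * outside the scheduler; procfs reports that result, so it is roughly
 * visible from userspace as:
 *
 *      $ cat /proc/<pid>/wchan
 */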

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /*
                 * Handle small bases via the GDT because that's faster to
                 * switch.
                 */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /*
                 * Not strictly needed for fs, but do it for symmetry
                 * with gs.
                 */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /*
                 * Handle small bases via the GDT because that's faster to
                 * switch.
                 */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /*
                                 * Set the selector to 0 to not confuse
                                 * __switch_to.
                                 */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
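
/*
 * Illustrative sketch (not part of the original file): 64-bit userspace
 * reaches do_arch_prctl() through the arch_prctl syscall, e.g. a threading
 * library setting up its TLS block (the variable name tls_block below is a
 * placeholder) might do roughly:
 *
 *      #include <asm/prctl.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 *
 * and read the base back with:
 *
 *      unsigned long base;
 *      syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 */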

unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}