/*
 * Copyright (C) 1995  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
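
/*
 * Note: pm_idle is filled in by the boot-time idle-routine selection code
 * (outside this file).  cpu_idle() below calls it with interrupts disabled;
 * the routine is expected to re-enable them while idling, as default_idle()
 * does via safe_halt().
 */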

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
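
/*
 * A callback registered here is invoked with IDLE_START when this CPU
 * enters idle and with IDLE_END when it leaves.  For example, a hypothetical
 * notifier might look like:
 *
 *      static int my_idle_notify(struct notifier_block *nb,
 *                                unsigned long action, void *unused)
 *      {
 *              return NOTIFY_OK;
 *      }
 *      static struct notifier_block my_idle_nb = {
 *              .notifier_call = my_idle_notify,
 *      };
 *      ...
 *      idle_notifier_register(&my_idle_nb);
 */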

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better idle routine.
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched())
                safe_halt();    /* enables interrupts racelessly */
        else
                local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {

                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        pm_idle();
                        /*
                         * In many cases the interrupt that ended idle
                         * has already called exit_idle.  But some idle
                         * loops can be woken up without interrupt.
                         */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

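/*
 * Helpers for the GDT-based TLS slots.  A base that fits in 32 bits can be
 * installed as an ordinary TLS descriptor so that do_arch_prctl() below can
 * switch it with a cheap segment-register load; bases above 4GB have to go
 * through MSR_FS_BASE / MSR_KERNEL_GS_BASE instead.
 */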
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

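/*
 * Set up the state of a freshly forked child: copy the parent's pt_regs
 * (with ax = 0 so the child sees a zero return value from fork/clone),
 * duplicate the parent's I/O permission bitmap if it has one, and honor
 * CLONE_SETTLS by installing the requested TLS descriptor or FS base.
 */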
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
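        /* 0x200 is X86_EFLAGS_IF: the new program starts with interrupts enabled */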
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}
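
/*
 * get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()
 * options.  A hypothetical user-space caller would do, e.g.:
 *
 *      prctl(PR_SET_TSC, PR_TSC_SIGSEGV);      // rdtsc now raises SIGSEGV
 *      prctl(PR_SET_TSC, PR_TSC_ENABLE);       // rdtsc allowed again
 */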

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

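/*
 * Slow-path context-switch work, only needed when one of the two tasks uses
 * an "extra" feature: the DS/BTS debug store, hardware breakpoints, the
 * TIF_NOTSC CR4.TSD toggle, or an I/O permission bitmap.  __switch_to()
 * calls this only when a _TIF_WORK_CTXSW flag is set on either task.
 */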
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread,
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /*
                 * A segment register with a nonzero selector always requires
                 * a reload; also reload when it has changed.  When the
                 * previous process used a 64-bit base, always reload to
                 * avoid an information leak.
                 */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /*
                         * Check if the user used a selector != 0; if yes,
                         * clear the 64-bit base, since an overloaded base
                         * is always mapped to the NULL selector.
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when the next process has a 64-bit base, use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /*
         * If the task has used the FPU in the last 5 timeslices, just do a
         * full restore of the math state immediately to avoid the trap; the
         * chances of needing the FPU soon are obviously high now.
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64-bit mode */
        clear_thread_flag(TIF_IA32);

        /*
         * TBD: this overwrites the user's setup; we should really have two
         * bits for it.  But 64-bit processes have always behaved this way,
         * so it's not too bad.  The main problem is just that 32-bit
         * children are affected again.
         */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

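/*
 * Report where a blocked task is sleeping: walk the saved frame pointers
 * starting from the task's saved stack pointer (at most 16 frames, staying
 * within the task's stack) and return the first return address that is not
 * in a scheduler function.  Only meaningful for a task that is not running.
 */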
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
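
/*
 * From user space these are reached through the arch_prctl(2) syscall;
 * a hypothetical caller might do:
 *
 *      arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *      arch_prctl(ARCH_GET_FS, (unsigned long)&fsbase);
 *
 * which is how 64-bit threading libraries typically set up their TLS base.
 */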
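/*
 * Randomize the initial user stack pointer by up to 8KB (unless the task's
 * personality disables randomization) and keep the result 16-byte aligned.
 */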
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

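/*
 * Place the randomized brk anywhere up to 32MB (0x02000000 bytes) above the
 * minimal brk; fall back to mm->brk itself if the range turns out empty.
 */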
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}