arch/x86/kernel/process_64.c (mirror_ubuntu-artful-kernel.git)
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power-management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

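/* Register a callback to be notified when this CPU enters or leaves idle. */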
void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

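/* Mark this CPU as idle in the PDA and fire the IDLE_START notifiers. */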
void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better idle routine.
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched())
                safe_halt();    /* enables interrupts racelessly */
        else
                local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        idle();
                        start_critical_timings();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

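/* Reset the per-thread state (ABI flags, debug registers, TLS, FPU) on exec. */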
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        clear_fpu(tsk);
        clear_used_math();
}

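/* A task being released should no longer own an LDT; complain loudly if it does. */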
void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

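/* Set up the kernel stack, registers and per-thread state for a new child task. */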
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

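/* Set up the user-mode register state for a freshly exec'ed 64-bit task. */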
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

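/*
 * CR4.TSD ("time stamp disable") makes RDTSC a privileged instruction,
 * so user-mode reads of the TSC fault while it is set.
 */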
static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

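/*
 * Slow-path context-switch work: debug registers, the DS save area,
 * per-task TSC disabling and the I/O permission bitmap.
 */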
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
                  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

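/*
 * Find the "waiting channel": walk the sleeping task's saved frame
 * pointers until we hit a return address outside the scheduler.
 */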
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

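/*
 * arch_prctl(): get or set the FS/GS base of a 64-bit task. Small bases
 * are handled through a GDT slot, large ones through the base MSRs.
 */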
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

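/* Randomize the initial stack pointer a little and keep it 16-byte aligned. */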
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

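/* Place the randomized brk somewhere within 32MB above the current brk. */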
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}