arch/x86/kernel/process_64.c

/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
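
/*
 * Purely illustrative sketch of how a caller might hook the idle notifier
 * chain declared above.  The names "my_idle_count", "my_idle_notify" and
 * "my_idle_nb" are hypothetical; the IDLE_START/IDLE_END events are the
 * ones raised by enter_idle() and __exit_idle() below.
 *
 *	static atomic_t my_idle_count = ATOMIC_INIT(0);
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			atomic_inc(&my_idle_count);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 * and, from the caller's init path:
 *
 *	idle_notifier_register(&my_idle_nb);
 */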

void enter_idle(void)
{
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;

	/*
	 * If we're the non-boot CPU, nothing set the PDA stack
	 * canary up for us - and if we are the boot CPU we have
	 * a 0 stack canary. This is a good place for updating
	 * it, as we won't ever return from this function (so the
	 * invalid canaries already on the stack won't ever
	 * trigger):
	 */
	boot_init_stack_canary();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}

	ds_exit_thread(current);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
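
/*
 * For orientation: copy_thread() is not called from this file; the generic
 * fork path invokes it for every new task, roughly:
 *
 *	sys_fork()/sys_clone()/sys_vfork()	(this file, below)
 *	  -> do_fork()				(kernel/fork.c)
 *	    -> copy_process()
 *	      -> copy_thread()
 *
 * The sp == ~0UL check above appears to be the marker used when a kernel
 * thread is being created: such a child has no user stack, so its saved
 * stack pointer is aimed at its own in-kernel pt_regs instead.
 */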

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	percpu_write(old_rsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);
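
/*
 * start_thread() is used by the binary loaders to set up the initial
 * user-mode register state once a new image has been mapped; for ELF it
 * is reached from load_elf_binary() in fs/binfmt_elf.c, roughly as
 *
 *	start_thread(regs, elf_entry, bprm->p);
 *
 * (the exact call site may differ between kernel versions).  The magic
 * 0x200 written into regs->flags above is X86_EFLAGS_IF, i.e. the new
 * task enters user mode with interrupts enabled and all other flags
 * cleared.
 */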

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
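
/*
 * get_tsc_mode()/set_tsc_mode() are reached from userspace through the
 * PR_GET_TSC/PR_SET_TSC prctl() commands (dispatched from kernel/sys.c;
 * the PR_TSC_* constants live in linux/prctl.h).  A minimal, purely
 * illustrative userspace use might look like:
 *
 *	#include <sys/prctl.h>
 *
 *	int mode;
 *
 *	prctl(PR_GET_TSC, &mode);
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);
 *
 * After the second call, CR4.TSD is kept set while this task runs (see
 * hard_disable_TSC() above and __switch_to_xtra() below), so a RDTSC
 * executed in user mode faults and the task receives SIGSEGV.
 */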

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported too.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		     (unsigned long)task_stack_page(next_p) +
		     THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64-bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
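
/*
 * The "if (!newsp)" case above means a clone() call that passes a NULL
 * child stack simply reuses the caller's stack pointer, which is what
 * fork()-style callers rely on.  A thread-style caller normally goes
 * through the glibc clone() wrapper (requires _GNU_SOURCE) and supplies
 * its own stack.  Illustrative only; "worker", "arg" and STACK_SIZE are
 * made-up names:
 *
 *	#include <sched.h>
 *
 *	char *stack = malloc(STACK_SIZE);
 *	pid_t pid;
 *
 *	pid = clone(worker, stack + STACK_SIZE,
 *		    CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, arg);
 */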

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
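
/*
 * get_wchan() backs /proc/<pid>/wchan (and the WCHAN column of ps/top):
 * it walks the sleeping task's saved frame-pointer chain for at most 16
 * frames until it finds a return address outside the scheduler
 * (in_sched_functions()), and that symbol name is what
 *
 *	$ cat /proc/<pid>/wchan
 *
 * prints.  Because the walk follows the %rbp chain, the result is
 * generally only meaningful on kernels built with CONFIG_FRAME_POINTER.
 */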

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
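
/*
 * sys_arch_prctl() is the 64-bit arch_prctl(2) entry point.  glibc has
 * historically provided no wrapper for it, so callers typically go
 * through syscall(2).  A purely illustrative userspace sketch
 * ("tls_block" is a made-up variable):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)tls_block);
 *
 * Note that the CLONE_SETTLS path in copy_thread() above funnels into the
 * same do_arch_prctl(..., ARCH_SET_FS, ...) call, which is how a new
 * thread gets its TLS base set on x86-64.
 */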

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}