arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);

void enter_idle(void)
{
        percpu_write(is_idle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

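/*
 * Clear this CPU's is_idle flag and, only if it was actually set,
 * run the IDLE_END notifier chain.
 */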
static void __exit_idle(void)
{
        if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
        BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;

        /*
         * If we're the non-boot CPU, nothing set the PDA stack
         * canary up for us - and if we are the boot CPU we have
         * a 0 stack canary. This is a good place for updating
         * it, as we won't ever return from this function (so the
         * invalid canaries already on the stack won't ever
         * trigger):
         */
        boot_init_stack_canary();

        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {

                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
                        /*
                         * In many cases the interrupt that ended idle
                         * has already called exit_idle. But some idle
                         * loops can be woken up without interrupt.
                         */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
        const char *board;

        printk("\n");
        print_modules();
        board = dmi_get_system_info(DMI_PRODUCT_NAME);
        if (!board)
                board = "";
        printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version, board);
        printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
                regs->sp, regs->flags);
        printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        if (!all)
                return;

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
                es, cr0);
        printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
                cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk(KERN_INFO "CPU %d:", smp_processor_id());
        __show_regs(regs, 1);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }

        ds_exit_thread(current);
}

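/*
 * Called on exec: complete a pending 32/64-bit ABI switch, clear the
 * per-thread debug register values and TLS slots, and forget any lazy
 * FPU state.
 */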
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

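/*
 * Install a 32-bit flat segment with the given base address in one of
 * the task's TLS GDT slots; do_arch_prctl() uses this for FS/GS bases
 * that fit in 32 bits.
 */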
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

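/*
 * Set up the child's kernel stack and thread state at fork/clone time:
 * copy the parent's pt_regs (with ax = 0 so the child returns 0 from
 * fork), duplicate the parent's I/O bitmap if it has one, and honour
 * CLONE_SETTLS.
 */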
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        savesegment(gs, p->thread.gsindex);
        savesegment(fs, p->thread.fsindex);
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }

        ds_copy_thread(p, me);

        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

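/*
 * Set up the user-mode register state of a freshly exec'd 64-bit task:
 * flat data segments, the new instruction and stack pointers, IF set in
 * EFLAGS, and the old extended FPU state discarded.
 */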
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        loadsegment(fs, 0);
        loadsegment(es, 0);
        loadsegment(ds, 0);
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        percpu_write(old_rsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

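/*
 * Report or change whether this task may execute RDTSC: PR_TSC_ENABLE
 * allows it, PR_TSC_SIGSEGV makes it fault, implemented by toggling
 * TIF_NOTSC and CR4.TSD above.
 */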
int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

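/*
 * Handle the slow-path work of a context switch: DS area/DEBUGCTL MSR
 * state, hardware debug registers, per-task RDTSC permission (CR4.TSD)
 * and the TSS I/O permission bitmap.
 */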
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;

        prev = &prev_p->thread,
        next = &next_p->thread;

        if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
            test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
                ds_switch_to(prev_p, next_p);
        else if (next->debugctlmsr != prev->debugctlmsr)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                        max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        unsigned fsindex, gsindex;

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);


        /*
         * We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
         * (e.g. xen_load_tls())
         */
        savesegment(fs, fsindex);
        savesegment(gs, gsindex);

        load_TLS(next, cpu);

        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_leave_lazy_cpu_mode();

        /*
         * Switch FS and GS.
         *
         * Segment register != 0 always requires a reload. Also
         * reload when it has changed. When prev process used 64bit
         * base always reload to avoid an information leak.
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
                /*
                 * Check if the user used a selector != 0; if yes
                 * clear 64bit base, since overloaded base is always
                 * mapped to the Null selector
                 */
                if (fsindex)
                        prev->fs = 0;
        }
        /* when next process has a 64bit base use it */
        if (next->fs)
                wrmsrl(MSR_FS_BASE, next->fs);
        prev->fsindex = fsindex;

        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
                if (gsindex)
                        prev->gs = 0;
        }
        if (next->gs)
                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
        prev->gsindex = gsindex;

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = percpu_read(old_rsp);
        percpu_write(old_rsp, next->usersp);
        percpu_write(current_task, next_p);

        percpu_write(kernel_stack,
                (unsigned long)task_stack_page(next_p) +
                THREAD_SIZE - KERNEL_STACK_OFFSET);

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /*
         * If the task has used the FPU in the last 5 timeslices, just do a
         * full restore of the math state immediately to avoid the trap; the
         * chances of needing the FPU soon are obviously high now.
         *
         * tsk_used_math() checks prevent calling math_state_restore(),
         * which can sleep in the case of !tsk_used_math()
         */
        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /*
         * TBD: this overwrites the user's setup. Should have two bits.
         * But 64-bit processes have always behaved this way, so it's
         * not too bad. The main problem is just that 32-bit children
         * are affected again.
         */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                        NULL, NULL);
}

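/*
 * Walk the saved frame-pointer chain of a sleeping task to find the
 * first return address outside the scheduler (its "wait channel").
 * Gives up after 16 frames or as soon as a pointer leaves the task's
 * stack.
 */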
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp >= (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

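/*
 * Back end of the arch_prctl() system call: get or set the FS/GS base
 * of a task. Bases that fit in 32 bits are installed through a GDT TLS
 * slot because that is cheaper to switch; larger bases are written to
 * the FS/GS base MSRs.
 */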
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                loadsegment(fs, 0);
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

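/*
 * Example (illustrative): user space reaches sys_arch_prctl() through
 * the arch_prctl(2) system call, e.g.
 *
 *         syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_base);
 *
 * A base below 4 GB ends up in a GDT TLS slot; anything larger is
 * written directly to MSR_FS_BASE, as implemented in do_arch_prctl()
 * above.
 */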
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}