git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blob - arch/x86/kernel/process_64.c
x86: cleanup stack protector
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * X86-64 port
8 * Andi Kleen.
9 *
10 * CPU hotplug support - ashok.raj@intel.com
11 */
12
13 /*
14 * This file handles the architecture-dependent parts of process handling.
15 */
16
17 #include <stdarg.h>
18
19 #include <linux/stackprotector.h>
20 #include <linux/cpu.h>
21 #include <linux/errno.h>
22 #include <linux/sched.h>
23 #include <linux/fs.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/elfcore.h>
27 #include <linux/smp.h>
28 #include <linux/slab.h>
29 #include <linux/user.h>
30 #include <linux/interrupt.h>
31 #include <linux/utsname.h>
32 #include <linux/delay.h>
33 #include <linux/module.h>
34 #include <linux/ptrace.h>
35 #include <linux/random.h>
36 #include <linux/notifier.h>
37 #include <linux/kprobes.h>
38 #include <linux/kdebug.h>
39 #include <linux/tick.h>
40 #include <linux/prctl.h>
41 #include <linux/uaccess.h>
42 #include <linux/io.h>
43 #include <linux/ftrace.h>
44
45 #include <asm/pgtable.h>
46 #include <asm/system.h>
47 #include <asm/processor.h>
48 #include <asm/i387.h>
49 #include <asm/mmu_context.h>
50 #include <asm/pda.h>
51 #include <asm/prctl.h>
52 #include <asm/desc.h>
53 #include <asm/proto.h>
54 #include <asm/ia32.h>
55 #include <asm/idle.h>
56 #include <asm/syscalls.h>
57 #include <asm/ds.h>
58
59 asmlinkage extern void ret_from_fork(void);
60
61 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
62 EXPORT_PER_CPU_SYMBOL(current_task);
63
64 DEFINE_PER_CPU(unsigned long, old_rsp);
65 static DEFINE_PER_CPU(unsigned char, is_idle);
66
67 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
68
69 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70
71 void idle_notifier_register(struct notifier_block *n)
72 {
73 atomic_notifier_chain_register(&idle_notifier, n);
74 }
75 EXPORT_SYMBOL_GPL(idle_notifier_register);
76
77 void idle_notifier_unregister(struct notifier_block *n)
78 {
79 atomic_notifier_chain_unregister(&idle_notifier, n);
80 }
81 EXPORT_SYMBOL_GPL(idle_notifier_unregister);
82
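/* Mark this CPU as entering idle and run the IDLE_START notifier chain. */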
83 void enter_idle(void)
84 {
85 percpu_write(is_idle, 1);
86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
87 }
88
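/*
 * Clear the per-CPU is_idle flag; if it was set, run the IDLE_END
 * notifier chain.
 */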
89 static void __exit_idle(void)
90 {
91 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
92 return;
93 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
94 }
95
96 /* Called from interrupts to signify idle end */
97 void exit_idle(void)
98 {
99 /* idle loop has pid 0 */
100 if (current->pid)
101 return;
102 __exit_idle();
103 }
104
105 #ifndef CONFIG_SMP
106 static inline void play_dead(void)
107 {
108 BUG();
109 }
110 #endif
111
112 /*
113 * The idle thread. There's no useful work to be
114 * done, so just try to conserve power and have a
115 * low exit latency (ie sit in a loop waiting for
116 * somebody to say that they'd like to reschedule)
117 */
118 void cpu_idle(void)
119 {
120 current_thread_info()->status |= TS_POLLING;
121
122 /*
123 * If we're the non-boot CPU, nothing set the PDA stack
124 * canary up for us - and if we are the boot CPU we have
125 * a 0 stack canary. This is a good place for updating
126 * it, as we won't ever return from this function (so the
127 * invalid canaries already on the stack won't ever
128 * trigger):
129 */
130 boot_init_stack_canary();
131
132 /* endless idle loop with no priority at all */
133 while (1) {
134 tick_nohz_stop_sched_tick(1);
135 while (!need_resched()) {
136
137 rmb();
138
139 if (cpu_is_offline(smp_processor_id()))
140 play_dead();
141 /*
142 * Idle routines should keep interrupts disabled
143 * from here on, until they go to idle.
144 * Otherwise, idle callbacks can misfire.
145 */
146 local_irq_disable();
147 enter_idle();
148 /* Don't trace irqs off for idle */
149 stop_critical_timings();
150 pm_idle();
151 start_critical_timings();
152 /* In many cases the interrupt that ended idle
153 has already called exit_idle. But some idle
154 loops can be woken up without interrupt. */
155 __exit_idle();
156 }
157
158 tick_nohz_restart_sched_tick();
159 preempt_enable_no_resched();
160 schedule();
161 preempt_disable();
162 }
163 }
164
165 /* Also prints some state that isn't saved in the pt_regs */
166 void __show_regs(struct pt_regs *regs, int all)
167 {
168 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
169 unsigned long d0, d1, d2, d3, d6, d7;
170 unsigned int fsindex, gsindex;
171 unsigned int ds, cs, es;
172
173 printk("\n");
174 print_modules();
175 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
176 current->pid, current->comm, print_tainted(),
177 init_utsname()->release,
178 (int)strcspn(init_utsname()->version, " "),
179 init_utsname()->version);
180 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
181 printk_address(regs->ip, 1);
182 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
183 regs->sp, regs->flags);
184 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
185 regs->ax, regs->bx, regs->cx);
186 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
187 regs->dx, regs->si, regs->di);
188 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
189 regs->bp, regs->r8, regs->r9);
190 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
191 regs->r10, regs->r11, regs->r12);
192 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
193 regs->r13, regs->r14, regs->r15);
194
195 asm("movl %%ds,%0" : "=r" (ds));
196 asm("movl %%cs,%0" : "=r" (cs));
197 asm("movl %%es,%0" : "=r" (es));
198 asm("movl %%fs,%0" : "=r" (fsindex));
199 asm("movl %%gs,%0" : "=r" (gsindex));
200
201 rdmsrl(MSR_FS_BASE, fs);
202 rdmsrl(MSR_GS_BASE, gs);
203 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
204
205 if (!all)
206 return;
207
208 cr0 = read_cr0();
209 cr2 = read_cr2();
210 cr3 = read_cr3();
211 cr4 = read_cr4();
212
213 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
214 fs, fsindex, gs, gsindex, shadowgs);
215 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
216 es, cr0);
217 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
218 cr4);
219
220 get_debugreg(d0, 0);
221 get_debugreg(d1, 1);
222 get_debugreg(d2, 2);
223 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
224 get_debugreg(d3, 3);
225 get_debugreg(d6, 6);
226 get_debugreg(d7, 7);
227 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
228 }
229
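/* Print the CPU number, all registers and a stack trace. */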
230 void show_regs(struct pt_regs *regs)
231 {
232 printk(KERN_INFO "CPU %d:", smp_processor_id());
233 __show_regs(regs, 1);
234 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
235 }
236
237 /*
238 * Free current thread data structures etc.
239 */
240 void exit_thread(void)
241 {
242 struct task_struct *me = current;
243 struct thread_struct *t = &me->thread;
244
245 if (me->thread.io_bitmap_ptr) {
246 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
247
248 kfree(t->io_bitmap_ptr);
249 t->io_bitmap_ptr = NULL;
250 clear_thread_flag(TIF_IO_BITMAP);
251 /*
252 * Careful, clear this in the TSS too:
253 */
254 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
255 t->io_bitmap_max = 0;
256 put_cpu();
257 }
258
259 ds_exit_thread(current);
260 }
261
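/*
 * Reset the current thread's architecture state (called at exec time):
 * resolve a pending 32/64-bit ABI switch, clear the hardware debug
 * registers and TLS slots, and discard the FPU state.
 */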
262 void flush_thread(void)
263 {
264 struct task_struct *tsk = current;
265
266 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
267 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
268 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
269 clear_tsk_thread_flag(tsk, TIF_IA32);
270 } else {
271 set_tsk_thread_flag(tsk, TIF_IA32);
272 current_thread_info()->status |= TS_COMPAT;
273 }
274 }
275 clear_tsk_thread_flag(tsk, TIF_DEBUG);
276
277 tsk->thread.debugreg0 = 0;
278 tsk->thread.debugreg1 = 0;
279 tsk->thread.debugreg2 = 0;
280 tsk->thread.debugreg3 = 0;
281 tsk->thread.debugreg6 = 0;
282 tsk->thread.debugreg7 = 0;
283 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
284 /*
285 * Forget coprocessor state.
286 */
287 tsk->fpu_counter = 0;
288 clear_fpu(tsk);
289 clear_used_math();
290 }
291
292 void release_thread(struct task_struct *dead_task)
293 {
294 if (dead_task->mm) {
295 if (dead_task->mm->context.size) {
296 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
297 dead_task->comm,
298 dead_task->mm->context.ldt,
299 dead_task->mm->context.size);
300 BUG();
301 }
302 }
303 }
304
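/* Install a 32-bit flat TLS descriptor with the given base address. */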
305 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
306 {
307 struct user_desc ud = {
308 .base_addr = addr,
309 .limit = 0xfffff,
310 .seg_32bit = 1,
311 .limit_in_pages = 1,
312 .useable = 1,
313 };
314 struct desc_struct *desc = t->thread.tls_array;
315 desc += tls;
316 fill_ldt(desc, &ud);
317 }
318
319 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
320 {
321 return get_desc_base(&t->thread.tls_array[tls]);
322 }
323
324 /*
325 * This gets called before we allocate a new thread and copy
326 * the current task into it.
327 */
328 void prepare_to_copy(struct task_struct *tsk)
329 {
330 unlazy_fpu(tsk);
331 }
332
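/*
 * Set up the new child's kernel stack frame and thread structure at
 * fork/clone time: copy the parent's registers and segment state,
 * duplicate any I/O permission bitmap, and honour CLONE_SETTLS.
 */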
333 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
334 unsigned long unused,
335 struct task_struct *p, struct pt_regs *regs)
336 {
337 int err;
338 struct pt_regs *childregs;
339 struct task_struct *me = current;
340
341 childregs = ((struct pt_regs *)
342 (THREAD_SIZE + task_stack_page(p))) - 1;
343 *childregs = *regs;
344
345 childregs->ax = 0;
346 childregs->sp = sp;
347 if (sp == ~0UL)
348 childregs->sp = (unsigned long)childregs;
349
350 p->thread.sp = (unsigned long) childregs;
351 p->thread.sp0 = (unsigned long) (childregs+1);
352 p->thread.usersp = me->thread.usersp;
353
354 set_tsk_thread_flag(p, TIF_FORK);
355
356 p->thread.fs = me->thread.fs;
357 p->thread.gs = me->thread.gs;
358
359 savesegment(gs, p->thread.gsindex);
360 savesegment(fs, p->thread.fsindex);
361 savesegment(es, p->thread.es);
362 savesegment(ds, p->thread.ds);
363
364 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
365 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
366 if (!p->thread.io_bitmap_ptr) {
367 p->thread.io_bitmap_max = 0;
368 return -ENOMEM;
369 }
370 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
371 IO_BITMAP_BYTES);
372 set_tsk_thread_flag(p, TIF_IO_BITMAP);
373 }
374
375 /*
376 * Set a new TLS for the child thread?
377 */
378 if (clone_flags & CLONE_SETTLS) {
379 #ifdef CONFIG_IA32_EMULATION
380 if (test_thread_flag(TIF_IA32))
381 err = do_set_thread_area(p, -1,
382 (struct user_desc __user *)childregs->si, 0);
383 else
384 #endif
385 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
386 if (err)
387 goto out;
388 }
389
390 ds_copy_thread(p, me);
391
392 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
393 p->thread.debugctlmsr = 0;
394
395 err = 0;
396 out:
397 if (err && p->thread.io_bitmap_ptr) {
398 kfree(p->thread.io_bitmap_ptr);
399 p->thread.io_bitmap_max = 0;
400 }
401 return err;
402 }
403
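/*
 * Initialize user-mode register state for a freshly exec'd 64-bit
 * program: clear the data segment selectors, point ip/sp at the new
 * image, and start with interrupts enabled.
 */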
404 void
405 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
406 {
407 loadsegment(fs, 0);
408 loadsegment(es, 0);
409 loadsegment(ds, 0);
410 load_gs_index(0);
411 regs->ip = new_ip;
412 regs->sp = new_sp;
413 percpu_write(old_rsp, new_sp);
414 regs->cs = __USER_CS;
415 regs->ss = __USER_DS;
416 regs->flags = 0x200; /* X86_EFLAGS_IF: start with interrupts enabled */
417 set_fs(USER_DS);
418 /*
419 * Free the old FP and other extended state
420 */
421 free_thread_xstate(current);
422 }
423 EXPORT_SYMBOL_GPL(start_thread);
424
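/* Set CR4.TSD so that RDTSC traps when executed from user mode. */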
425 static void hard_disable_TSC(void)
426 {
427 write_cr4(read_cr4() | X86_CR4_TSD);
428 }
429
430 void disable_TSC(void)
431 {
432 preempt_disable();
433 if (!test_and_set_thread_flag(TIF_NOTSC))
434 /*
435 * Must flip the CPU state synchronously with
436 * TIF_NOTSC in the current running context.
437 */
438 hard_disable_TSC();
439 preempt_enable();
440 }
441
442 static void hard_enable_TSC(void)
443 {
444 write_cr4(read_cr4() & ~X86_CR4_TSD);
445 }
446
447 static void enable_TSC(void)
448 {
449 preempt_disable();
450 if (test_and_clear_thread_flag(TIF_NOTSC))
451 /*
452 * Must flip the CPU state synchronously with
453 * TIF_NOTSC in the current running context.
454 */
455 hard_enable_TSC();
456 preempt_enable();
457 }
458
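/*
 * Report the current RDTSC policy for this task: writes PR_TSC_ENABLE
 * or PR_TSC_SIGSEGV to the user address 'adr'.
 */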
459 int get_tsc_mode(unsigned long adr)
460 {
461 unsigned int val;
462
463 if (test_thread_flag(TIF_NOTSC))
464 val = PR_TSC_SIGSEGV;
465 else
466 val = PR_TSC_ENABLE;
467
468 return put_user(val, (unsigned int __user *)adr);
469 }
470
471 int set_tsc_mode(unsigned int val)
472 {
473 if (val == PR_TSC_SIGSEGV)
474 disable_TSC();
475 else if (val == PR_TSC_ENABLE)
476 enable_TSC();
477 else
478 return -EINVAL;
479
480 return 0;
481 }
482
483 /*
484 * This special macro can be used to load a debugging register
485 */
486 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
487
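/*
 * Slow-path context-switch work: switch debug-store/DEBUGCTL MSR state,
 * reload the hardware debug registers, flip the CR4 TSC-disable bit,
 * and update the TSS I/O permission bitmap as needed.
 */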
488 static inline void __switch_to_xtra(struct task_struct *prev_p,
489 struct task_struct *next_p,
490 struct tss_struct *tss)
491 {
492 struct thread_struct *prev, *next;
493
494 prev = &prev_p->thread;
495 next = &next_p->thread;
496
497 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
498 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
499 ds_switch_to(prev_p, next_p);
500 else if (next->debugctlmsr != prev->debugctlmsr)
501 update_debugctlmsr(next->debugctlmsr);
502
503 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
504 loaddebug(next, 0);
505 loaddebug(next, 1);
506 loaddebug(next, 2);
507 loaddebug(next, 3);
508 /* no 4 and 5 */
509 loaddebug(next, 6);
510 loaddebug(next, 7);
511 }
512
513 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
514 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
515 /* prev and next are different */
516 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
517 hard_disable_TSC();
518 else
519 hard_enable_TSC();
520 }
521
522 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
523 /*
524 * Copy the relevant range of the IO bitmap.
525 * Normally this is 128 bytes or less:
526 */
527 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
528 max(prev->io_bitmap_max, next->io_bitmap_max));
529 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
530 /*
531 * Clear any possible leftover bits:
532 */
533 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
534 }
535 }
536
537 /*
538 * switch_to(x,y) should switch tasks from x to y.
539 *
540 * This could still be optimized:
541 * - fold all the options into a flag word and test it with a single test.
542 * - could test fs/gs bitsliced
543 *
544 * Kprobes not supported here. Set the probe on schedule instead.
545 * Function graph tracer is not supported here either.
546 */
547 __notrace_funcgraph struct task_struct *
548 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
549 {
550 struct thread_struct *prev = &prev_p->thread;
551 struct thread_struct *next = &next_p->thread;
552 int cpu = smp_processor_id();
553 struct tss_struct *tss = &per_cpu(init_tss, cpu);
554 unsigned fsindex, gsindex;
555
556 /* we're going to use this soon, after a few expensive things */
557 if (next_p->fpu_counter > 5)
558 prefetch(next->xstate);
559
560 /*
561 * Reload esp0, LDT and the page table pointer:
562 */
563 load_sp0(tss, next);
564
565 /*
566 * Switch DS and ES.
567 * This won't pick up thread selector changes, but I guess that is ok.
568 */
569 savesegment(es, prev->es);
570 if (unlikely(next->es | prev->es))
571 loadsegment(es, next->es);
572
573 savesegment(ds, prev->ds);
574 if (unlikely(next->ds | prev->ds))
575 loadsegment(ds, next->ds);
576
577
578 /* We must save %fs and %gs before load_TLS() because
579 * %fs and %gs may be cleared by load_TLS().
580 *
581 * (e.g. xen_load_tls())
582 */
583 savesegment(fs, fsindex);
584 savesegment(gs, gsindex);
585
586 load_TLS(next, cpu);
587
588 /*
589 * Leave lazy mode, flushing any hypercalls made here.
590 * This must be done before restoring TLS segments so
591 * the GDT and LDT are properly updated, and must be
592 * done before math_state_restore, so the TS bit is up
593 * to date.
594 */
595 arch_leave_lazy_cpu_mode();
596
597 /*
598 * Switch FS and GS.
599 *
600 * A segment register != 0 always requires a reload. Also reload
601 * when it has changed. When the previous process used a 64-bit
602 * base, always reload to avoid an information leak.
603 */
604 if (unlikely(fsindex | next->fsindex | prev->fs)) {
605 loadsegment(fs, next->fsindex);
606 /*
607 * Check if the user used a selector != 0; if yes
608 * clear 64bit base, since overloaded base is always
609 * mapped to the Null selector
610 */
611 if (fsindex)
612 prev->fs = 0;
613 }
614 /* when next process has a 64bit base use it */
615 if (next->fs)
616 wrmsrl(MSR_FS_BASE, next->fs);
617 prev->fsindex = fsindex;
618
619 if (unlikely(gsindex | next->gsindex | prev->gs)) {
620 load_gs_index(next->gsindex);
621 if (gsindex)
622 prev->gs = 0;
623 }
624 if (next->gs)
625 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
626 prev->gsindex = gsindex;
627
628 /* Must be after DS reload */
629 unlazy_fpu(prev_p);
630
631 /*
632 * Switch the PDA and FPU contexts.
633 */
634 prev->usersp = percpu_read(old_rsp);
635 percpu_write(old_rsp, next->usersp);
636 percpu_write(current_task, next_p);
637
638 percpu_write(kernel_stack,
639 (unsigned long)task_stack_page(next_p) +
640 THREAD_SIZE - KERNEL_STACK_OFFSET);
641
642 /*
643 * Now maybe reload the debug registers and handle I/O bitmaps
644 */
645 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
646 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
647 __switch_to_xtra(prev_p, next_p, tss);
648
649 /* If the task has used the FPU in the last 5 timeslices, just do a full
650 * restore of the math state immediately to avoid the trap; the
651 * chances of needing the FPU again soon are obviously high now.
652 *
653 * The tsk_used_math() check prevents calling math_state_restore(),
654 * which can sleep in the case of !tsk_used_math()
655 */
656 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
657 math_state_restore();
658 return prev_p;
659 }
660
661 /*
662 * sys_execve() executes a new program.
663 */
664 asmlinkage
665 long sys_execve(char __user *name, char __user * __user *argv,
666 char __user * __user *envp, struct pt_regs *regs)
667 {
668 long error;
669 char *filename;
670
671 filename = getname(name);
672 error = PTR_ERR(filename);
673 if (IS_ERR(filename))
674 return error;
675 error = do_execve(filename, argv, envp, regs);
676 putname(filename);
677 return error;
678 }
679
680 void set_personality_64bit(void)
681 {
682 /* inherit personality from parent */
683
684 /* Make sure to be in 64bit mode */
685 clear_thread_flag(TIF_IA32);
686
687 /* TBD: overwrites user setup. Should have two bits.
688 But 64-bit processes have always behaved this way,
689 so it's not too bad. The main problem is just that
690 32-bit children are affected again. */
691 current->personality &= ~READ_IMPLIES_EXEC;
692 }
693
694 asmlinkage long sys_fork(struct pt_regs *regs)
695 {
696 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
697 }
698
699 asmlinkage long
700 sys_clone(unsigned long clone_flags, unsigned long newsp,
701 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
702 {
703 if (!newsp)
704 newsp = regs->sp;
705 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
706 }
707
708 /*
709 * This is trivial, and on the face of it looks like it
710 * could equally well be done in user mode.
711 *
712 * Not so, for quite unobvious reasons - register pressure.
713 * In user mode vfork() cannot have a stack frame, and if
714 * done by calling the "clone()" system call directly, you
715 * do not have enough call-clobbered registers to hold all
716 * the information you need.
717 */
718 asmlinkage long sys_vfork(struct pt_regs *regs)
719 {
720 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
721 NULL, NULL);
722 }
723
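/*
 * Walk the saved frame pointers on a sleeping task's kernel stack and
 * return the first return address that is not in the scheduler (the
 * task's "wchan").
 */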
724 unsigned long get_wchan(struct task_struct *p)
725 {
726 unsigned long stack;
727 u64 fp, ip;
728 int count = 0;
729
730 if (!p || p == current || p->state == TASK_RUNNING)
731 return 0;
732 stack = (unsigned long)task_stack_page(p);
733 if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
734 return 0;
735 fp = *(u64 *)(p->thread.sp);
736 do {
737 if (fp < (unsigned long)stack ||
738 fp >= (unsigned long)stack+THREAD_SIZE)
739 return 0;
740 ip = *(u64 *)(fp+8);
741 if (!in_sched_functions(ip))
742 return ip;
743 fp = *(u64 *)fp;
744 } while (count++ < 16);
745 return 0;
746 }
747
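/*
 * Get or set a task's FS/GS base (the arch_prctl(2) back end). Bases
 * below 4GB are installed as GDT TLS entries, which are cheaper to
 * switch; larger bases are written to the FS/GS base MSRs.
 */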
748 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
749 {
750 int ret = 0;
751 int doit = task == current;
752 int cpu;
753
754 switch (code) {
755 case ARCH_SET_GS:
756 if (addr >= TASK_SIZE_OF(task))
757 return -EPERM;
758 cpu = get_cpu();
759 /* handle small bases via the GDT because that's faster to
760 switch. */
761 if (addr <= 0xffffffff) {
762 set_32bit_tls(task, GS_TLS, addr);
763 if (doit) {
764 load_TLS(&task->thread, cpu);
765 load_gs_index(GS_TLS_SEL);
766 }
767 task->thread.gsindex = GS_TLS_SEL;
768 task->thread.gs = 0;
769 } else {
770 task->thread.gsindex = 0;
771 task->thread.gs = addr;
772 if (doit) {
773 load_gs_index(0);
774 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
775 }
776 }
777 put_cpu();
778 break;
779 case ARCH_SET_FS:
780 /* Not strictly needed for fs, but do it for symmetry
781 with gs */
782 if (addr >= TASK_SIZE_OF(task))
783 return -EPERM;
784 cpu = get_cpu();
785 /* handle small bases via the GDT because that's faster to
786 switch. */
787 if (addr <= 0xffffffff) {
788 set_32bit_tls(task, FS_TLS, addr);
789 if (doit) {
790 load_TLS(&task->thread, cpu);
791 loadsegment(fs, FS_TLS_SEL);
792 }
793 task->thread.fsindex = FS_TLS_SEL;
794 task->thread.fs = 0;
795 } else {
796 task->thread.fsindex = 0;
797 task->thread.fs = addr;
798 if (doit) {
799 /* set the selector to 0 to not confuse
800 __switch_to */
801 loadsegment(fs, 0);
802 ret = checking_wrmsrl(MSR_FS_BASE, addr);
803 }
804 }
805 put_cpu();
806 break;
807 case ARCH_GET_FS: {
808 unsigned long base;
809 if (task->thread.fsindex == FS_TLS_SEL)
810 base = read_32bit_tls(task, FS_TLS);
811 else if (doit)
812 rdmsrl(MSR_FS_BASE, base);
813 else
814 base = task->thread.fs;
815 ret = put_user(base, (unsigned long __user *)addr);
816 break;
817 }
818 case ARCH_GET_GS: {
819 unsigned long base;
820 unsigned gsindex;
821 if (task->thread.gsindex == GS_TLS_SEL)
822 base = read_32bit_tls(task, GS_TLS);
823 else if (doit) {
824 savesegment(gs, gsindex);
825 if (gsindex)
826 rdmsrl(MSR_KERNEL_GS_BASE, base);
827 else
828 base = task->thread.gs;
829 } else
830 base = task->thread.gs;
831 ret = put_user(base, (unsigned long __user *)addr);
832 break;
833 }
834
835 default:
836 ret = -EINVAL;
837 break;
838 }
839
840 return ret;
841 }
842
843 long sys_arch_prctl(int code, unsigned long addr)
844 {
845 return do_arch_prctl(current, code, addr);
846 }
847
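/*
 * Randomize the initial user stack pointer by up to 8KB (unless address
 * space randomization is disabled) and round it down to a 16-byte
 * boundary.
 */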
848 unsigned long arch_align_stack(unsigned long sp)
849 {
850 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
851 sp -= get_random_int() % 8192;
852 return sp & ~0xf;
853 }
854
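/*
 * Choose a randomized brk within 32MB above the current one, falling
 * back to mm->brk if randomize_range() returns 0.
 */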
855 unsigned long arch_randomize_brk(struct mm_struct *mm)
856 {
857 unsigned long range_end = mm->brk + 0x02000000;
858 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
859 }