/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

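/* Default flags for kernel_thread(): share the address space, exempt from forced ptrace. */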
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

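/* Mark this CPU as idle in its PDA and run the IDLE_START notifier chain. */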
void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

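/*
 * Atomically clear the idle flag; if it was already clear, IDLE_END has
 * been signalled for this idle period, so don't notify twice.
 */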
static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {

                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

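        /*
         * A pending ABI change requested by the exec path takes effect
         * here: flip TIF_IA32 and, when entering 32-bit mode, mark the
         * thread as running in compat mode.
         */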
        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

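        /*
         * The child's saved user-mode registers sit at the very top of
         * its kernel stack; its saved kernel stack pointer starts just
         * below them.
         */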
        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        savesegment(gs, p->thread.gsindex);
        savesegment(fs, p->thread.fsindex);
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        loadsegment(fs, 0);
        loadsegment(es, 0);
        loadsegment(ds, 0);
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
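        /* 0x200 is X86_EFLAGS_IF: the new program starts with interrupts enabled */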
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

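/* With CR4.TSD set, RDTSC from user mode faults; the task sees SIGSEGV. */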
static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                        max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        unsigned fsindex, gsindex;

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);


        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
         * (e.g. xen_load_tls())
         */
        savesegment(fs, fsindex);
        savesegment(gs, gsindex);

        load_TLS(next, cpu);

        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_leave_lazy_cpu_mode();

        /*
         * Switch FS and GS.
         *
         * Segment register != 0 always requires a reload. Also
         * reload when it has changed. When prev process used 64bit
         * base always reload to avoid an information leak.
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
                /*
                 * Check if the user used a selector != 0; if yes
                 * clear 64bit base, since overloaded base is always
                 * mapped to the Null selector
                 */
                if (fsindex)
                        prev->fs = 0;
        }
        /* when next process has a 64bit base use it */
        if (next->fs)
                wrmsrl(MSR_FS_BASE, next->fs);
        prev->fsindex = fsindex;

        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
                if (gsindex)
                        prev->gs = 0;
        }
        if (next->gs)
                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
        prev->gsindex = gsindex;

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
                  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         *
         * tsk_used_math() checks prevent calling math_state_restore(),
         * which can sleep in the case of !tsk_used_math()
         */
        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

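/*
 * Walk the sleeping task's frame-pointer chain (saved %rbp at *fp, return
 * address at fp+8, bounded to 16 frames) until the return address is
 * outside the scheduler; that address is where the task is waiting.
 */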
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

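/*
 * Back end for arch_prctl(): get or set the FS/GS base of a task.
 * Bases that fit in 32 bits are served from a GDT descriptor, which is
 * cheaper to switch; larger bases go straight to the base MSRs.
 */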
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                loadsegment(fs, 0);
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

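/* Randomize the initial stack pointer by up to 8KB, keeping 16-byte alignment. */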
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}
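
/* Pick a page-aligned heap start at random within 32MB above mm->brk. */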
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}