/* arch/x86/kernel/process_64.c (Linux 2.6.27) */
/*
 * Copyright (C) 1995  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
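
/*
 * Example: how a client of the idle notifier chain above might look.
 * This is a sketch for illustration only; my_idle_event and my_idle_nb
 * are hypothetical names, not kernel symbols.
 *
 *	static int my_idle_event(struct notifier_block *nb,
 *				 unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			;	// the CPU is about to go idle
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_event,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 */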

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	c1e_remove_cpu(raw_smp_processor_id());

	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	/* mask all interrupts, flush any and all caches, and halt */
	wbinvd_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}
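
/*
 * For reference, a sketch of what get_desc_base() does (see
 * <asm/desc.h> for the real definition): an x86 segment descriptor
 * scatters its 32-bit base across three fields, which are reassembled
 * roughly as
 *
 *	base = desc->base0 | (desc->base1 << 16) | (desc->base2 << 24);
 */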

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
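	/*
	 * sp == ~0UL appears to be the kernel-thread convention (no
	 * user stack); the child is then pointed at the top of its
	 * own kernel stack.
	 */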
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
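	/* 0x200 is the EFLAGS IF bit: start user mode with interrupts on */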
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
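
/*
 * get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()
 * operations. A sketch of the userspace side (not part of this file):
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	// rdtsc now raises SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	// rdtsc is allowed again
 */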

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
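/*
 * The ## token paste means, e.g., loaddebug(next, 7) expands to
 * set_debugreg(next->debugreg7, 7).
 */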

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
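
/*
 * For reference, the frame layout get_wchan() walks: with frame
 * pointers enabled, each x86-64 stack frame looks like
 *
 *	[fp + 8]  return address	(the candidate "ip")
 *	[fp + 0]  caller's saved fp	(the next "fp")
 *
 * so "fp = *(u64 *)fp" climbs one caller per iteration, giving up
 * after 16 frames or when fp leaves the task's stack.
 */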

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
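
/*
 * Illustrative userspace side (not part of this file): glibc provides
 * no wrapper for arch_prctl(2), so it is reached via syscall(2):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000);	// set the GS base
 *	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);	// read it back
 */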

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
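
/*
 * Worked example: the stack top is lowered by a random 0..8191 bytes,
 * then rounded down to a 16-byte boundary (the x86-64 ABI alignment).
 * E.g. with sp = 0x7fff0000 and a random offset of 0x64:
 *
 *	0x7fff0000 - 0x64  = 0x7ffeff9c
 *	0x7ffeff9c & ~0xf  = 0x7ffeff90
 */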

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}