/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                ktime_t t0, t1;
                u64 t0n, t1n;

                t0 = ktime_get();
                t0n = ktime_to_ns(t0);
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
                t1 = ktime_get();
                t1n = ktime_to_ns(t1);
                sched_clock_idle_wakeup_event(t1n - t0n);
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 * handler on SMP systems.
 *
 * Caller must have changed pm_idle to the new value before the call. Old
 * pm_idle value will not be used by any CPU after the return of this function.
 */
void cpu_idle_wait(void)
{
        smp_mb();
        /* kick all the CPUs so that they exit out of pm_idle */
        smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
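
/*
 * Usage sketch (not from this file, the handler name is hypothetical):
 * code that installs a different idle handler publishes the new pointer
 * first and then calls cpu_idle_wait() so that no CPU keeps executing the
 * old handler:
 *
 *      pm_idle = my_new_idle;
 *      cpu_idle_wait();
 */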

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}


static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}
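
/*
 * For reference on the check above: CPUID leaf 5 reports MWAIT sub C-state
 * support in EDX as 4-bit fields, and bits 7:4 are read here as the number
 * of C1 sub-states; mwait_usable() treats a non-zero count (or the
 * force_mwait override set by "idle=mwait" below) as "C1 via MWAIT is
 * usable".
 */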

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int selected;

        if (selected)
                return;
#ifdef CONFIG_X86_SMP
        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
        }
#endif
        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => All CPUs support mwait
                 */
                if (!pm_idle) {
                        printk(KERN_INFO "using mwait in idle threads.\n");
                        pm_idle = mwait_idle;
                }
        }
        selected = 1;
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);
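
/*
 * Net effect of the setup code above: booting with "idle=poll" selects
 * poll_idle as pm_idle, and "idle=mwait" sets force_mwait so that
 * mwait_usable() accepts MWAIT regardless of the reported C1 sub-states.
 */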

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread,
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter>5)
                prefetch(&next->i387.fxsave);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter>5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32-bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}
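
/*
 * Note on the walk above (descriptive only): the saved kernel stack pointer
 * is dereferenced to get a frame pointer, and the word at fp+8 is taken as
 * the return address, i.e. the conventional frame-pointer layout is assumed;
 * get_wchan() follows that chain for at most 16 frames, stopping at the
 * first return address outside the scheduler.
 */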

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
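
/*
 * Rough user-space view of the syscall above (a sketch, not part of this
 * file): a 64-bit process can read or move its FS base with, for example,
 *
 *      unsigned long base;
 *      syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *      syscall(SYS_arch_prctl, ARCH_SET_FS, new_base);
 *
 * where new_base is a hypothetical address chosen by the caller.
 */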

unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}