/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

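/*
 * Illustrative sketch (not part of the original file): how a driver might
 * hook the idle notifier chain maintained above. The callback and
 * notifier_block names are hypothetical; the IDLE_START/IDLE_END actions are
 * the ones raised by enter_idle()/__exit_idle().
 *
 *      static int my_idle_event(struct notifier_block *nb,
 *                               unsigned long action, void *data)
 *      {
 *              if (action == IDLE_START)
 *                      ;       // this CPU is entering the idle loop
 *              else if (action == IDLE_END)
 *                      ;       // this CPU left idle (e.g. from an interrupt)
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block my_idle_nb = {
 *              .notifier_call = my_idle_event,
 *      };
 *
 *      // somewhere in driver init:
 *      idle_notifier_register(&my_idle_nb);
 */
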
/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                ktime_t t0, t1;
                u64 t0n, t1n;

                t0 = ktime_get();
                t0n = ktime_to_ns(t0);
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
                t1 = ktime_get();
                t1n = ktime_to_ns(t1);
                sched_clock_idle_wakeup_event(t1n - t0n);
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

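/*
 * Illustrative sketch (not part of the original file): the contract a
 * pm_idle callback is expected to follow, as established by the loop above
 * and by default_idle()/poll_idle(). The function name is hypothetical.
 *
 *      static void my_idle(void)
 *      {
 *              // entered from cpu_idle() with interrupts disabled
 *              if (!need_resched())
 *                      safe_halt();    // halts; re-enables interrupts on wakeup
 *              else
 *                      local_irq_enable();
 *              // must return with interrupts enabled
 *      }
 */
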
static void do_nothing(void *unused)
{
}

void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map, tmp = current->cpus_allowed;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
                /*
                 * We waited 1 sec; if a CPU still did not call idle,
                 * it may be because it is in idle and not waking up
                 * because it has nothing to do.
                 * Give all the remaining CPUs a kick.
                 */
                smp_call_function_mask(map, do_nothing, 0, 0);
        } while (!cpus_empty(map));

        set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}

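/*
 * Illustrative sketch (not part of the original file): how a C-state driver
 * might call the hinted variant above. The variable names and values are
 * hypothetical; architecturally, MWAIT takes its hint in EAX (target C-state
 * in bits 7:4, sub-state in bits 3:0) and extensions in ECX (bit 0 requests
 * that masked interrupts still break the wait).
 *
 *      unsigned long hint = 0x10;      // e.g. a C2-type hint from firmware tables
 *      unsigned long ext  = 0x1;       // wake on interrupt even if IF=0
 *
 *      mwait_idle_with_hints(hint, ext);
 */
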
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}

static int mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int selected;

        if (selected)
                return;
#ifdef CONFIG_X86_SMP
        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
        }
#endif
        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait.
                 */
                if (!pm_idle) {
                        printk(KERN_INFO "using mwait in idle threads.\n");
                        pm_idle = mwait_idle;
                }
        }
        selected = 1;
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);

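/*
 * Usage note (not part of the original file): idle_setup() above is wired to
 * the "idle=" kernel command-line parameter via early_param(). From the code
 * above, "idle=poll" installs poll_idle() directly (select_idle_routine()
 * then leaves it alone because pm_idle is already set), while "idle=mwait"
 * sets force_mwait so mwait_usable() always accepts MWAIT. Both set
 * boot_option_idle_override, which is exported for other code to consult.
 */
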
/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

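/*
 * Illustrative note (not part of the original file): with the macro above,
 * a call such as
 *
 *      loaddebug(next, 7);
 *
 * expands to set_debugreg(next->debugreg7, 7), i.e. it writes the saved
 * per-thread value back into hardware debug register DR7.
 */
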
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread,
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                        max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(&next->i387.fxsave);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

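/*
 * Illustrative sketch (not part of the original file): how a 64-bit
 * userspace program might exercise the interface implemented by
 * do_arch_prctl() above. The variable names are hypothetical; ARCH_SET_FS
 * and ARCH_GET_FS come from <asm/prctl.h>, and the raw syscall is used
 * because glibc does not wrap arch_prctl(). Note that glibc itself keeps
 * thread-local storage in FS, so overriding the FS base like this is only
 * safe in carefully controlled code.
 *
 *      #include <asm/prctl.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      static unsigned long tls_block[64];
 *
 *      int main(void)
 *      {
 *              unsigned long base = 0;
 *
 *              syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 *              syscall(SYS_arch_prctl, ARCH_GET_FS, (unsigned long)&base);
 *              // base now holds the address of tls_block
 *              return 0;
 *      }
 */
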
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}