/*
 * Copyright (C) 1995  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

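/*
 * Usage sketch (not part of this file): a subsystem that wants to know
 * when this CPU enters or leaves idle can hook the chain above.  The
 * IDLE_START/IDLE_END events are raised from enter_idle()/__exit_idle()
 * with interrupts disabled, so the callback must not sleep.
 *
 *	static int my_idle_cb(struct notifier_block *nb,
 *			      unsigned long action, void *unused)
 *	{
 *		if (action == IDLE_START)
 *			... prepare for a quiet period ...
 *		return NOTIFY_OK;
 *	}
 *	static struct notifier_block my_idle_nb = { .notifier_call = my_idle_cb };
 *	idle_notifier_register(&my_idle_nb);
 *
 * my_idle_cb/my_idle_nb are made-up names for illustration only.
 */
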
/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 * handler on SMP systems.
 *
 * Caller must have changed pm_idle to the new value before the call. Old
 * pm_idle value will not be used by any CPU after the return of this function.
 */
void cpu_idle_wait(void)
{
        smp_mb();
        /* kick all the CPUs so that they exit out of pm_idle */
        smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI needed to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter an optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}

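/*
 * Example of the hint interface (hedged: the real callers live in the
 * ACPI C-state code, not in this file): ax carries the hardware-defined
 * C-state hint, cx the extension bits, roughly
 *
 *	mwait_idle_with_hints(cstate_hint, MWAIT_ECX_INTERRUPT_BREAK);
 *
 * where MWAIT_ECX_INTERRUPT_BREAK asks for interrupts to break the wait
 * even while they are masked; cstate_hint stands in for the
 * firmware-provided hint value and is a made-up name here.
 */
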
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}

static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? (CPUID leaf 5 EDX[7:4] counts C1 sub-states) */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int selected;

        if (selected)
                return;
#ifdef CONFIG_X86_SMP
        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
        }
#endif
        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => All CPUs support mwait
                 */
                if (!pm_idle) {
                        printk(KERN_INFO "using mwait in idle threads.\n");
                        pm_idle = mwait_idle;
                }
        }
        selected = 1;
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);

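/*
 * The handler above implements the "idle=" kernel command line option:
 *
 *	idle=poll	busy-poll in the idle loop (lowest wakeup latency,
 *			highest power consumption)
 *	idle=mwait	set force_mwait, so select_idle_routine() picks
 *			mwait_idle() even when mwait_usable() would say no
 */
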
/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;    /* X86_EFLAGS_IF: start with interrupts enabled */
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

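/*
 * Userspace view (sketch): get_tsc_mode()/set_tsc_mode() back the
 * PR_GET_TSC/PR_SET_TSC prctl(2) commands, e.g.
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *
 * after which a rdtsc instruction in this thread faults (CR4.TSD makes
 * rdtsc privileged) and the thread gets SIGSEGV.
 */
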
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
                  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                /* frame layout: [fp] = saved rbp, [fp+8] = return address */
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

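/*
 * Userspace view (sketch): arch_prctl(2) is how 64bit userspace sets
 * its FS/GS base, e.g. a threading library pointing FS at a TLS block:
 *
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *	arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 *
 * ARCH_GET_FS/ARCH_GET_GS treat addr as a pointer and write the base
 * through it; tls_block/base are made-up names for illustration.
 */
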
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}