/*
 * Copyright (C) 1995  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

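/*
 * The isidle flag lives in the per-CPU PDA; the test-and-clear below keeps
 * the IDLE_END notification from firing twice when both the wakeup
 * interrupt and the idle loop itself go through the exit path.
 */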
static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		ktime_t t0, t1;
		u64 t0n, t1n;

		t0 = ktime_get();
		t0n = ktime_to_ns(t0);
		safe_halt();	/* enables interrupts racelessly */
		local_irq_disable();
		t1 = ktime_get();
		t1n = ktime_to_ns(t1);
		sched_clock_idle_wakeup_event(t1n - t0n);
	}
	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			tick_nohz_stop_sched_tick();

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

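/*
 * cpu_idle_wait() is used after changing pm_idle: it marks every online
 * CPU's cpu_idle_state and then waits (sleeping a second at a time and
 * nudging stragglers with a dummy IPI) until each CPU has passed through
 * the top of its idle loop again and cleared the flag, so no CPU is still
 * running the old idle routine.
 */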
static void do_nothing(void *unused)
{
}

void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map, tmp = current->cpus_allowed;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
		/*
		 * We waited 1 sec, if a CPU still did not call idle
		 * it may be because it is in idle and not waking up
		 * because it has nothing to do.
		 * Give all the remaining CPUS a kick.
		 */
		smp_call_function_mask(map, do_nothing, 0, 0);
	} while (!cpus_empty(map));

	set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(ax, cx);
	}
}

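/*
 * need_resched() is re-checked after MONITOR has been armed: a write to the
 * thread flags after that point still hits the monitored address, so MWAIT
 * will not sleep through it.  __sti_mwait() issues STI immediately before
 * MWAIT, so an interrupt cannot slip in between enabling interrupts and
 * entering the wait.
 */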
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs support mwait
		 */
		if (!pm_idle) {
			if (!printed) {
				printk(KERN_INFO "using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

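/*
 * Boot-time override for the idle routine: "idle=poll" selects the polling
 * loop above, "idle=mwait" forces MWAIT use; both also set
 * boot_option_idle_override so other idle setup code can tell that the user
 * made an explicit choice.
 */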
static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

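/*
 * Called on exec: apply any pending 32/64-bit ABI switch (TIF_ABI_PENDING),
 * clear the debug registers and TLS entries, and forget the FPU state so
 * the new program image starts from a clean thread state.
 */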
void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

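/*
 * Helpers for 32-bit TLS bases: the base is encoded as a GDT descriptor in
 * the thread's tls_array, which is what load_TLS() installs into the GDT on
 * a context switch.
 */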
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

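/*
 * Set up the child's kernel stack for fork/clone: a copy of the parent's
 * pt_regs sits at the top of the stack, with ax zeroed so the child sees a
 * return value of 0, and the I/O bitmap and TLS are duplicated as needed.
 */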
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
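	/* kernel_thread() passes ~0UL for sp: no user stack was supplied,
	   so point the child's sp at its own register frame instead. */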
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = task_pt_regs(tsk);

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

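/*
 * The uncommon part of the context switch: only entered (see __switch_to)
 * when one of the tasks needs debug registers, DS/BTS tracing state, or an
 * I/O permission bitmap updated.
 */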
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->x86_tss.sp0 = next->sp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
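	/*
	 * GS follows the same logic as FS above; the user's GS base is
	 * written to MSR_KERNEL_GS_BASE because, while in the kernel, the
	 * user value sits in the shadow slot until swapgs on return.
	 */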
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

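/*
 * Find the kernel address a sleeping task is blocked at (the wchan value
 * shown in /proc): walk up to 16 saved frame pointers on the task's kernel
 * stack and return the first return address outside the scheduler.
 */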
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

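/*
 * arch_prctl(ARCH_SET_FS/ARCH_SET_GS, ARCH_GET_FS/ARCH_GET_GS): set or read
 * a task's FS/GS base. Bases that fit in 32 bits go through a GDT entry
 * (cheaper to switch); larger bases are written to the FS/GS base MSRs.
 */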
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

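/*
 * Stack and brk randomization for new processes: the stack start is moved
 * down by up to 8KB (16-byte aligned) unless randomization is disabled, and
 * the heap break is placed somewhere within 32MB above mm->brk.
 */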
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}