/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
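
/*
 * A minimal sketch of a consumer of the notifier chain above; the
 * handler and its name are hypothetical, but idle_notifier_register()
 * and the IDLE_START/IDLE_END events are the interface defined here:
 *
 *	static int example_idle_event(struct notifier_block *nb,
 *				      unsigned long val, void *data)
 *	{
 *		if (val == IDLE_START)
 *			;	// this CPU is entering idle
 *		else if (val == IDLE_END)
 *			;	// this CPU left idle (e.g. via an interrupt)
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_idle_nb = {
 *		.notifier_call	= example_idle_event,
 *	};
 *
 *	idle_notifier_register(&example_idle_nb);
 */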

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	if (!need_resched())
		safe_halt();	/* enables interrupts racelessly */
	else
		local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}
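
/*
 * Why the smp_mb() above matters: a sketch of the wakeup interleaving,
 * assuming the scheduler's resched-IPI avoidance, which skips the IPI
 * when it observes TS_POLLING set on the target:
 *
 *	idle CPU				waking CPU
 *	--------				----------
 *	status &= ~TS_POLLING;
 *	smp_mb();
 *	if (!need_resched())			set_tsk_need_resched(idle);
 *		safe_halt();			if (!(status & TS_POLLING))
 *						    smp_send_reschedule(cpu);
 *
 * Without the barrier, the need_resched() load could be reordered
 * before the TS_POLLING clear becomes visible; the waker would then
 * skip the IPI and this CPU would halt with a reschedule pending.
 */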

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {
			void (*idle)(void);

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}
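
/*
 * A hedged sketch of how pm_idle is meant to be overridden (the
 * routine below is hypothetical; mwait_idle()/poll_idle() elsewhere in
 * arch/x86 are real examples). cpu_idle() invokes it with interrupts
 * disabled, so the routine must re-enable them itself:
 *
 *	static void example_poll_idle(void)
 *	{
 *		local_irq_enable();
 *		while (!need_resched())
 *			cpu_relax();
 *	}
 *	...
 *	pm_idle = example_poll_idle;	// seen by the rmb()/pm_idle load above
 */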

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;	/* X86_EFLAGS_IF: start with interrupts enabled */
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
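
/*
 * get_tsc_mode()/set_tsc_mode() are reached from the generic prctl()
 * syscall. A hedged userspace sketch, assuming <linux/prctl.h> exposes
 * PR_GET_TSC/PR_SET_TSC (declared alongside the constants used above):
 *
 *	int mode;
 *	prctl(PR_GET_TSC, &mode);		// PR_TSC_ENABLE or PR_TSC_SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	// rdtsc now raises SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	// rdtsc allowed again
 */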

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
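
/*
 * A hedged userspace sketch of the arch_prctl() interface above,
 * assuming the raw syscall is used (constants from <asm/prctl.h>,
 * SYS_arch_prctl from <sys/syscall.h>). Note the dual use of 'addr':
 * a value for ARCH_SET_*, a user pointer for ARCH_GET_*:
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000);	// <= 0xffffffff: GDT path
 *	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);	// reads the base back
 */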

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
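
/*
 * Worked example for arch_align_stack(), assuming randomization is on:
 * with sp = 0x7fffffffe510 and get_random_int() % 8192 == 0x65e, the
 * stack drops to 0x7fffffffdeb2 and the final mask with ~0xf returns
 * 0x7fffffffdeb0, i.e. up to 8KB of jitter with 16-byte alignment kept.
 */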

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}