/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu-internal.h>
/*
 * Track whether the kernel is using the FPU state. This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);
static void kernel_fpu_disable(void)
{
	WARN_ON(this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, true);
}

static void kernel_fpu_enable(void)
{
	WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, false);
}

static bool kernel_fpu_disabled(void)
{
	return this_cpu_read(in_kernel_fpu);
}
/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
 * pair does nothing at all: the thread must not have fpu (so
 * that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is
 * visible in the interrupted kernel thread).
 *
 * Except for the eagerfpu case, when we return true: in the likely case
 * the thread has FPU but we are not going to set/clear TS.
 */
static bool interrupted_kernel_fpu_idle(void)
{
	if (kernel_fpu_disabled())
		return false;

	if (use_eager_fpu())
		return true;

	return !current->thread.fpu.has_fpu && (read_cr0() & X86_CR0_TS);
}
/*
 * Were we in user mode (or vm86 mode) when we were interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();

	return regs && user_mode(regs);
}
/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
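
/*
 * Illustrative sketch (not part of this file): a typical in-kernel
 * user of the FPU checks irq_fpu_usable() and falls back to an
 * integer path when the FPU cannot be used from the current context,
 * as e.g. the crypto and RAID code do:
 *
 *	if (irq_fpu_usable()) {
 *		kernel_fpu_begin();
 *		... SSE/AVX accelerated code ...
 *		kernel_fpu_end();
 *	} else {
 *		... scalar fallback ...
 *	}
 */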
void __kernel_fpu_begin(void)
{
	struct task_struct *me = current;
	struct fpu *fpu = &me->thread.fpu;

	kernel_fpu_disable();

	if (fpu->has_fpu) {
		fpu_save_init(fpu);
	} else {
		this_cpu_write(fpu_owner_task, NULL);
		if (!use_eager_fpu())
			clts();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
void __kernel_fpu_end(void)
{
	struct task_struct *me = current;
	struct fpu *fpu = &me->thread.fpu;

	if (fpu->has_fpu) {
		if (WARN_ON(restore_fpu_checking(me)))
			fpu_reset_state(me);
	} else if (!use_eager_fpu()) {
		stts();
	}

	kernel_fpu_enable();
}
EXPORT_SYMBOL(__kernel_fpu_end);
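
/*
 * Note (sketch, not part of this file): __kernel_fpu_begin()/end()
 * assume preemption is already disabled. Most callers should use the
 * kernel_fpu_begin()/kernel_fpu_end() wrappers, which at this point
 * in time live in fpu-internal.h and look roughly like:
 *
 *	static inline void kernel_fpu_begin(void)
 *	{
 *		preempt_disable();
 *		WARN_ON_ONCE(!irq_fpu_usable());
 *		__kernel_fpu_begin();
 *	}
 */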
/*
 * Save the FPU state (initialize it if necessary):
 *
 * This only ever gets called for the current task.
 */
void fpu__save(struct task_struct *tsk)
{
	struct fpu *fpu = &tsk->thread.fpu;

	WARN_ON(tsk != current);

	preempt_disable();
	if (fpu->has_fpu) {
		if (use_eager_fpu()) {
			__save_fpu(tsk);
		} else {
			fpu_save_init(fpu);
			__thread_fpu_end(tsk);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);
void fpstate_init(struct fpu *fpu)
{
	if (!cpu_has_fpu) {
		finit_soft_fpu(&fpu->state->soft);
		return;
	}

	memset(fpu->state, 0, xstate_size);

	if (cpu_has_fxsr) {
		fx_finit(&fpu->state->fxsave);
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;

		/* FSAVE power-on defaults: all exceptions masked, all regs empty: */
		fp->cwd = 0xffff037fu;
		fp->swd = 0xffff0000u;
		fp->twd = 0xffffffffu;
		fp->fos = 0xffff0000u;
	}
}
EXPORT_SYMBOL_GPL(fpstate_init);
/*
 * FPU state allocation:
 */
static struct kmem_cache *task_xstate_cachep;
void fpstate_cache_init(void)
{
	task_xstate_cachep =
		kmem_cache_create("task_xstate", xstate_size,
				  __alignof__(union thread_xstate),
				  SLAB_PANIC | SLAB_NOTRACK, NULL);
}
int fpstate_alloc(struct fpu *fpu)
{
	if (fpu->state)
		return 0;

	fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
	if (!fpu->state)
		return -ENOMEM;

	/* The CPU requires the FPU state to be aligned to 16 byte boundaries: */
	WARN_ON((unsigned long)fpu->state & 15);

	return 0;
}
EXPORT_SYMBOL_GPL(fpstate_alloc);
void fpstate_free(struct fpu *fpu)
{
	if (fpu->state) {
		kmem_cache_free(task_xstate_cachep, fpu->state);
		fpu->state = NULL;
	}
}
EXPORT_SYMBOL_GPL(fpstate_free);
/*
 * Copy the current task's FPU state to a new task's FPU context.
 *
 * In the 'eager' case we just save to the destination context.
 *
 * In the 'lazy' case we save to the source context, mark the FPU lazy
 * via stts() and copy the source context into the destination context.
 */
static void fpu_copy(struct task_struct *dst, struct task_struct *src)
{
	WARN_ON(src != current);

	if (use_eager_fpu()) {
		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
		__save_fpu(dst);
	} else {
		struct fpu *dfpu = &dst->thread.fpu;
		struct fpu *sfpu = &src->thread.fpu;

		fpu__save(src);
		memcpy(dfpu->state, sfpu->state, xstate_size);
	}
}
int fpu__copy(struct task_struct *dst, struct task_struct *src)
{
	dst->thread.fpu.counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.state = NULL;

	task_disable_lazy_fpu_restore(dst);

	if (tsk_used_math(src)) {
		int err = fpstate_alloc(&dst->thread.fpu);

		if (err)
			return err;

		fpu_copy(dst, src);
	}
	return 0;
}
/*
 * Allocate the backing store for the current task's FPU registers
 * and initialize the registers themselves as well.
 *
 * Can fail.
 */
int fpstate_alloc_init(struct task_struct *curr)
{
	int ret;

	if (WARN_ON_ONCE(curr != current))
		return -EINVAL;
	if (WARN_ON_ONCE(curr->flags & PF_USED_MATH))
		return -EINVAL;

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	ret = fpstate_alloc(&curr->thread.fpu);
	if (ret)
		return ret;

	fpstate_init(&curr->thread.fpu);

	/* Safe to do for the current task: */
	curr->flags |= PF_USED_MATH;

	return 0;
}
EXPORT_SYMBOL_GPL(fpstate_alloc_init);
/*
 * The stopped child task is going to use the FPU for the first time,
 * or is about to have its FPU state context modified: make sure it
 * has an initialized FPU state (which sets the mxcsr to its default
 * value at reset if we support XMM instructions), disable any lazy
 * in-register state, and remember that the task has used the FPU.
 */
static int fpu__unlazy_stopped(struct task_struct *child)
{
	int ret;

	if (WARN_ON_ONCE(child == current))
		return -EINVAL;

	if (child->flags & PF_USED_MATH) {
		task_disable_lazy_fpu_restore(child);
		return 0;
	}

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	ret = fpstate_alloc(&child->thread.fpu);
	if (ret)
		return ret;

	fpstate_init(&child->thread.fpu);

	/* Safe to do for stopped child tasks: */
	child->flags |= PF_USED_MATH;

	return 0;
}
/*
 * 'fpu__restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task.
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (eg with local
 * interrupts disabled, as it is in the case of do_device_not_available()).
 */
void fpu__restore(void)
{
	struct task_struct *tsk = current;

	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (fpstate_alloc_init(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

	/* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
	kernel_fpu_disable();
	__thread_fpu_begin(tsk);
	if (unlikely(restore_fpu_checking(tsk))) {
		fpu_reset_state(tsk);
		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
	} else {
		tsk->thread.fpu.counter++;
	}
	kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);
void fpu__flush_thread(struct task_struct *tsk)
{
	if (!use_eager_fpu()) {
		/* FPU state will be reallocated lazily at the first use. */
		drop_fpu(tsk);
		fpstate_free(&tsk->thread.fpu);
	} else {
		if (!tsk_used_math(tsk)) {
			/* kthread execs. TODO: cleanup this horror. */
			if (WARN_ON(fpstate_alloc_init(tsk)))
				force_sig(SIGKILL, tsk);
			user_fpu_begin();
		}
		restore_init_xstate();
	}
}
/*
 * The xstateregs_active() routine is the same as the fpregs_active() routine,
 * as the "regset->n" for the xstate regset will be updated based on the feature
 * capabilities supported by the xsave.
 */
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return tsk_used_math(target) ? regset->n : 0;
}
int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				   &target->thread.fpu.state->fxsave, 0, -1);
}
int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		const void *kbuf, const void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->fxsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	/*
	 * update the header bits in the xsave header, indicating the
	 * presence of FP and SSE state.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

	return ret;
}
int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	struct xsave_struct *xsave;
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	xsave = &target->thread.fpu.state->xsave;

	/*
	 * Copy the 48 bytes defined by the software first into the xstate
	 * memory layout in the thread struct, so that we can copy the entire
	 * xstateregs to the user using one user_regset_copyout().
	 */
	memcpy(&xsave->i387.sw_reserved,
		xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
	/*
	 * Copy the xstate memory layout.
	 */
	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
	return ret;
}
int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
		  unsigned int pos, unsigned int count,
		  const void *kbuf, const void __user *ubuf)
{
	struct xsave_struct *xsave;
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	xsave = &target->thread.fpu.state->xsave;

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	xsave->i387.mxcsr &= mxcsr_feature_mask;
	xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
	/*
	 * These bits must be zero.
	 */
	memset(&xsave->xsave_hdr.reserved, 0, 48);
	return ret;
}
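
/*
 * Illustrative sketch (not part of this file): the xstateregs_get()/
 * xstateregs_set() handlers above are reached from user space through
 * the regset API, e.g. by a ptrace-based debugger reading the full
 * xstate image of a stopped tracee:
 *
 *	struct iovec iov = {
 *		.iov_base = xstate_buf,
 *		.iov_len  = xstate_buf_size,
 *	};
 *	ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
 */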
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION

/*
 * FPU tag word conversions.
 */
static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp; /* to avoid 16 bit prefixes in the code */

	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
	tmp = ~twd;
	tmp = (tmp | (tmp >> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
	/* and move the valid bits to the lower byte. */
	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */

	return tmp;
}
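
/*
 * Worked example for the bit-twiddling above (illustrative only):
 * the i387 tag word uses two bits per register (11 == empty, any
 * other value == in use), while the FXSR tag byte uses one bit per
 * register (1 == in use). With only st(0) and st(1) in use:
 *
 *	twd = 0xfff0			(pairs: 11 11 11 11 11 11 00 00)
 *	tmp = ~twd			= 0x000f  (low 16 bits)
 *	tmp = (tmp | (tmp >> 1)) & 0x5555 = 0x0005  (01 per non-empty pair)
 *	tmp = (tmp | (tmp >> 1)) & 0x3333 = 0x0003
 *	tmp = (tmp | (tmp >> 2)) & 0x0f0f = 0x0003
 *	tmp = (tmp | (tmp >> 4)) & 0x00ff = 0x03    (bits 0 and 1 set)
 */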
#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID	0
#define FP_EXP_TAG_ZERO		1
#define FP_EXP_TAG_SPECIAL	2
#define FP_EXP_TAG_EMPTY	3
static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
	struct _fpxreg *st;
	u32 tos = (fxsave->swd >> 11) & 7;
	u32 twd = (unsigned long) fxsave->twd;
	u32 tag;
	u32 ret = 0xffff0000u;
	int i;

	for (i = 0; i < 8; i++, twd >>= 1) {
		if (twd & 0x1) {
			st = FPREG_ADDR(fxsave, (i - tos) & 7);

			switch (st->exponent & 0x7fff) {
			case 0x7fff:
				tag = FP_EXP_TAG_SPECIAL;
				break;
			case 0x0000:
				if (!st->significand[0] &&
				    !st->significand[1] &&
				    !st->significand[2] &&
				    !st->significand[3])
					tag = FP_EXP_TAG_ZERO;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			default:
				if (st->significand[3] & 0x8000)
					tag = FP_EXP_TAG_VALID;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			}
		} else {
			tag = FP_EXP_TAG_EMPTY;
		}
		ret |= tag << (2 * i);
	}
	return ret;
}
/*
 * FXSR floating point environment conversions.
 */
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	env->cwd = fxsave->cwd | 0xffff0000u;
	env->swd = fxsave->swd | 0xffff0000u;
	env->twd = twd_fxsr_to_i387(fxsave);

#ifdef CONFIG_X86_64
	env->fip = fxsave->rip;
	env->foo = fxsave->rdp;
	/*
	 * should be actually ds/cs at fpu exception time, but
	 * that information is not available in 64bit mode.
	 */
	env->fcs = task_pt_regs(tsk)->cs;
	if (tsk == current) {
		savesegment(ds, env->fos);
	} else {
		env->fos = tsk->thread.ds;
	}
	env->fos |= 0xffff0000;
#else
	env->fip = fxsave->fip;
	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
	env->foo = fxsave->foo;
	env->fos = fxsave->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(to[0]));
}
void convert_to_fxsr(struct task_struct *tsk,
		     const struct user_i387_ia32_struct *env)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	fxsave->cwd = env->cwd;
	fxsave->swd = env->swd;
	fxsave->twd = twd_i387_to_fxsr(env->twd);
	fxsave->fop = (u16) ((u32) env->fcs >> 16);
#ifdef CONFIG_X86_64
	fxsave->rip = env->fip;
	fxsave->rdp = env->foo;
	/* cs and ds ignored */
#else
	fxsave->fip = env->fip;
	fxsave->fcs = (env->fcs & 0xffff);
	fxsave->foo = env->foo;
	fxsave->fos = env->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(from[0]));
}
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       void *kbuf, void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
					   &target->thread.fpu.state->fsave, 0,
					   -1);

	sanitize_i387_state(target);

	if (kbuf && pos == 0 && count == sizeof(env)) {
		convert_from_fxsr(kbuf, target);
		return 0;
	}

	convert_from_fxsr(&env, target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}
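
/*
 * Illustrative sketch (not part of this file): fpregs_get()/fpregs_set()
 * implement the classic 32-bit FP regset, reached e.g. via:
 *
 *	struct user_i387_ia32_struct env;
 *	struct iovec iov = { .iov_base = &env, .iov_len = sizeof(env) };
 *	ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
 */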
int fpregs_set(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       const void *kbuf, const void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = fpu__unlazy_stopped(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.state->fsave, 0,
					  -1);

	if (pos > 0 || count < sizeof(env))
		convert_from_fxsr(&env, target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
	if (!ret)
		convert_to_fxsr(target, &env);

	/*
	 * update the header bit in the xsave header, indicating the
	 * presence of FP.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
	return ret;
}
/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.
 * It is declared generically using elf_fpregset_t (which is
 * struct user_i387_struct) but is in fact only used for 32-bit
 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
 */
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
	struct task_struct *tsk = current;
	int fpvalid;

	fpvalid = !!used_math();
	if (fpvalid)
		fpvalid = !fpregs_get(tsk, NULL,
				      0, sizeof(struct user_i387_ia32_struct),
				      fpu, NULL);

	return fpvalid;
}
EXPORT_SYMBOL(dump_fpu);

#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */