]>
Commit | Line | Data |
---|---|---|
1361b83a LT |
1 | /* |
2 | * Copyright (C) 1994 Linus Torvalds | |
3 | * | |
4 | * Pentium III FXSR, SSE support | |
5 | * General FPU state handling cleanups | |
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | |
7 | * x86-64 work by Andi Kleen 2002 | |
8 | */ | |
9 | ||
10 | #ifndef _FPU_INTERNAL_H | |
11 | #define _FPU_INTERNAL_H | |
12 | ||
13 | #include <linux/kernel_stat.h> | |
14 | #include <linux/regset.h> | |
050902c0 | 15 | #include <linux/compat.h> |
1361b83a LT |
16 | #include <linux/slab.h> |
17 | #include <asm/asm.h> | |
18 | #include <asm/cpufeature.h> | |
19 | #include <asm/processor.h> | |
20 | #include <asm/sigcontext.h> | |
21 | #include <asm/user.h> | |
22 | #include <asm/uaccess.h> | |
23 | #include <asm/xsave.h> | |
24 | ||
72a671ce SS |
25 | #ifdef CONFIG_X86_64 |
26 | # include <asm/sigcontext32.h> | |
27 | # include <asm/user32.h> | |
28 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |
29 | compat_sigset_t *set, struct pt_regs *regs); | |
30 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | |
31 | compat_sigset_t *set, struct pt_regs *regs); | |
32 | #else | |
33 | # define user_i387_ia32_struct user_i387_struct | |
34 | # define user32_fxsr_struct user_fxsr_struct | |
35 | # define ia32_setup_frame __setup_frame | |
36 | # define ia32_setup_rt_frame __setup_rt_frame | |
37 | #endif | |
38 | ||
39 | extern unsigned int mxcsr_feature_mask; | |
1361b83a LT |
40 | extern void fpu_init(void); |
41 | ||
42 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | |
43 | ||
72a671ce SS |
44 | extern void convert_from_fxsr(struct user_i387_ia32_struct *env, |
45 | struct task_struct *tsk); | |
46 | extern void convert_to_fxsr(struct task_struct *tsk, | |
47 | const struct user_i387_ia32_struct *env); | |
48 | ||
1361b83a LT |
49 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
50 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | |
51 | xstateregs_get; | |
52 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, | |
53 | xstateregs_set; | |
54 | ||
1361b83a LT |
55 | /* |
56 | * xstateregs_active == fpregs_active. Please refer to the comment | |
57 | * at the definition of fpregs_active. | |
58 | */ | |
59 | #define xstateregs_active fpregs_active | |
60 | ||
1361b83a | 61 | #ifdef CONFIG_MATH_EMULATION |
72a671ce | 62 | # define HAVE_HWFP (boot_cpu_data.hard_math) |
1361b83a LT |
63 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
64 | #else | |
72a671ce | 65 | # define HAVE_HWFP 1 |
1361b83a LT |
/* No-op stand-in used when CONFIG_MATH_EMULATION is not configured. */
static inline void finit_soft_fpu(struct i387_soft_struct *soft)
{
}
67 | #endif | |
68 | ||
050902c0 SS |
69 | static inline int is_ia32_compat_frame(void) |
70 | { | |
71 | return config_enabled(CONFIG_IA32_EMULATION) && | |
72 | test_thread_flag(TIF_IA32); | |
73 | } | |
74 | ||
75 | static inline int is_ia32_frame(void) | |
76 | { | |
77 | return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); | |
78 | } | |
79 | ||
80 | static inline int is_x32_frame(void) | |
81 | { | |
82 | return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); | |
83 | } | |
84 | ||
1361b83a LT |
85 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
86 | ||
87 | static __always_inline __pure bool use_xsaveopt(void) | |
88 | { | |
89 | return static_cpu_has(X86_FEATURE_XSAVEOPT); | |
90 | } | |
91 | ||
92 | static __always_inline __pure bool use_xsave(void) | |
93 | { | |
94 | return static_cpu_has(X86_FEATURE_XSAVE); | |
95 | } | |
96 | ||
97 | static __always_inline __pure bool use_fxsr(void) | |
98 | { | |
99 | return static_cpu_has(X86_FEATURE_FXSR); | |
100 | } | |
101 | ||
102 | extern void __sanitize_i387_state(struct task_struct *); | |
103 | ||
/*
 * Hand @tsk to __sanitize_i387_state(), but only when XSAVEOPT is in
 * use; without it this is a no-op.
 */
static inline void sanitize_i387_state(struct task_struct *tsk)
{
	if (use_xsaveopt())
		__sanitize_i387_state(tsk);
}
110 | ||
0ca5bd0d SS |
/*
 * check_insn() - run one instruction that may fault and report the outcome.
 * @insn:   instruction text, stringified into the asm template at label 1
 * @output: asm output operand(s), listed after the internal [err] operand
 * @input:  asm input operand(s), listed after the "0"(0) initialiser
 *
 * [err] starts out as 0 because it is tied to operand 0 with the
 * constant 0.  The exception-table entry redirects a fault at label 1
 * to the fixup code at label 3, which stores -1 in [err] and jumps to
 * label 2, i.e. just past the instruction.
 *
 * Evaluates to 0 when the instruction completed and to -1 when it
 * faulted.
 */
#define check_insn(insn, output, input...)				\
({									\
	int err;							\
									\
	asm volatile("1:" #insn "\n\t"					\
		     "2:\n"						\
		     ".section .fixup,\"ax\"\n"				\
		     "3: movl $-1,%[err]\n"				\
		     " jmp 2b\n"					\
		     ".previous\n"					\
		     _ASM_EXTABLE(1b, 3b)				\
		     : [err] "=r" (err), output				\
		     : "0"(0), input);					\
	err;								\
})
125 | ||
126 | static inline int fsave_user(struct i387_fsave_struct __user *fx) | |
1361b83a | 127 | { |
0ca5bd0d | 128 | return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); |
1361b83a LT |
129 | } |
130 | ||
131 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |
132 | { | |
0ca5bd0d SS |
133 | if (config_enabled(CONFIG_X86_32)) |
134 | return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); | |
135 | else if (config_enabled(CONFIG_AS_FXSAVEQ)) | |
136 | return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); | |
137 | ||
138 | /* See comment in fpu_fxsave() below. */ | |
139 | return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); | |
140 | } | |
141 | ||
142 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |
143 | { | |
144 | if (config_enabled(CONFIG_X86_32)) | |
145 | return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); | |
146 | else if (config_enabled(CONFIG_AS_FXSAVEQ)) | |
147 | return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); | |
148 | ||
149 | /* See comment in fpu_fxsave() below. */ | |
150 | return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), | |
151 | "m" (*fx)); | |
152 | } | |
153 | ||
154 | static inline int frstor_checking(struct i387_fsave_struct *fx) | |
155 | { | |
156 | return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); | |
1361b83a LT |
157 | } |
158 | ||
159 | static inline void fpu_fxsave(struct fpu *fpu) | |
160 | { | |
0ca5bd0d SS |
161 | if (config_enabled(CONFIG_X86_32)) |
162 | asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); | |
163 | else if (config_enabled(CONFIG_AS_FXSAVEQ)) | |
164 | asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); | |
165 | else { | |
166 | /* Using "rex64; fxsave %0" is broken because, if the memory | |
167 | * operand uses any extended registers for addressing, a second | |
168 | * REX prefix will be generated (to the assembler, rex64 | |
169 | * followed by semicolon is a separate instruction), and hence | |
170 | * the 64-bitness is lost. | |
171 | * | |
172 | * Using "fxsaveq %0" would be the ideal choice, but is only | |
173 | * supported starting with gas 2.16. | |
174 | * | |
175 | * Using, as a workaround, the properly prefixed form below | |
176 | * isn't accepted by any binutils version so far released, | |
177 | * complaining that the same type of prefix is used twice if | |
178 | * an extended register is needed for addressing (fix submitted | |
179 | * to mainline 2005-11-21). | |
180 | * | |
181 | * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); | |
182 | * | |
183 | * This, however, we can work around by forcing the compiler to | |
184 | * select an addressing mode that doesn't require extended | |
185 | * registers. | |
186 | */ | |
187 | asm volatile( "rex64/fxsave (%[fx])" | |
188 | : "=m" (fpu->state->fxsave) | |
189 | : [fx] "R" (&fpu->state->fxsave)); | |
190 | } | |
1361b83a | 191 | } |
1361b83a LT |
192 | |
193 | /* | |
194 | * These must be called with preempt disabled. Returns | |
195 | * 'true' if the FPU state is still intact. | |
196 | */ | |
197 | static inline int fpu_save_init(struct fpu *fpu) | |
198 | { | |
199 | if (use_xsave()) { | |
200 | fpu_xsave(fpu); | |
201 | ||
202 | /* | |
203 | * xsave header may indicate the init state of the FP. | |
204 | */ | |
205 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) | |
206 | return 1; | |
207 | } else if (use_fxsr()) { | |
208 | fpu_fxsave(fpu); | |
209 | } else { | |
210 | asm volatile("fnsave %[fx]; fwait" | |
211 | : [fx] "=m" (fpu->state->fsave)); | |
212 | return 0; | |
213 | } | |
214 | ||
215 | /* | |
216 | * If exceptions are pending, we need to clear them so | |
217 | * that we don't randomly get exceptions later. | |
218 | * | |
219 | * FIXME! Is this perhaps only true for the old-style | |
220 | * irq13 case? Maybe we could leave the x87 state | |
221 | * intact otherwise? | |
222 | */ | |
223 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { | |
224 | asm volatile("fnclex"); | |
225 | return 0; | |
226 | } | |
227 | return 1; | |
228 | } | |
229 | ||
230 | static inline int __save_init_fpu(struct task_struct *tsk) | |
231 | { | |
232 | return fpu_save_init(&tsk->thread.fpu); | |
233 | } | |
234 | ||
1361b83a LT |
235 | static inline int fpu_restore_checking(struct fpu *fpu) |
236 | { | |
237 | if (use_xsave()) | |
0ca5bd0d SS |
238 | return fpu_xrstor_checking(&fpu->state->xsave); |
239 | else if (use_fxsr()) | |
240 | return fxrstor_checking(&fpu->state->fxsave); | |
1361b83a | 241 | else |
0ca5bd0d | 242 | return frstor_checking(&fpu->state->fsave); |
1361b83a LT |
243 | } |
244 | ||
245 | static inline int restore_fpu_checking(struct task_struct *tsk) | |
246 | { | |
247 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | |
248 | is pending. Clear the x87 state here by setting it to fixed | |
249 | values. "m" is a random variable that should be in L1 */ | |
250 | alternative_input( | |
251 | ASM_NOP8 ASM_NOP2, | |
252 | "emms\n\t" /* clear stack tags */ | |
253 | "fildl %P[addr]", /* set F?P to defined value */ | |
254 | X86_FEATURE_FXSAVE_LEAK, | |
255 | [addr] "m" (tsk->thread.fpu.has_fpu)); | |
256 | ||
257 | return fpu_restore_checking(&tsk->thread.fpu); | |
258 | } | |
259 | ||
/*
 * Software FPU state helpers. Careful: these need
 * preemption protection *and* they need to be
 * properly paired with the CR0.TS changes!
 */
265 | static inline int __thread_has_fpu(struct task_struct *tsk) | |
266 | { | |
267 | return tsk->thread.fpu.has_fpu; | |
268 | } | |
269 | ||
270 | /* Must be paired with an 'stts' after! */ | |
271 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | |
272 | { | |
273 | tsk->thread.fpu.has_fpu = 0; | |
c6ae41e7 | 274 | this_cpu_write(fpu_owner_task, NULL); |
1361b83a LT |
275 | } |
276 | ||
277 | /* Must be paired with a 'clts' before! */ | |
278 | static inline void __thread_set_has_fpu(struct task_struct *tsk) | |
279 | { | |
280 | tsk->thread.fpu.has_fpu = 1; | |
c6ae41e7 | 281 | this_cpu_write(fpu_owner_task, tsk); |
1361b83a LT |
282 | } |
283 | ||
284 | /* | |
285 | * Encapsulate the CR0.TS handling together with the | |
286 | * software flag. | |
287 | * | |
288 | * These generally need preemption protection to work, | |
289 | * do try to avoid using these on their own. | |
290 | */ | |
/* Give up the FPU: clear the software flag first, then issue stts(). */
static inline void __thread_fpu_end(struct task_struct *tsk)
{
	__thread_clear_has_fpu(tsk);	/* software side */
	stts();				/* CR0.TS side */
}
296 | ||
/* Claim the FPU: issue clts() first, then set the software flag. */
static inline void __thread_fpu_begin(struct task_struct *tsk)
{
	clts();				/* CR0.TS side */
	__thread_set_has_fpu(tsk);	/* software side */
}
302 | ||
303 | /* | |
304 | * FPU state switching for scheduling. | |
305 | * | |
306 | * This is a two-stage process: | |
307 | * | |
308 | * - switch_fpu_prepare() saves the old state and | |
309 | * sets the new state of the CR0.TS bit. This is | |
310 | * done within the context of the old process. | |
311 | * | |
312 | * - switch_fpu_finish() restores the new state as | |
313 | * necessary. | |
314 | */ | |
/*
 * Result of switch_fpu_prepare(), consumed by switch_fpu_finish():
 * preload is non-zero when the incoming task's FPU registers still
 * have to be restored.
 */
typedef struct {
	int preload;
} fpu_switch_t;
316 | ||
317 | /* | |
318 | * FIXME! We could do a totally lazy restore, but we need to | |
319 | * add a per-cpu "this was the task that last touched the FPU | |
320 | * on this CPU" variable, and the task needs to have a "I last | |
321 | * touched the FPU on this CPU" and check them. | |
322 | * | |
323 | * We don't do that yet, so "fpu_lazy_restore()" always returns | |
324 | * false, but some day.. | |
325 | */ | |
326 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) | |
327 | { | |
c6ae41e7 | 328 | return new == this_cpu_read_stable(fpu_owner_task) && |
1361b83a LT |
329 | cpu == new->thread.fpu.last_cpu; |
330 | } | |
331 | ||
332 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) | |
333 | { | |
334 | fpu_switch_t fpu; | |
335 | ||
336 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | |
337 | if (__thread_has_fpu(old)) { | |
338 | if (!__save_init_fpu(old)) | |
339 | cpu = ~0; | |
340 | old->thread.fpu.last_cpu = cpu; | |
341 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | |
342 | ||
343 | /* Don't change CR0.TS if we just switch! */ | |
344 | if (fpu.preload) { | |
345 | new->fpu_counter++; | |
346 | __thread_set_has_fpu(new); | |
347 | prefetch(new->thread.fpu.state); | |
348 | } else | |
349 | stts(); | |
350 | } else { | |
351 | old->fpu_counter = 0; | |
352 | old->thread.fpu.last_cpu = ~0; | |
353 | if (fpu.preload) { | |
354 | new->fpu_counter++; | |
355 | if (fpu_lazy_restore(new, cpu)) | |
356 | fpu.preload = 0; | |
357 | else | |
358 | prefetch(new->thread.fpu.state); | |
359 | __thread_fpu_begin(new); | |
360 | } | |
361 | } | |
362 | return fpu; | |
363 | } | |
364 | ||
365 | /* | |
366 | * By the time this gets called, we've already cleared CR0.TS and | |
367 | * given the process the FPU if we are going to preload the FPU | |
368 | * state - all we need to do is to conditionally restore the register | |
369 | * state itself. | |
370 | */ | |
371 | static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | |
372 | { | |
373 | if (fpu.preload) { | |
374 | if (unlikely(restore_fpu_checking(new))) | |
375 | __thread_fpu_end(new); | |
376 | } | |
377 | } | |
378 | ||
379 | /* | |
380 | * Signal frame handlers... | |
381 | */ | |
72a671ce SS |
382 | extern int save_xstate_sig(void __user *buf, void __user *fx, int size); |
383 | extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); | |
384 | ||
385 | static inline int xstate_sigframe_size(void) | |
386 | { | |
387 | return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; | |
388 | } | |
389 | ||
390 | static inline int restore_xstate_sig(void __user *buf, int ia32_frame) | |
391 | { | |
392 | void __user *buf_fx = buf; | |
393 | int size = xstate_sigframe_size(); | |
394 | ||
395 | if (ia32_frame && use_fxsr()) { | |
396 | buf_fx = buf + sizeof(struct i387_fsave_struct); | |
397 | size += sizeof(struct i387_fsave_struct); | |
398 | } | |
399 | ||
400 | return __restore_xstate_sig(buf, buf_fx, size); | |
401 | } | |
1361b83a | 402 | |
72a671ce | 403 | static inline void __drop_fpu(struct task_struct *tsk) |
1361b83a LT |
404 | { |
405 | if (__thread_has_fpu(tsk)) { | |
406 | /* Ignore delayed exceptions from user space */ | |
407 | asm volatile("1: fwait\n" | |
408 | "2:\n" | |
409 | _ASM_EXTABLE(1b, 2b)); | |
410 | __thread_fpu_end(tsk); | |
411 | } | |
412 | } | |
413 | ||
414 | /* | |
415 | * The actual user_fpu_begin/end() functions | |
416 | * need to be preemption-safe. | |
417 | * | |
418 | * NOTE! user_fpu_end() must be used only after you | |
419 | * have saved the FP state, and user_fpu_begin() must | |
420 | * be used only immediately before restoring it. | |
421 | * These functions do not do any save/restore on | |
422 | * their own. | |
423 | */ | |
424 | static inline void user_fpu_end(void) | |
425 | { | |
426 | preempt_disable(); | |
427 | __thread_fpu_end(current); | |
428 | preempt_enable(); | |
429 | } | |
430 | ||
431 | static inline void user_fpu_begin(void) | |
432 | { | |
433 | preempt_disable(); | |
434 | if (!user_has_fpu()) | |
435 | __thread_fpu_begin(current); | |
436 | preempt_enable(); | |
437 | } | |
438 | ||
439 | /* | |
440 | * These disable preemption on their own and are safe | |
441 | */ | |
static inline void save_init_fpu(struct task_struct *tsk)
{
	/* Callers are expected to pass a task that owns the FPU. */
	WARN_ON_ONCE(!__thread_has_fpu(tsk));

	preempt_disable();
	{
		/* Save the registers, then hand the FPU back. */
		__save_init_fpu(tsk);
		__thread_fpu_end(tsk);
	}
	preempt_enable();
}
450 | ||
72a671ce SS |
451 | static inline void drop_fpu(struct task_struct *tsk) |
452 | { | |
453 | /* | |
454 | * Forget coprocessor state.. | |
455 | */ | |
e9625917 | 456 | tsk->fpu_counter = 0; |
1361b83a | 457 | preempt_disable(); |
72a671ce | 458 | __drop_fpu(tsk); |
1361b83a | 459 | preempt_enable(); |
72a671ce | 460 | clear_used_math(); |
1361b83a LT |
461 | } |
462 | ||
463 | /* | |
464 | * i387 state interaction | |
465 | */ | |
466 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | |
467 | { | |
468 | if (cpu_has_fxsr) { | |
469 | return tsk->thread.fpu.state->fxsave.cwd; | |
470 | } else { | |
471 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; | |
472 | } | |
473 | } | |
474 | ||
475 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | |
476 | { | |
477 | if (cpu_has_fxsr) { | |
478 | return tsk->thread.fpu.state->fxsave.swd; | |
479 | } else { | |
480 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; | |
481 | } | |
482 | } | |
483 | ||
484 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | |
485 | { | |
486 | if (cpu_has_xmm) { | |
487 | return tsk->thread.fpu.state->fxsave.mxcsr; | |
488 | } else { | |
489 | return MXCSR_DEFAULT; | |
490 | } | |
491 | } | |
492 | ||
493 | static bool fpu_allocated(struct fpu *fpu) | |
494 | { | |
495 | return fpu->state != NULL; | |
496 | } | |
497 | ||
498 | static inline int fpu_alloc(struct fpu *fpu) | |
499 | { | |
500 | if (fpu_allocated(fpu)) | |
501 | return 0; | |
502 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | |
503 | if (!fpu->state) | |
504 | return -ENOMEM; | |
505 | WARN_ON((unsigned long)fpu->state & 15); | |
506 | return 0; | |
507 | } | |
508 | ||
509 | static inline void fpu_free(struct fpu *fpu) | |
510 | { | |
511 | if (fpu->state) { | |
512 | kmem_cache_free(task_xstate_cachep, fpu->state); | |
513 | fpu->state = NULL; | |
514 | } | |
515 | } | |
516 | ||
517 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | |
518 | { | |
519 | memcpy(dst->state, src->state, xstate_size); | |
520 | } | |
521 | ||
522 | extern void fpu_finit(struct fpu *fpu); | |
523 | ||
72a671ce SS |
524 | static inline unsigned long |
525 | alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, | |
526 | unsigned long *size) | |
527 | { | |
528 | unsigned long frame_size = xstate_sigframe_size(); | |
529 | ||
530 | *buf_fx = sp = round_down(sp - frame_size, 64); | |
531 | if (ia32_frame && use_fxsr()) { | |
532 | frame_size += sizeof(struct i387_fsave_struct); | |
533 | sp -= sizeof(struct i387_fsave_struct); | |
534 | } | |
535 | ||
536 | *size = frame_size; | |
537 | return sp; | |
538 | } | |
539 | ||
1361b83a | 540 | #endif |