]>
Commit | Line | Data |
---|---|---|
0c867537 | 1 | /* |
ae02679c | 2 | * x86 FPU boot time init code: |
0c867537 | 3 | */ |
78f7f1e5 | 4 | #include <asm/fpu/internal.h> |
0c867537 IM |
5 | #include <asm/tlbflush.h> |
6 | ||
5aaeb5c0 IM |
7 | #include <linux/sched.h> |
8 | ||
ae02679c IM |
9 | /* |
10 | * Initialize the TS bit in CR0 according to the style of context-switches | |
11 | * we are using: | |
12 | */ | |
41e78410 IM |
13 | static void fpu__init_cpu_ctx_switch(void) |
14 | { | |
15 | if (!cpu_has_eager_fpu) | |
16 | stts(); | |
17 | else | |
18 | clts(); | |
19 | } | |
20 | ||
21 | /* | |
22 | * Initialize the registers found in all CPUs, CR0 and CR4: | |
23 | */ | |
24 | static void fpu__init_cpu_generic(void) | |
25 | { | |
26 | unsigned long cr0; | |
27 | unsigned long cr4_mask = 0; | |
28 | ||
29 | if (cpu_has_fxsr) | |
30 | cr4_mask |= X86_CR4_OSFXSR; | |
31 | if (cpu_has_xmm) | |
32 | cr4_mask |= X86_CR4_OSXMMEXCPT; | |
33 | if (cr4_mask) | |
34 | cr4_set_bits(cr4_mask); | |
35 | ||
36 | cr0 = read_cr0(); | |
37 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | |
38 | if (!cpu_has_fpu) | |
39 | cr0 |= X86_CR0_EM; | |
40 | write_cr0(cr0); | |
b1276c48 IM |
41 | |
42 | /* Flush out any pending x87 state: */ | |
5fc96038 IM |
43 | #ifdef CONFIG_MATH_EMULATION |
44 | if (!cpu_has_fpu) | |
45 | fpstate_init_soft(¤t->thread.fpu.state.soft); | |
46 | else | |
47 | #endif | |
48 | asm volatile ("fninit"); | |
41e78410 IM |
49 | } |
50 | ||
/*
 * Enable all supported FPU features. Called when a CPU is brought online.
 *
 * Runs the three per-CPU init steps in order: generic CR0/CR4 setup,
 * xstate setup, then the CR0.TS setup for the context-switch style.
 */
void fpu__init_cpu(void)
{
	fpu__init_cpu_generic();
	fpu__init_cpu_xstate();
	fpu__init_cpu_ctx_switch();
}
60 | ||
2e2f3da7 | 61 | /* |
dd863880 IM |
62 | * The earliest FPU detection code. |
63 | * | |
64 | * Set the X86_FEATURE_FPU CPU-capability bit based on | |
65 | * trying to execute an actual sequence of FPU instructions: | |
2e2f3da7 IM |
66 | */ |
67 | static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) | |
68 | { | |
69 | unsigned long cr0; | |
70 | u16 fsw, fcw; | |
71 | ||
72 | fsw = fcw = 0xffff; | |
73 | ||
74 | cr0 = read_cr0(); | |
75 | cr0 &= ~(X86_CR0_TS | X86_CR0_EM); | |
76 | write_cr0(cr0); | |
77 | ||
78 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | |
79 | : "+m" (fsw), "+m" (fcw)); | |
80 | ||
81 | if (fsw == 0 && (fcw & 0x103f) == 0x003f) | |
82 | set_cpu_cap(c, X86_FEATURE_FPU); | |
83 | else | |
84 | clear_cpu_cap(c, X86_FEATURE_FPU); | |
e83ab9ad IM |
85 | |
86 | #ifndef CONFIG_MATH_EMULATION | |
87 | if (!cpu_has_fpu) { | |
ae02679c | 88 | pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); |
e83ab9ad IM |
89 | for (;;) |
90 | asm volatile("hlt"); | |
91 | } | |
92 | #endif | |
2e2f3da7 IM |
93 | } |
94 | ||
4d164092 IM |
95 | /* |
96 | * Boot time FPU feature detection code: | |
97 | */ | |
0c867537 | 98 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
91a8c2a5 | 99 | |
32231879 | 100 | static void __init fpu__init_system_mxcsr(void) |
0c867537 | 101 | { |
91a8c2a5 | 102 | unsigned int mask = 0; |
0c867537 IM |
103 | |
104 | if (cpu_has_fxsr) { | |
b96fecbf IM |
105 | /* Static because GCC does not get 16-byte stack alignment right: */ |
106 | static struct fxregs_state fxregs __initdata; | |
91a8c2a5 | 107 | |
b96fecbf | 108 | asm volatile("fxsave %0" : "+m" (fxregs)); |
91a8c2a5 | 109 | |
b96fecbf | 110 | mask = fxregs.mxcsr_mask; |
91a8c2a5 IM |
111 | |
112 | /* | |
113 | * If zero then use the default features mask, | |
114 | * which has all features set, except the | |
115 | * denormals-are-zero feature bit: | |
116 | */ | |
0c867537 IM |
117 | if (mask == 0) |
118 | mask = 0x0000ffbf; | |
119 | } | |
120 | mxcsr_feature_mask &= mask; | |
121 | } | |
122 | ||
7218e8b7 IM |
123 | /* |
124 | * Once per bootup FPU initialization sequences that will run on most x86 CPUs: | |
125 | */ | |
32231879 | 126 | static void __init fpu__init_system_generic(void) |
7218e8b7 IM |
127 | { |
128 | /* | |
129 | * Set up the legacy init FPU context. (xstate init might overwrite this | |
130 | * with a more modern format, if the CPU supports it.) | |
131 | */ | |
6f575023 | 132 | fpstate_init_fxstate(&init_fpstate.fxsave); |
7218e8b7 IM |
133 | |
134 | fpu__init_system_mxcsr(); | |
135 | } | |
136 | ||
/*
 * Size of the FPU context state. Every task in the system uses the
 * same context size, regardless of what portion of it the task
 * actually uses. This is inherent to the XSAVE architecture, which
 * puts all state components into a single, contiguous memory block:
 */
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
145 | ||
5aaeb5c0 IM |
146 | /* Enforce that 'MEMBER' is the last field of 'TYPE': */ |
147 | #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ | |
148 | BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) | |
0c8c0f03 DH |
149 | |
150 | /* | |
5aaeb5c0 | 151 | * We append the 'struct fpu' to the task_struct: |
0c8c0f03 | 152 | */ |
5aaeb5c0 | 153 | static void __init fpu__init_task_struct_size(void) |
0c8c0f03 DH |
154 | { |
155 | int task_size = sizeof(struct task_struct); | |
156 | ||
157 | /* | |
158 | * Subtract off the static size of the register state. | |
159 | * It potentially has a bunch of padding. | |
160 | */ | |
161 | task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); | |
162 | ||
163 | /* | |
164 | * Add back the dynamically-calculated register state | |
165 | * size. | |
166 | */ | |
167 | task_size += xstate_size; | |
168 | ||
169 | /* | |
170 | * We dynamically size 'struct fpu', so we require that | |
171 | * it be at the end of 'thread_struct' and that | |
172 | * 'thread_struct' be at the end of 'task_struct'. If | |
173 | * you hit a compile error here, check the structure to | |
174 | * see if something got added to the end. | |
175 | */ | |
176 | CHECK_MEMBER_AT_END_OF(struct fpu, state); | |
177 | CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); | |
178 | CHECK_MEMBER_AT_END_OF(struct task_struct, thread); | |
179 | ||
5aaeb5c0 | 180 | arch_task_struct_size = task_size; |
0c8c0f03 DH |
181 | } |
182 | ||
41e78410 IM |
183 | /* |
184 | * Set up the xstate_size based on the legacy FPU context size. | |
185 | * | |
186 | * We set this up first, and later it will be overwritten by | |
187 | * fpu__init_system_xstate() if the CPU knows about xstates. | |
188 | */ | |
32231879 | 189 | static void __init fpu__init_system_xstate_size_legacy(void) |
0c867537 | 190 | { |
e97131a8 IM |
191 | static int on_boot_cpu = 1; |
192 | ||
193 | WARN_ON_FPU(!on_boot_cpu); | |
194 | on_boot_cpu = 0; | |
195 | ||
0c867537 IM |
196 | /* |
197 | * Note that xstate_size might be overwriten later during | |
c42103b2 | 198 | * fpu__init_system_xstate(). |
0c867537 IM |
199 | */ |
200 | ||
201 | if (!cpu_has_fpu) { | |
202 | /* | |
203 | * Disable xsave as we do not support it if i387 | |
204 | * emulation is enabled. | |
205 | */ | |
206 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | |
207 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
c47ada30 | 208 | xstate_size = sizeof(struct swregs_state); |
6a133207 IM |
209 | } else { |
210 | if (cpu_has_fxsr) | |
c47ada30 | 211 | xstate_size = sizeof(struct fxregs_state); |
6a133207 | 212 | else |
c47ada30 | 213 | xstate_size = sizeof(struct fregs_state); |
0c867537 | 214 | } |
6f56a8d0 IM |
215 | /* |
216 | * Quirk: we don't yet handle the XSAVES* instructions | |
217 | * correctly, as we don't correctly convert between | |
218 | * standard and compacted format when interfacing | |
219 | * with user-space - so disable it for now. | |
220 | * | |
221 | * The difference is small: with recent CPUs the | |
222 | * compacted format is only marginally smaller than | |
223 | * the standard FPU state format. | |
224 | * | |
225 | * ( This is easy to backport while we are fixing | |
226 | * XSAVES* support. ) | |
227 | */ | |
228 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | |
0c867537 IM |
229 | } |
230 | ||
ae02679c IM |
231 | /* |
232 | * FPU context switching strategies: | |
233 | * | |
234 | * Against popular belief, we don't do lazy FPU saves, due to the | |
235 | * task migration complications it brings on SMP - we only do | |
236 | * lazy FPU restores. | |
237 | * | |
238 | * 'lazy' is the traditional strategy, which is based on setting | |
239 | * CR0::TS to 1 during context-switch (instead of doing a full | |
240 | * restore of the FPU state), which causes the first FPU instruction | |
241 | * after the context switch (whenever it is executed) to fault - at | |
242 | * which point we lazily restore the FPU state into FPU registers. | |
243 | * | |
244 | * Tasks are of course under no obligation to execute FPU instructions, | |
245 | * so it can easily happen that another context-switch occurs without | |
246 | * a single FPU instruction being executed. If we eventually switch | |
247 | * back to the original task (that still owns the FPU) then we have | |
248 | * not only saved the restores along the way, but we also have the | |
249 | * FPU ready to be used for the original task. | |
250 | * | |
251 | * 'eager' switching is used on modern CPUs, there we switch the FPU | |
252 | * state during every context switch, regardless of whether the task | |
253 | * has used FPU instructions in that time slice or not. This is done | |
254 | * because modern FPU context saving instructions are able to optimize | |
255 | * state saving and restoration in hardware: they can detect both | |
256 | * unused and untouched FPU state and optimize accordingly. | |
257 | * | |
258 | * [ Note that even in 'lazy' mode we might optimize context switches | |
259 | * to use 'eager' restores, if we detect that a task is using the FPU | |
260 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] | |
261 | */ | |
6f5d265a IM |
262 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; |
263 | ||
264 | static int __init eager_fpu_setup(char *s) | |
265 | { | |
266 | if (!strcmp(s, "on")) | |
267 | eagerfpu = ENABLE; | |
268 | else if (!strcmp(s, "off")) | |
269 | eagerfpu = DISABLE; | |
270 | else if (!strcmp(s, "auto")) | |
271 | eagerfpu = AUTO; | |
272 | return 1; | |
273 | } | |
274 | __setup("eagerfpu=", eager_fpu_setup); | |
275 | ||
276 | /* | |
ae02679c | 277 | * Pick the FPU context switching strategy: |
6f5d265a | 278 | */ |
32231879 | 279 | static void __init fpu__init_system_ctx_switch(void) |
6f5d265a | 280 | { |
e97131a8 IM |
281 | static bool on_boot_cpu = 1; |
282 | ||
283 | WARN_ON_FPU(!on_boot_cpu); | |
284 | on_boot_cpu = 0; | |
285 | ||
286 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | |
6f5d265a IM |
287 | current_thread_info()->status = 0; |
288 | ||
289 | /* Auto enable eagerfpu for xsaveopt */ | |
290 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | |
291 | eagerfpu = ENABLE; | |
292 | ||
293 | if (xfeatures_mask & XSTATE_EAGER) { | |
294 | if (eagerfpu == DISABLE) { | |
295 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | |
296 | xfeatures_mask & XSTATE_EAGER); | |
297 | xfeatures_mask &= ~XSTATE_EAGER; | |
298 | } else { | |
299 | eagerfpu = ENABLE; | |
300 | } | |
301 | } | |
302 | ||
303 | if (eagerfpu == ENABLE) | |
304 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | |
305 | ||
32231879 | 306 | printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); |
6f5d265a IM |
307 | } |
308 | ||
e35f6f14 | 309 | /* |
ae02679c IM |
310 | * Called on the boot CPU once per system bootup, to set up the initial |
311 | * FPU state that is later cloned into all processes: | |
e35f6f14 | 312 | */ |
32231879 | 313 | void __init fpu__init_system(struct cpuinfo_x86 *c) |
e35f6f14 | 314 | { |
dd863880 IM |
315 | fpu__init_system_early_generic(c); |
316 | ||
ae02679c IM |
317 | /* |
318 | * The FPU has to be operational for some of the | |
319 | * later FPU init activities: | |
320 | */ | |
e35f6f14 | 321 | fpu__init_cpu(); |
0c867537 | 322 | |
530b37e4 | 323 | /* |
ae02679c IM |
324 | * But don't leave CR0::TS set yet, as some of the FPU setup |
325 | * methods depend on being able to execute FPU instructions | |
326 | * that will fault on a set TS, such as the FXSAVE in | |
327 | * fpu__init_system_mxcsr(). | |
530b37e4 IM |
328 | */ |
329 | clts(); | |
330 | ||
7218e8b7 | 331 | fpu__init_system_generic(); |
7638b74b | 332 | fpu__init_system_xstate_size_legacy(); |
c42103b2 | 333 | fpu__init_system_xstate(); |
5aaeb5c0 | 334 | fpu__init_task_struct_size(); |
997578b1 | 335 | |
011545b5 | 336 | fpu__init_system_ctx_switch(); |
0c867537 | 337 | } |
146ed598 | 338 | |
ae02679c IM |
339 | /* |
340 | * Boot parameter to turn off FPU support and fall back to math-emu: | |
341 | */ | |
146ed598 IM |
342 | static int __init no_387(char *s) |
343 | { | |
344 | setup_clear_cpu_cap(X86_FEATURE_FPU); | |
345 | return 1; | |
346 | } | |
146ed598 | 347 | __setup("no387", no_387); |
7cf82d33 | 348 | |
5856afed IM |
349 | /* |
350 | * Disable all xstate CPU features: | |
351 | */ | |
352 | static int __init x86_noxsave_setup(char *s) | |
7cf82d33 IM |
353 | { |
354 | if (strlen(s)) | |
355 | return 0; | |
5856afed | 356 | |
7cf82d33 IM |
357 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
358 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
5bc016f1 | 359 | setup_clear_cpu_cap(X86_FEATURE_XSAVEC); |
7cf82d33 IM |
360 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); |
361 | setup_clear_cpu_cap(X86_FEATURE_AVX); | |
362 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | |
5bc016f1 JB |
363 | setup_clear_cpu_cap(X86_FEATURE_AVX512F); |
364 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | |
365 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | |
366 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | |
367 | setup_clear_cpu_cap(X86_FEATURE_MPX); | |
5856afed | 368 | |
7cf82d33 IM |
369 | return 1; |
370 | } | |
5856afed | 371 | __setup("noxsave", x86_noxsave_setup); |
7cf82d33 | 372 | |
5856afed IM |
373 | /* |
374 | * Disable the XSAVEOPT instruction specifically: | |
375 | */ | |
376 | static int __init x86_noxsaveopt_setup(char *s) | |
7cf82d33 IM |
377 | { |
378 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
5856afed | 379 | |
7cf82d33 IM |
380 | return 1; |
381 | } | |
5856afed | 382 | __setup("noxsaveopt", x86_noxsaveopt_setup); |
7cf82d33 | 383 | |
5856afed IM |
384 | /* |
385 | * Disable the XSAVES instruction: | |
386 | */ | |
387 | static int __init x86_noxsaves_setup(char *s) | |
7cf82d33 IM |
388 | { |
389 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | |
5856afed | 390 | |
7cf82d33 IM |
391 | return 1; |
392 | } | |
5856afed | 393 | __setup("noxsaves", x86_noxsaves_setup); |
7cf82d33 | 394 | |
5856afed IM |
395 | /* |
396 | * Disable FX save/restore and SSE support: | |
397 | */ | |
398 | static int __init x86_nofxsr_setup(char *s) | |
7cf82d33 IM |
399 | { |
400 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | |
401 | setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); | |
402 | setup_clear_cpu_cap(X86_FEATURE_XMM); | |
5856afed | 403 | |
7cf82d33 IM |
404 | return 1; |
405 | } | |
5856afed | 406 | __setup("nofxsr", x86_nofxsr_setup); |