]>
Commit | Line | Data |
---|---|---|
8f11c721 TL |
1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
2 | From: Brian Behlendorf <behlendorf1@llnl.gov> | |
3 | Date: Thu, 3 Oct 2019 00:03:20 +0000 | |
4 | Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore | |
5 | ||
6 | Contrary to initial testing we cannot rely on these kernels to | |
7 | invalidate the per-cpu FPU state and restore the FPU registers. | |
8 | Therefore, the kfpu_begin() and kfpu_end() functions have been | |
9 | updated to unconditionally save and restore the FPU state. | |
10 | ||
11 | Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> | |
12 | Issue #9346 | |
13 | (cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713) | |
14 | Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> | |
15 | --- | |
16 | config/kernel-fpu.m4 | 82 +++++++++++---------- | |
17 | include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++----------- | |
18 | 2 files changed, 155 insertions(+), 79 deletions(-) | |
19 | ||
20 | diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 | |
21 | index a2c47d65a..9ed9b14ad 100644 | |
22 | --- a/config/kernel-fpu.m4 | |
23 | +++ b/config/kernel-fpu.m4 | |
24 | @@ -2,15 +2,9 @@ dnl # | |
25 | dnl # Handle differences in kernel FPU code. | |
26 | dnl # | |
27 | dnl # Kernel | |
28 | -dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD. | |
29 | -dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD | |
30 | -dnl # | |
31 | -dnl # 5.0: As an optimization SIMD operations performed by kernel | |
32 | -dnl # threads can skip saving and restoring their FPU context. | |
33 | -dnl # Wrappers have been introduced to determine the running | |
34 | -dnl # context and use either the SIMD or generic implementation. | |
35 | +dnl # 5.0: Wrappers have been introduced to save/restore the FPU state. | |
36 | dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels. | |
37 | -dnl # HAVE_KERNEL_FPU_INITIALIZED | |
38 | +dnl # HAVE_KERNEL_FPU_INTERNAL | |
39 | dnl # | |
40 | dnl # 4.2: Use __kernel_fpu_{begin,end}() | |
41 | dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU | |
42 | @@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ | |
43 | __kernel_fpu_end(); | |
44 | ], [], [$ZFS_META_LICENSE]) | |
45 | ||
46 | - ZFS_LINUX_TEST_SRC([fpu_initialized], [ | |
47 | - #include <linux/module.h> | |
48 | + ZFS_LINUX_TEST_SRC([fpu_internal], [ | |
49 | + #if defined(__x86_64) || defined(__x86_64__) || \ | |
50 | + defined(__i386) || defined(__i386__) | |
51 | + #if !defined(__x86) | |
52 | + #define __x86 | |
53 | + #endif | |
54 | + #endif | |
55 | + | |
56 | + #if !defined(__x86) | |
57 | + #error Unsupported architecture | |
58 | + #endif | |
59 | + | |
60 | #include <linux/sched.h> | |
61 | - ],[ | |
62 | - struct fpu *fpu = ¤t->thread.fpu; | |
63 | - if (fpu->initialized) { return (0); }; | |
64 | - ]) | |
65 | ||
66 | - ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [ | |
67 | - #include <linux/module.h> | |
68 | - #include <asm/thread_info.h> | |
69 | + #if !defined(PF_KTHREAD) | |
70 | + #error PF_KTHREAD not defined | |
71 | + #endif | |
72 | ||
73 | - #if !defined(TIF_NEED_FPU_LOAD) | |
74 | - #error "TIF_NEED_FPU_LOAD undefined" | |
75 | + #ifdef HAVE_KERNEL_FPU_API_HEADER | |
76 | + #include <asm/fpu/api.h> | |
77 | + #include <asm/fpu/internal.h> | |
78 | + #else | |
79 | + #include <asm/i387.h> | |
80 | + #include <asm/xcr.h> | |
81 | + #endif | |
82 | + | |
83 | + #if !defined(XSTATE_XSAVE) | |
84 | + #error XSTATE_XSAVE not defined | |
85 | + #endif | |
86 | + | |
87 | + #if !defined(XSTATE_XRESTORE) | |
88 | + #error XSTATE_XRESTORE not defined | |
89 | #endif | |
90 | - ],[]) | |
91 | + ],[ | |
92 | + struct fpu *fpu = ¤t->thread.fpu; | |
93 | + union fpregs_state *st = &fpu->state; | |
94 | + struct fregs_state *fr __attribute__ ((unused)) = &st->fsave; | |
95 | + struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave; | |
96 | + struct xregs_state *xr __attribute__ ((unused)) = &st->xsave; | |
97 | + fpu->last_cpu = -1; | |
98 | + ]) | |
99 | ]) | |
100 | ||
101 | AC_DEFUN([ZFS_AC_KERNEL_FPU], [ | |
102 | @@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ | |
103 | AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, | |
104 | [kernel exports FPU functions]) | |
105 | ],[ | |
106 | - dnl # | |
107 | - dnl # Linux 5.0 kernel | |
108 | - dnl # | |
109 | - ZFS_LINUX_TEST_RESULT([fpu_initialized], [ | |
110 | - AC_MSG_RESULT(fpu.initialized) | |
111 | - AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1, | |
112 | - [kernel fpu.initialized exists]) | |
113 | + ZFS_LINUX_TEST_RESULT([fpu_internal], [ | |
114 | + AC_MSG_RESULT(internal) | |
115 | + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, | |
116 | + [kernel fpu internal]) | |
117 | ],[ | |
118 | - dnl # | |
119 | - dnl # Linux 5.2 kernel | |
120 | - dnl # | |
121 | - ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [ | |
122 | - AC_MSG_RESULT(TIF_NEED_FPU_LOAD) | |
123 | - AC_DEFINE( | |
124 | - HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1, | |
125 | - [kernel TIF_NEED_FPU_LOAD exists]) | |
126 | - ],[ | |
127 | - AC_MSG_RESULT(unavailable) | |
128 | - ]) | |
129 | + AC_MSG_RESULT(unavailable) | |
130 | ]) | |
131 | ]) | |
132 | ]) | |
133 | diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h | |
134 | index 641f43955..d1ded3af2 100644 | |
135 | --- a/include/linux/simd_x86.h | |
136 | +++ b/include/linux/simd_x86.h | |
137 | @@ -126,38 +126,68 @@ | |
138 | #endif | |
139 | ||
140 | #else /* defined(KERNEL_EXPORTS_X86_FPU) */ | |
141 | + | |
142 | /* | |
143 | * When the kernel_fpu_* symbols are unavailable then provide our own | |
144 | * versions which allow the FPU to be safely used in kernel threads. | |
145 | * In practice, this is not a significant restriction for ZFS since the | |
146 | * vast majority of SIMD operations are performed by the IO pipeline. | |
147 | */ | |
148 | +#if defined(HAVE_KERNEL_FPU_INTERNAL) | |
149 | ||
150 | /* | |
151 | - * Returns non-zero if FPU operations are allowed in the current context. | |
152 | + * FPU usage only allowed in dedicated kernel threads. | |
153 | */ | |
154 | -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) | |
155 | -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ | |
156 | - test_thread_flag(TIF_NEED_FPU_LOAD)) | |
157 | -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) | |
158 | -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \ | |
159 | - current->thread.fpu.initialized) | |
160 | -#else | |
161 | -#define kfpu_allowed() 0 | |
162 | -#endif | |
163 | +#define kfpu_allowed() (current->flags & PF_KTHREAD) | |
164 | +#define ex_handler_fprestore ex_handler_default | |
165 | + | |
166 | +/* | |
167 | + * FPU save and restore instructions. | |
168 | + */ | |
169 | +#define __asm __asm__ __volatile__ | |
170 | +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr))) | |
171 | +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr))) | |
172 | +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr))) | |
173 | +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) | |
174 | +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr))) | |
175 | +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr))) | |
176 | +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \ | |
177 | + : : [addr] "m" (rval)); | |
178 | ||
179 | static inline void | |
180 | kfpu_initialize(void) | |
181 | { | |
182 | WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); | |
183 | ||
184 | -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) | |
185 | - __fpu_invalidate_fpregs_state(¤t->thread.fpu); | |
186 | - set_thread_flag(TIF_NEED_FPU_LOAD); | |
187 | -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) | |
188 | - __fpu_invalidate_fpregs_state(¤t->thread.fpu); | |
189 | - current->thread.fpu.initialized = 1; | |
190 | -#endif | |
191 | + /* Invalidate the task's FPU state */ | |
192 | + current->thread.fpu.last_cpu = -1; | |
193 | +} | |
194 | + | |
195 | +static inline void | |
196 | +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask) | |
197 | +{ | |
198 | + uint32_t low, hi; | |
199 | + int err; | |
200 | + | |
201 | + low = mask; | |
202 | + hi = mask >> 32; | |
203 | + XSTATE_XSAVE(addr, low, hi, err); | |
204 | + WARN_ON_ONCE(err); | |
205 | +} | |
206 | + | |
207 | +static inline void | |
208 | +kfpu_save_fxsr(struct fxregs_state *addr) | |
209 | +{ | |
210 | + if (IS_ENABLED(CONFIG_X86_32)) | |
211 | + kfpu_fxsave(addr); | |
212 | + else | |
213 | + kfpu_fxsaveq(addr); | |
214 | +} | |
215 | + | |
216 | +static inline void | |
217 | +kfpu_save_fsave(struct fregs_state *addr) | |
218 | +{ | |
219 | + kfpu_fnsave(addr); | |
220 | } | |
221 | ||
222 | static inline void | |
223 | @@ -172,46 +202,86 @@ kfpu_begin(void) | |
224 | preempt_disable(); | |
225 | local_irq_disable(); | |
226 | ||
227 | -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) | |
228 | /* | |
229 | * The current FPU registers need to be preserved by kfpu_begin() | |
230 | - * and restored by kfpu_end(). This is required because we can | |
231 | - * not call __cpu_invalidate_fpregs_state() to invalidate the | |
232 | - * per-cpu FPU state and force them to be restored during a | |
233 | - * context switch. | |
234 | + * and restored by kfpu_end(). This is always required because we | |
235 | + * can not call __cpu_invalidate_fpregs_state() to invalidate the | |
236 | + * per-cpu FPU state and force them to be restored. Furthermore, | |
237 | + * this implementation relies on the space provided in the task | |
238 | + * structure to store the user FPU state. As such, it can only | |
9 240 | + * be used with dedicated kernel threads which by definition will never | |
240 | + * store user FPU state. | |
241 | */ | |
242 | - copy_fpregs_to_fpstate(¤t->thread.fpu); | |
243 | -#elif defined(HAVE_KERNEL_FPU_INITIALIZED) | |
244 | + if (static_cpu_has(X86_FEATURE_XSAVE)) { | |
245 | + kfpu_save_xsave(¤t->thread.fpu.state.xsave, ~0); | |
246 | + } else if (static_cpu_has(X86_FEATURE_FXSR)) { | |
247 | + kfpu_save_fxsr(¤t->thread.fpu.state.fxsave); | |
248 | + } else { | |
249 | + kfpu_save_fsave(¤t->thread.fpu.state.fsave); | |
250 | + } | |
251 | +} | |
252 | + | |
253 | +static inline void | |
254 | +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask) | |
255 | +{ | |
256 | + uint32_t low, hi; | |
257 | + | |
258 | + low = mask; | |
259 | + hi = mask >> 32; | |
260 | + XSTATE_XRESTORE(addr, low, hi); | |
261 | +} | |
262 | + | |
263 | +static inline void | |
264 | +kfpu_restore_fxsr(struct fxregs_state *addr) | |
265 | +{ | |
266 | /* | |
267 | - * There is no need to preserve and restore the FPU registers. | |
268 | - * They will always be restored from the task's stored FPU state | |
269 | - * when switching contexts. | |
270 | + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only | |
271 | + * restores the _x87 FOP, FIP, and FDP registers when an exception | |
272 | + * is pending. Clean the _x87 state to force the restore. | |
273 | */ | |
274 | - WARN_ON_ONCE(current->thread.fpu.initialized == 0); | |
275 | -#endif | |
276 | + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) | |
277 | + kfpu_fxsr_clean(addr); | |
278 | + | |
279 | + if (IS_ENABLED(CONFIG_X86_32)) { | |
280 | + kfpu_fxrstor(addr); | |
281 | + } else { | |
282 | + kfpu_fxrstorq(addr); | |
283 | + } | |
284 | } | |
285 | ||
286 | static inline void | |
287 | -kfpu_end(void) | |
288 | +kfpu_restore_fsave(struct fregs_state *addr) | |
289 | { | |
290 | -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD) | |
291 | - union fpregs_state *state = ¤t->thread.fpu.state; | |
292 | - int error; | |
293 | + kfpu_frstor(addr); | |
294 | +} | |
295 | ||
296 | - if (use_xsave()) { | |
297 | - error = copy_kernel_to_xregs_err(&state->xsave, -1); | |
298 | - } else if (use_fxsr()) { | |
299 | - error = copy_kernel_to_fxregs_err(&state->fxsave); | |
300 | +static inline void | |
301 | +kfpu_end(void) | |
302 | +{ | |
303 | + if (static_cpu_has(X86_FEATURE_XSAVE)) { | |
304 | + kfpu_restore_xsave(¤t->thread.fpu.state.xsave, ~0); | |
305 | + } else if (static_cpu_has(X86_FEATURE_FXSR)) { | |
306 | + kfpu_restore_fxsr(¤t->thread.fpu.state.fxsave); | |
307 | } else { | |
308 | - error = copy_kernel_to_fregs_err(&state->fsave); | |
309 | + kfpu_restore_fsave(¤t->thread.fpu.state.fsave); | |
310 | } | |
311 | - WARN_ON_ONCE(error); | |
312 | -#endif | |
313 | ||
314 | local_irq_enable(); | |
315 | preempt_enable(); | |
316 | } | |
317 | -#endif /* defined(HAVE_KERNEL_FPU) */ | |
318 | + | |
319 | +#else | |
320 | + | |
321 | +/* | |
322 | + * FPU support is unavailable. | |
323 | + */ | |
324 | +#define kfpu_allowed() 0 | |
325 | +#define kfpu_initialize(tsk) do {} while (0) | |
326 | +#define kfpu_begin() do {} while (0) | |
327 | +#define kfpu_end() do {} while (0) | |
328 | + | |
329 | +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */ | |
330 | +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */ | |
331 | ||
332 | #else /* defined(_KERNEL) */ | |
333 | /* |