git.proxmox.com Git - zfsonlinux.git/blob - debian/patches/0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
FPU/SIMD: separate ZFS and Kernel approach for less interference
[zfsonlinux.git] / debian / patches / 0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Brian Behlendorf <behlendorf1@llnl.gov>
3 Date: Thu, 3 Oct 2019 00:03:20 +0000
4 Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
5
6 Contrary to initial testing we cannot rely on these kernels to
7 invalidate the per-cpu FPU state and restore the FPU registers.
8 Therefore, the kfpu_begin() and kfpu_end() functions have been
9 updated to unconditionally save and restore the FPU state.
10
11 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
12 Issue #9346
13 (cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713)
14 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
15 ---
16 config/kernel-fpu.m4 | 82 +++++++++++----------
17 include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++-----------
18 2 files changed, 155 insertions(+), 79 deletions(-)
19
20 diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
21 index a2c47d65a..9ed9b14ad 100644
22 --- a/config/kernel-fpu.m4
23 +++ b/config/kernel-fpu.m4
24 @@ -2,15 +2,9 @@ dnl #
25 dnl # Handle differences in kernel FPU code.
26 dnl #
27 dnl # Kernel
28 -dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD.
29 -dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD
30 -dnl #
31 -dnl # 5.0: As an optimization SIMD operations performed by kernel
32 -dnl # threads can skip saving and restoring their FPU context.
33 -dnl # Wrappers have been introduced to determine the running
34 -dnl # context and use either the SIMD or generic implementation.
35 +dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
36 dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
37 -dnl # HAVE_KERNEL_FPU_INITIALIZED
38 +dnl # HAVE_KERNEL_FPU_INTERNAL
39 dnl #
40 dnl # 4.2: Use __kernel_fpu_{begin,end}()
41 dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
42 @@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
43 __kernel_fpu_end();
44 ], [], [$ZFS_META_LICENSE])
45
46 - ZFS_LINUX_TEST_SRC([fpu_initialized], [
47 - #include <linux/module.h>
48 + ZFS_LINUX_TEST_SRC([fpu_internal], [
49 + #if defined(__x86_64) || defined(__x86_64__) || \
50 + defined(__i386) || defined(__i386__)
51 + #if !defined(__x86)
52 + #define __x86
53 + #endif
54 + #endif
55 +
56 + #if !defined(__x86)
57 + #error Unsupported architecture
58 + #endif
59 +
60 #include <linux/sched.h>
61 - ],[
62 - struct fpu *fpu = &current->thread.fpu;
63 - if (fpu->initialized) { return (0); };
64 - ])
65
66 - ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [
67 - #include <linux/module.h>
68 - #include <asm/thread_info.h>
69 + #if !defined(PF_KTHREAD)
70 + #error PF_KTHREAD not defined
71 + #endif
72
73 - #if !defined(TIF_NEED_FPU_LOAD)
74 - #error "TIF_NEED_FPU_LOAD undefined"
75 + #ifdef HAVE_KERNEL_FPU_API_HEADER
76 + #include <asm/fpu/api.h>
77 + #include <asm/fpu/internal.h>
78 + #else
79 + #include <asm/i387.h>
80 + #include <asm/xcr.h>
81 + #endif
82 +
83 + #if !defined(XSTATE_XSAVE)
84 + #error XSTATE_XSAVE not defined
85 + #endif
86 +
87 + #if !defined(XSTATE_XRESTORE)
88 + #error XSTATE_XRESTORE not defined
89 #endif
90 - ],[])
91 + ],[
92 + struct fpu *fpu = &current->thread.fpu;
93 + union fpregs_state *st = &fpu->state;
94 + struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
95 + struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
96 + struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
97 + fpu->last_cpu = -1;
98 + ])
99 ])
100
101 AC_DEFUN([ZFS_AC_KERNEL_FPU], [
102 @@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
103 AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
104 [kernel exports FPU functions])
105 ],[
106 - dnl #
107 - dnl # Linux 5.0 kernel
108 - dnl #
109 - ZFS_LINUX_TEST_RESULT([fpu_initialized], [
110 - AC_MSG_RESULT(fpu.initialized)
111 - AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1,
112 - [kernel fpu.initialized exists])
113 + ZFS_LINUX_TEST_RESULT([fpu_internal], [
114 + AC_MSG_RESULT(internal)
115 + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
116 + [kernel fpu internal])
117 ],[
118 - dnl #
119 - dnl # Linux 5.2 kernel
120 - dnl #
121 - ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [
122 - AC_MSG_RESULT(TIF_NEED_FPU_LOAD)
123 - AC_DEFINE(
124 - HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1,
125 - [kernel TIF_NEED_FPU_LOAD exists])
126 - ],[
127 - AC_MSG_RESULT(unavailable)
128 - ])
129 + AC_MSG_RESULT(unavailable)
130 ])
131 ])
132 ])
133 diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
134 index 641f43955..d1ded3af2 100644
135 --- a/include/linux/simd_x86.h
136 +++ b/include/linux/simd_x86.h
137 @@ -126,38 +126,68 @@
138 #endif
139
140 #else /* defined(KERNEL_EXPORTS_X86_FPU) */
141 +
142 /*
143 * When the kernel_fpu_* symbols are unavailable then provide our own
144 * versions which allow the FPU to be safely used in kernel threads.
145 * In practice, this is not a significant restriction for ZFS since the
146 * vast majority of SIMD operations are performed by the IO pipeline.
147 */
148 +#if defined(HAVE_KERNEL_FPU_INTERNAL)
149
150 /*
151 - * Returns non-zero if FPU operations are allowed in the current context.
152 + * FPU usage only allowed in dedicated kernel threads.
153 */
154 -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
155 -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
156 - test_thread_flag(TIF_NEED_FPU_LOAD))
157 -#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
158 -#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
159 - current->thread.fpu.initialized)
160 -#else
161 -#define kfpu_allowed() 0
162 -#endif
163 +#define kfpu_allowed() (current->flags & PF_KTHREAD)
164 +#define ex_handler_fprestore ex_handler_default
165 +
166 +/*
167 + * FPU save and restore instructions.
168 + */
169 +#define __asm __asm__ __volatile__
170 +#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
171 +#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
172 +#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
173 +#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
174 +#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
175 +#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
176 +#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
177 + : : [addr] "m" (rval));
178
179 static inline void
180 kfpu_initialize(void)
181 {
182 WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
183
184 -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
185 - __fpu_invalidate_fpregs_state(&current->thread.fpu);
186 - set_thread_flag(TIF_NEED_FPU_LOAD);
187 -#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
188 - __fpu_invalidate_fpregs_state(&current->thread.fpu);
189 - current->thread.fpu.initialized = 1;
190 -#endif
191 + /* Invalidate the task's FPU state */
192 + current->thread.fpu.last_cpu = -1;
193 +}
194 +
195 +static inline void
196 +kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
197 +{
198 + uint32_t low, hi;
199 + int err;
200 +
201 + low = mask;
202 + hi = mask >> 32;
203 + XSTATE_XSAVE(addr, low, hi, err);
204 + WARN_ON_ONCE(err);
205 +}
206 +
207 +static inline void
208 +kfpu_save_fxsr(struct fxregs_state *addr)
209 +{
210 + if (IS_ENABLED(CONFIG_X86_32))
211 + kfpu_fxsave(addr);
212 + else
213 + kfpu_fxsaveq(addr);
214 +}
215 +
216 +static inline void
217 +kfpu_save_fsave(struct fregs_state *addr)
218 +{
219 + kfpu_fnsave(addr);
220 }
221
222 static inline void
223 @@ -172,46 +202,86 @@ kfpu_begin(void)
224 preempt_disable();
225 local_irq_disable();
226
227 -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
228 /*
229 * The current FPU registers need to be preserved by kfpu_begin()
230 - * and restored by kfpu_end(). This is required because we can
231 - * not call __cpu_invalidate_fpregs_state() to invalidate the
232 - * per-cpu FPU state and force them to be restored during a
233 - * context switch.
234 + * and restored by kfpu_end(). This is always required because we
235 + * can not call __cpu_invalidate_fpregs_state() to invalidate the
236 + * per-cpu FPU state and force them to be restored. Furthermore,
237 + * this implementation relies on the space provided in the task
238 + * structure to store the user FPU state. As such, it can only
239 + * be used with dedicated kernel threads which by definition will never
240 + * store user FPU state.
241 */
242 - copy_fpregs_to_fpstate(&current->thread.fpu);
243 -#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
244 + if (static_cpu_has(X86_FEATURE_XSAVE)) {
245 + kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
246 + } else if (static_cpu_has(X86_FEATURE_FXSR)) {
247 + kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
248 + } else {
249 + kfpu_save_fsave(&current->thread.fpu.state.fsave);
250 + }
251 +}
252 +
253 +static inline void
254 +kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
255 +{
256 + uint32_t low, hi;
257 +
258 + low = mask;
259 + hi = mask >> 32;
260 + XSTATE_XRESTORE(addr, low, hi);
261 +}
262 +
263 +static inline void
264 +kfpu_restore_fxsr(struct fxregs_state *addr)
265 +{
266 /*
267 - * There is no need to preserve and restore the FPU registers.
268 - * They will always be restored from the task's stored FPU state
269 - * when switching contexts.
270 + * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
271 + * restores the _x87 FOP, FIP, and FDP registers when an exception
272 + * is pending. Clean the _x87 state to force the restore.
273 */
274 - WARN_ON_ONCE(current->thread.fpu.initialized == 0);
275 -#endif
276 + if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
277 + kfpu_fxsr_clean(addr);
278 +
279 + if (IS_ENABLED(CONFIG_X86_32)) {
280 + kfpu_fxrstor(addr);
281 + } else {
282 + kfpu_fxrstorq(addr);
283 + }
284 }
285
286 static inline void
287 -kfpu_end(void)
288 +kfpu_restore_fsave(struct fregs_state *addr)
289 {
290 -#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
291 - union fpregs_state *state = &current->thread.fpu.state;
292 - int error;
293 + kfpu_frstor(addr);
294 +}
295
296 - if (use_xsave()) {
297 - error = copy_kernel_to_xregs_err(&state->xsave, -1);
298 - } else if (use_fxsr()) {
299 - error = copy_kernel_to_fxregs_err(&state->fxsave);
300 +static inline void
301 +kfpu_end(void)
302 +{
303 + if (static_cpu_has(X86_FEATURE_XSAVE)) {
304 + kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
305 + } else if (static_cpu_has(X86_FEATURE_FXSR)) {
306 + kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
307 } else {
308 - error = copy_kernel_to_fregs_err(&state->fsave);
309 + kfpu_restore_fsave(&current->thread.fpu.state.fsave);
310 }
311 - WARN_ON_ONCE(error);
312 -#endif
313
314 local_irq_enable();
315 preempt_enable();
316 }
317 -#endif /* defined(HAVE_KERNEL_FPU) */
318 +
319 +#else
320 +
321 +/*
322 + * FPU support is unavailable.
323 + */
324 +#define kfpu_allowed() 0
325 +#define kfpu_initialize(tsk) do {} while (0)
326 +#define kfpu_begin() do {} while (0)
327 +#define kfpu_end() do {} while (0)
328 +
329 +#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
330 +#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
331
332 #else /* defined(_KERNEL) */
333 /*