]> git.proxmox.com Git - zfsonlinux.git/blame - debian/patches/0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
FPU/SIMD: use separate ZFS and kernel save/restore paths to reduce interference
[zfsonlinux.git] / debian / patches / 0008-Linux-4.14-4.19-5.0-compat-SIMD-save-restore.patch
CommitLineData
8f11c721
TL
1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2From: Brian Behlendorf <behlendorf1@llnl.gov>
3Date: Thu, 3 Oct 2019 00:03:20 +0000
4Subject: [PATCH] Linux 4.14, 4.19, 5.0+ compat: SIMD save/restore
5
6Contrary to initial testing we cannot rely on these kernels to
7invalidate the per-cpu FPU state and restore the FPU registers.
8Therefore, the kfpu_begin() and kfpu_end() functions have been
9updated to unconditionally save and restore the FPU state.
10
11Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
12Issue #9346
13(cherry picked from commit 813fd014a90229127f80b970a8fef5049fd4c713)
14Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
15---
16 config/kernel-fpu.m4 | 82 +++++++++++----------
17 include/linux/simd_x86.h | 152 ++++++++++++++++++++++++++++-----------
18 2 files changed, 155 insertions(+), 79 deletions(-)
19
20diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4
21index a2c47d65a..9ed9b14ad 100644
22--- a/config/kernel-fpu.m4
23+++ b/config/kernel-fpu.m4
24@@ -2,15 +2,9 @@ dnl #
25 dnl # Handle differences in kernel FPU code.
26 dnl #
27 dnl # Kernel
28-dnl # 5.2: The fpu->initialized flag was replaced by TIF_NEED_FPU_LOAD.
29-dnl # HAVE_KERNEL_TIF_NEED_FPU_LOAD
30-dnl #
31-dnl # 5.0: As an optimization SIMD operations performed by kernel
32-dnl # threads can skip saving and restoring their FPU context.
33-dnl # Wrappers have been introduced to determine the running
34-dnl # context and use either the SIMD or generic implementation.
35+dnl # 5.0: Wrappers have been introduced to save/restore the FPU state.
36 dnl # This change was made to the 4.19.38 and 4.14.120 LTS kernels.
37-dnl # HAVE_KERNEL_FPU_INITIALIZED
38+dnl # HAVE_KERNEL_FPU_INTERNAL
39 dnl #
40 dnl # 4.2: Use __kernel_fpu_{begin,end}()
41 dnl # HAVE_UNDERSCORE_KERNEL_FPU & KERNEL_EXPORTS_X86_FPU
42@@ -61,22 +55,47 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
43 __kernel_fpu_end();
44 ], [], [$ZFS_META_LICENSE])
45
46- ZFS_LINUX_TEST_SRC([fpu_initialized], [
47- #include <linux/module.h>
48+ ZFS_LINUX_TEST_SRC([fpu_internal], [
49+ #if defined(__x86_64) || defined(__x86_64__) || \
50+ defined(__i386) || defined(__i386__)
51+ #if !defined(__x86)
52+ #define __x86
53+ #endif
54+ #endif
55+
56+ #if !defined(__x86)
57+ #error Unsupported architecture
58+ #endif
59+
60 #include <linux/sched.h>
61- ],[
62- struct fpu *fpu = &current->thread.fpu;
63- if (fpu->initialized) { return (0); };
64- ])
65
66- ZFS_LINUX_TEST_SRC([tif_need_fpu_load], [
67- #include <linux/module.h>
68- #include <asm/thread_info.h>
69+ #if !defined(PF_KTHREAD)
70+ #error PF_KTHREAD not defined
71+ #endif
72
73- #if !defined(TIF_NEED_FPU_LOAD)
74- #error "TIF_NEED_FPU_LOAD undefined"
75+ #ifdef HAVE_KERNEL_FPU_API_HEADER
76+ #include <asm/fpu/api.h>
77+ #include <asm/fpu/internal.h>
78+ #else
79+ #include <asm/i387.h>
80+ #include <asm/xcr.h>
81+ #endif
82+
83+ #if !defined(XSTATE_XSAVE)
84+ #error XSTATE_XSAVE not defined
85+ #endif
86+
87+ #if !defined(XSTATE_XRESTORE)
88+ #error XSTATE_XRESTORE not defined
89 #endif
90- ],[])
91+ ],[
92+ struct fpu *fpu = &current->thread.fpu;
93+ union fpregs_state *st = &fpu->state;
94+ struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
95+ struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
96+ struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
97+ fpu->last_cpu = -1;
98+ ])
99 ])
100
101 AC_DEFUN([ZFS_AC_KERNEL_FPU], [
102@@ -104,25 +123,12 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
103 AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1,
104 [kernel exports FPU functions])
105 ],[
106- dnl #
107- dnl # Linux 5.0 kernel
108- dnl #
109- ZFS_LINUX_TEST_RESULT([fpu_initialized], [
110- AC_MSG_RESULT(fpu.initialized)
111- AC_DEFINE(HAVE_KERNEL_FPU_INITIALIZED, 1,
112- [kernel fpu.initialized exists])
113+ ZFS_LINUX_TEST_RESULT([fpu_internal], [
114+ AC_MSG_RESULT(internal)
115+ AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
116+ [kernel fpu internal])
117 ],[
118- dnl #
119- dnl # Linux 5.2 kernel
120- dnl #
121- ZFS_LINUX_TEST_RESULT([tif_need_fpu_load], [
122- AC_MSG_RESULT(TIF_NEED_FPU_LOAD)
123- AC_DEFINE(
124- HAVE_KERNEL_TIF_NEED_FPU_LOAD, 1,
125- [kernel TIF_NEED_FPU_LOAD exists])
126- ],[
127- AC_MSG_RESULT(unavailable)
128- ])
129+ AC_MSG_RESULT(unavailable)
130 ])
131 ])
132 ])
133diff --git a/include/linux/simd_x86.h b/include/linux/simd_x86.h
134index 641f43955..d1ded3af2 100644
135--- a/include/linux/simd_x86.h
136+++ b/include/linux/simd_x86.h
137@@ -126,38 +126,68 @@
138 #endif
139
140 #else /* defined(KERNEL_EXPORTS_X86_FPU) */
141+
142 /*
143 * When the kernel_fpu_* symbols are unavailable then provide our own
144 * versions which allow the FPU to be safely used in kernel threads.
145 * In practice, this is not a significant restriction for ZFS since the
146 * vast majority of SIMD operations are performed by the IO pipeline.
147 */
148+#if defined(HAVE_KERNEL_FPU_INTERNAL)
149
150 /*
151- * Returns non-zero if FPU operations are allowed in the current context.
152+ * FPU usage only allowed in dedicated kernel threads.
153 */
154-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
155-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
156- test_thread_flag(TIF_NEED_FPU_LOAD))
157-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
158-#define kfpu_allowed() ((current->flags & PF_KTHREAD) && \
159- current->thread.fpu.initialized)
160-#else
161-#define kfpu_allowed() 0
162-#endif
163+#define kfpu_allowed() (current->flags & PF_KTHREAD)
164+#define ex_handler_fprestore ex_handler_default
165+
166+/*
167+ * FPU save and restore instructions.
168+ */
169+#define __asm __asm__ __volatile__
170+#define kfpu_fxsave(addr) __asm("fxsave %0" : "=m" (*(addr)))
171+#define kfpu_fxsaveq(addr) __asm("fxsaveq %0" : "=m" (*(addr)))
172+#define kfpu_fnsave(addr) __asm("fnsave %0; fwait" : "=m" (*(addr)))
173+#define kfpu_fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
174+#define kfpu_fxrstorq(addr) __asm("fxrstorq %0" : : "m" (*(addr)))
175+#define kfpu_frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
176+#define kfpu_fxsr_clean(rval) __asm("fnclex; emms; fildl %P[addr]" \
177+ : : [addr] "m" (rval));
178
179 static inline void
180 kfpu_initialize(void)
181 {
182 WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
183
184-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
185- __fpu_invalidate_fpregs_state(&current->thread.fpu);
186- set_thread_flag(TIF_NEED_FPU_LOAD);
187-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
188- __fpu_invalidate_fpregs_state(&current->thread.fpu);
189- current->thread.fpu.initialized = 1;
190-#endif
191+ /* Invalidate the task's FPU state */
192+ current->thread.fpu.last_cpu = -1;
193+}
194+
195+static inline void
196+kfpu_save_xsave(struct xregs_state *addr, uint64_t mask)
197+{
198+ uint32_t low, hi;
199+ int err;
200+
201+ low = mask;
202+ hi = mask >> 32;
203+ XSTATE_XSAVE(addr, low, hi, err);
204+ WARN_ON_ONCE(err);
205+}
206+
207+static inline void
208+kfpu_save_fxsr(struct fxregs_state *addr)
209+{
210+ if (IS_ENABLED(CONFIG_X86_32))
211+ kfpu_fxsave(addr);
212+ else
213+ kfpu_fxsaveq(addr);
214+}
215+
216+static inline void
217+kfpu_save_fsave(struct fregs_state *addr)
218+{
219+ kfpu_fnsave(addr);
220 }
221
222 static inline void
223@@ -172,46 +202,86 @@ kfpu_begin(void)
224 preempt_disable();
225 local_irq_disable();
226
227-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
228 /*
229 * The current FPU registers need to be preserved by kfpu_begin()
230- * and restored by kfpu_end(). This is required because we can
231- * not call __cpu_invalidate_fpregs_state() to invalidate the
232- * per-cpu FPU state and force them to be restored during a
233- * context switch.
234+ * and restored by kfpu_end(). This is always required because we
235+ * can not call __cpu_invalidate_fpregs_state() to invalidate the
236+ * per-cpu FPU state and force them to be restored. Furthermore,
237+ * this implementation relies on the space provided in the task
238+ * structure to store the user FPU state. As such, it can only
 239+	 * be used with dedicated kernel threads which by definition will never
240+ * store user FPU state.
241 */
242- copy_fpregs_to_fpstate(&current->thread.fpu);
243-#elif defined(HAVE_KERNEL_FPU_INITIALIZED)
244+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
245+ kfpu_save_xsave(&current->thread.fpu.state.xsave, ~0);
246+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
247+ kfpu_save_fxsr(&current->thread.fpu.state.fxsave);
248+ } else {
249+ kfpu_save_fsave(&current->thread.fpu.state.fsave);
250+ }
251+}
252+
253+static inline void
254+kfpu_restore_xsave(struct xregs_state *addr, uint64_t mask)
255+{
256+ uint32_t low, hi;
257+
258+ low = mask;
259+ hi = mask >> 32;
260+ XSTATE_XRESTORE(addr, low, hi);
261+}
262+
263+static inline void
264+kfpu_restore_fxsr(struct fxregs_state *addr)
265+{
266 /*
267- * There is no need to preserve and restore the FPU registers.
268- * They will always be restored from the task's stored FPU state
269- * when switching contexts.
270+ * On AuthenticAMD K7 and K8 processors the fxrstor instruction only
271+ * restores the _x87 FOP, FIP, and FDP registers when an exception
272+ * is pending. Clean the _x87 state to force the restore.
273 */
274- WARN_ON_ONCE(current->thread.fpu.initialized == 0);
275-#endif
276+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK)))
277+ kfpu_fxsr_clean(addr);
278+
279+ if (IS_ENABLED(CONFIG_X86_32)) {
280+ kfpu_fxrstor(addr);
281+ } else {
282+ kfpu_fxrstorq(addr);
283+ }
284 }
285
286 static inline void
287-kfpu_end(void)
288+kfpu_restore_fsave(struct fregs_state *addr)
289 {
290-#if defined(HAVE_KERNEL_TIF_NEED_FPU_LOAD)
291- union fpregs_state *state = &current->thread.fpu.state;
292- int error;
293+ kfpu_frstor(addr);
294+}
295
296- if (use_xsave()) {
297- error = copy_kernel_to_xregs_err(&state->xsave, -1);
298- } else if (use_fxsr()) {
299- error = copy_kernel_to_fxregs_err(&state->fxsave);
300+static inline void
301+kfpu_end(void)
302+{
303+ if (static_cpu_has(X86_FEATURE_XSAVE)) {
304+ kfpu_restore_xsave(&current->thread.fpu.state.xsave, ~0);
305+ } else if (static_cpu_has(X86_FEATURE_FXSR)) {
306+ kfpu_restore_fxsr(&current->thread.fpu.state.fxsave);
307 } else {
308- error = copy_kernel_to_fregs_err(&state->fsave);
309+ kfpu_restore_fsave(&current->thread.fpu.state.fsave);
310 }
311- WARN_ON_ONCE(error);
312-#endif
313
314 local_irq_enable();
315 preempt_enable();
316 }
317-#endif /* defined(HAVE_KERNEL_FPU) */
318+
319+#else
320+
321+/*
322+ * FPU support is unavailable.
323+ */
324+#define kfpu_allowed() 0
325+#define kfpu_initialize(tsk) do {} while (0)
326+#define kfpu_begin() do {} while (0)
327+#define kfpu_end() do {} while (0)
328+
329+#endif /* defined(HAVE_KERNEL_FPU_INTERNAL) */
330+#endif /* defined(KERNEL_EXPORTS_X86_FPU) */
331
332 #else /* defined(_KERNEL) */
333 /*