git.proxmox.com Git - pve-kernel.git/commitdiff
build: re-enable BTF
author    Fabian Grünbichler <f.gruenbichler@proxmox.com>
          Mon, 30 Jan 2023 13:27:19 +0000 (14:27 +0100)
committer Thomas Lamprecht <t.lamprecht@proxmox.com>
          Tue, 31 Jan 2023 16:44:18 +0000 (17:44 +0100)
but allow discarding BTF information when loading modules, so that upgrades
which are otherwise ABI compatible still work. this allows using BTF
information when matching and available, while degrading gracefully if the
currently running kernel is not identical to the one the module was built for.

in case of a mismatch, the kernel will log a warning when loading the module,
for example:

Jan 30 13:57:58 test kernel: BPF:          type_id=184 bits_offset=4096
Jan 30 13:57:58 test kernel: BPF:
Jan 30 13:57:58 test kernel: BPF: Invalid name
Jan 30 13:57:58 test kernel: BPF:
Jan 30 13:57:58 test kernel: failed to validate module [bonding] BTF: -22
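
for illustration, a minimal userspace sketch (not the actual kernel code) of
the decision CONFIG_MODULE_ALLOW_BTF_MISMATCH enables: the stand-in validation
function and the hard-coded -22 (-EINVAL) are hypothetical, mirroring the log
above.

/* sketch: with the option enabled, failed BTF validation is logged and the
 * module's BTF is discarded, but the module load itself still succeeds. */
#include <stdio.h>

#define CONFIG_MODULE_ALLOW_BTF_MISMATCH 1	/* the option this commit enables */

static int validate_module_btf(const char *name)
{
	(void)name;
	return -22;	/* hypothetical: pretend validation failed, as in the log */
}

int main(void)
{
	int err = validate_module_btf("bonding");

	if (err) {
		fprintf(stderr, "failed to validate module [bonding] BTF: %d\n", err);
		if (!CONFIG_MODULE_ALLOW_BTF_MISMATCH)
			return 1;	/* without the option: module load fails */
		/* with the option: drop the BTF and keep loading the module */
	}
	printf("module loaded (BTF discarded)\n");
	return 0;
}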

Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
27 files changed:
debian/rules
patches/kernel/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch
patches/kernel/0006-bug-introduce-ASSERT_STRUCT_OFFSET.patch [new file with mode: 0644]
patches/kernel/0006-do-not-generate-split-BTF-type-info-per-default.patch [deleted file]
patches/kernel/0007-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch [new file with mode: 0644]
patches/kernel/0007-bug-introduce-ASSERT_STRUCT_OFFSET.patch [deleted file]
patches/kernel/0008-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch [new file with mode: 0644]
patches/kernel/0008-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch [deleted file]
patches/kernel/0009-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch [deleted file]
patches/kernel/0009-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch [new file with mode: 0644]
patches/kernel/0010-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch [new file with mode: 0644]
patches/kernel/0010-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch [deleted file]
patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch [deleted file]
patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch [new file with mode: 0644]
patches/kernel/0012-KVM-x86-SVM-use-smram-structs.patch [new file with mode: 0644]
patches/kernel/0012-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch [deleted file]
patches/kernel/0013-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch [new file with mode: 0644]
patches/kernel/0013-KVM-x86-SVM-use-smram-structs.patch [deleted file]
patches/kernel/0014-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch [deleted file]
patches/kernel/0014-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch [new file with mode: 0644]
patches/kernel/0015-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch [deleted file]
patches/kernel/0015-Revert-RDMA-irdma-Report-the-correct-link-speed.patch [new file with mode: 0644]
patches/kernel/0016-Revert-RDMA-irdma-Report-the-correct-link-speed.patch [deleted file]
patches/kernel/0016-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch [new file with mode: 0644]
patches/kernel/0017-Revert-fortify-Do-not-cast-to-unsigned-char.patch [new file with mode: 0644]
patches/kernel/0017-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch [deleted file]
patches/kernel/0018-Revert-fortify-Do-not-cast-to-unsigned-char.patch [deleted file]

diff --git a/debian/rules b/debian/rules
index 4a7a742fc4961d127c8a15c727b5f4e1757f7ab8..1d0d6158d297b23f013e85768ac027060813626a 100755 (executable)
@@ -78,6 +78,7 @@ PVE_CONFIG_OPTS= \
 -d CONFIG_DEFAULT_SECURITY_DAC \
 -e CONFIG_DEFAULT_SECURITY_APPARMOR \
 --set-str CONFIG_DEFAULT_SECURITY apparmor \
+-e CONFIG_MODULE_ALLOW_BTF_MISMATCH \
 -d CONFIG_UNWINDER_ORC \
 -d CONFIG_UNWINDER_GUESS \
 -e CONFIG_UNWINDER_FRAME_POINTER \
diff --git a/patches/kernel/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch b/patches/kernel/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch
index 4862123ea0885df5423293303841001f73dc0178..1cb3f9a6164f1b74a493c1f5e7a5b82b59dfac04 100644 (file)
@@ -3,8 +3,12 @@ From: Thomas Lamprecht <t.lamprecht@proxmox.com>
 Date: Wed, 7 Oct 2020 17:18:28 +0200
 Subject: [PATCH] net: core: downgrade unregister_netdevice refcount leak from
  emergency to error
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
 
 Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
 ---
  net/core/dev.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/patches/kernel/0006-bug-introduce-ASSERT_STRUCT_OFFSET.patch b/patches/kernel/0006-bug-introduce-ASSERT_STRUCT_OFFSET.patch
new file mode 100644 (file)
index 0000000..9bceac6
--- /dev/null
@@ -0,0 +1,57 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:49:59 +0300
+Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+ASSERT_STRUCT_OFFSET allows to assert during the build of
+the kernel that a field in a struct have an expected offset.
+
+KVM used to have such macro, but there is almost nothing KVM specific
+in it so move it to build_bug.h, so that it can be used in other
+places in KVM.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/vmx/vmcs12.h | 5 ++---
+ include/linux/build_bug.h | 9 +++++++++
+ 2 files changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
+index 746129ddd5ae..01936013428b 100644
+--- a/arch/x86/kvm/vmx/vmcs12.h
++++ b/arch/x86/kvm/vmx/vmcs12.h
+@@ -208,9 +208,8 @@ struct __packed vmcs12 {
+ /*
+  * For save/restore compatibility, the vmcs12 field offsets must not change.
+  */
+-#define CHECK_OFFSET(field, loc)                              \
+-      BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),       \
+-              "Offset of " #field " in struct vmcs12 has changed.")
++#define CHECK_OFFSET(field, loc) \
++      ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc)
+ static inline void vmx_check_vmcs12_offsets(void)
+ {
+diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
+index e3a0be2c90ad..3aa3640f8c18 100644
+--- a/include/linux/build_bug.h
++++ b/include/linux/build_bug.h
+@@ -77,4 +77,13 @@
+ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
+ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
++
++/*
++ * Compile time check that field has an expected offset
++ */
++#define ASSERT_STRUCT_OFFSET(type, field, expected_offset)    \
++      BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset),    \
++              "Offset of " #field " in " #type " has changed.")
++
++
+ #endif        /* _LINUX_BUILD_BUG_H */
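
As a standalone illustration of the check ASSERT_STRUCT_OFFSET performs, the
same assertion can be reproduced in userspace with C11 _Static_assert; struct
demo below is a hypothetical example type, and the kernel version builds on
BUILD_BUG_ON_MSG instead:

/* userspace sketch of ASSERT_STRUCT_OFFSET; compilation fails if the
 * field offset changes. struct demo is a hypothetical example type. */
#include <stddef.h>

#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \
	_Static_assert(offsetof(type, field) == (expected_offset), \
		"Offset of " #field " in " #type " has changed.")

struct demo {
	unsigned int a;	/* offset 0 */
	unsigned int b;	/* offset 4 */
};

ASSERT_STRUCT_OFFSET(struct demo, b, 4);

int main(void)
{
	return 0;
}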
diff --git a/patches/kernel/0006-do-not-generate-split-BTF-type-info-per-default.patch b/patches/kernel/0006-do-not-generate-split-BTF-type-info-per-default.patch
deleted file mode 100644 (file)
index 48fbf70..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Thu, 14 Sep 2017 11:09:58 +0200
-Subject: [PATCH] do not generate split BTF type info per default
-
-This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
-
-It breaks ZFS sometimes:
-https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- lib/Kconfig.debug | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index 12dfe6691dd5..930bbd7ddffd 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -365,7 +365,7 @@ config PAHOLE_HAS_BTF_TAG
-         these attributes, so make the config depend on CC_IS_CLANG.
- config DEBUG_INFO_BTF_MODULES
--      def_bool y
-+      def_bool n
-       depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
-       help
-         Generate compact split BTF type information for kernel modules.
diff --git a/patches/kernel/0007-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch b/patches/kernel/0007-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch
new file mode 100644 (file)
index 0000000..620609e
--- /dev/null
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:02 +0300
+Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This ensures that RIP will be correctly written back,
+because the RSM instruction can switch the CPU mode from
+32 bit (or less) to 64 bit.
+
+This fixes a guest crash in case the #SMI is received
+while the guest runs a code from an address > 32 bit.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 4a43261d25a2..4f7f5117ec7a 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2654,6 +2654,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+       if (ret != X86EMUL_CONTINUE)
+               goto emulate_shutdown;
++
++      ret = emulator_recalc_and_set_mode(ctxt);
++      if (ret != X86EMUL_CONTINUE)
++              goto emulate_shutdown;
++
+       /*
+        * Note, the ctxt->ops callbacks are responsible for handling side
+        * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
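
To make the failure mode above concrete: if the emulator keeps the stale
pre-RSM 32-bit mode, the instruction-pointer write-back truncates to 32 bits.
A hypothetical standalone sketch (the mode enum and helper are illustrative,
not KVM's actual code):

/* hypothetical sketch of the bug: RIP write-back after RSM truncates to
 * 32 bits if the stale pre-RSM emulation mode is still in effect. */
#include <stdint.h>
#include <stdio.h>

enum emul_mode { EMUL_MODE_PROT32, EMUL_MODE_PROT64 };

static uint64_t write_back_rip(enum emul_mode mode, uint64_t rip)
{
	/* in 32-bit modes only the low 32 bits (EIP) are written back */
	return mode == EMUL_MODE_PROT64 ? rip : (uint64_t)(uint32_t)rip;
}

int main(void)
{
	uint64_t rip = 0x1ffff0000ULL;	/* guest code above the 32 bit range */

	printf("stale 32-bit mode : %#llx (guest crashes)\n",
	       (unsigned long long)write_back_rip(EMUL_MODE_PROT32, rip));
	printf("recalculated mode : %#llx\n",
	       (unsigned long long)write_back_rip(EMUL_MODE_PROT64, rip));
	return 0;
}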
diff --git a/patches/kernel/0007-bug-introduce-ASSERT_STRUCT_OFFSET.patch b/patches/kernel/0007-bug-introduce-ASSERT_STRUCT_OFFSET.patch
deleted file mode 100644 (file)
index 245bb95..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:49:59 +0300
-Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET
-
-ASSERT_STRUCT_OFFSET allows to assert during the build of
-the kernel that a field in a struct have an expected offset.
-
-KVM used to have such macro, but there is almost nothing KVM specific
-in it so move it to build_bug.h, so that it can be used in other
-places in KVM.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/vmx/vmcs12.h | 5 ++---
- include/linux/build_bug.h | 9 +++++++++
- 2 files changed, 11 insertions(+), 3 deletions(-)
-
-diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
-index 746129ddd5ae..01936013428b 100644
---- a/arch/x86/kvm/vmx/vmcs12.h
-+++ b/arch/x86/kvm/vmx/vmcs12.h
-@@ -208,9 +208,8 @@ struct __packed vmcs12 {
- /*
-  * For save/restore compatibility, the vmcs12 field offsets must not change.
-  */
--#define CHECK_OFFSET(field, loc)                              \
--      BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),       \
--              "Offset of " #field " in struct vmcs12 has changed.")
-+#define CHECK_OFFSET(field, loc) \
-+      ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc)
- static inline void vmx_check_vmcs12_offsets(void)
- {
-diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
-index e3a0be2c90ad..3aa3640f8c18 100644
---- a/include/linux/build_bug.h
-+++ b/include/linux/build_bug.h
-@@ -77,4 +77,13 @@
- #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
- #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
-+
-+/*
-+ * Compile time check that field has an expected offset
-+ */
-+#define ASSERT_STRUCT_OFFSET(type, field, expected_offset)    \
-+      BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset),    \
-+              "Offset of " #field " in " #type " has changed.")
-+
-+
- #endif        /* _LINUX_BUILD_BUG_H */
diff --git a/patches/kernel/0008-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch b/patches/kernel/0008-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch
new file mode 100644 (file)
index 0000000..221b508
--- /dev/null
@@ -0,0 +1,284 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:05 +0300
+Subject: [PATCH] KVM: x86: emulator/smm: add structs for KVM's smram layout
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Those structs will be used to read/write the smram state image.
+
+Also document the differences between KVM's SMRAM layout and SMRAM
+layout that is used by real Intel/AMD cpus.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c     |   6 +
+ arch/x86/kvm/kvm_emulate.h | 218 +++++++++++++++++++++++++++++++++++++
+ arch/x86/kvm/x86.c         |   1 +
+ 3 files changed, 225 insertions(+)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 4f7f5117ec7a..470dd4453b01 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -5856,3 +5856,9 @@ bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
+       return true;
+ }
++
++void  __init kvm_emulator_init(void)
++{
++      __check_smram32_offsets();
++      __check_smram64_offsets();
++}
+diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
+index 89246446d6aa..dd0ae61e44a1 100644
+--- a/arch/x86/kvm/kvm_emulate.h
++++ b/arch/x86/kvm/kvm_emulate.h
+@@ -13,6 +13,7 @@
+ #define _ASM_X86_KVM_X86_EMULATE_H
+ #include <asm/desc_defs.h>
++#include <linux/build_bug.h>
+ #include "fpu.h"
+ struct x86_emulate_ctxt;
+@@ -503,6 +504,223 @@ enum x86_intercept {
+       nr_x86_intercepts
+ };
++
++/* 32 bit KVM's emulated SMM layout. Loosely based on Intel's layout */
++
++struct kvm_smm_seg_state_32 {
++      u32 flags;
++      u32 limit;
++      u32 base;
++} __packed;
++
++struct kvm_smram_state_32 {
++      u32 reserved1[62];
++      u32 smbase;
++      u32 smm_revision;
++      u32 reserved2[5];
++      u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
++      u32 reserved3[5];
++
++      /*
++       * Segment state is not present/documented in the Intel/AMD SMRAM image
++       * Instead this area on Intel/AMD contains IO/HLT restart flags.
++       */
++      struct kvm_smm_seg_state_32 ds;
++      struct kvm_smm_seg_state_32 fs;
++      struct kvm_smm_seg_state_32 gs;
++      struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
++      struct kvm_smm_seg_state_32 tr;
++      u32 reserved;
++      struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
++      struct kvm_smm_seg_state_32 ldtr;
++      struct kvm_smm_seg_state_32 es;
++      struct kvm_smm_seg_state_32 cs;
++      struct kvm_smm_seg_state_32 ss;
++
++      u32 es_sel;
++      u32 cs_sel;
++      u32 ss_sel;
++      u32 ds_sel;
++      u32 fs_sel;
++      u32 gs_sel;
++      u32 ldtr_sel;
++      u32 tr_sel;
++
++      u32 dr7;
++      u32 dr6;
++      u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
++      u32 eip;
++      u32 eflags;
++      u32 cr3;
++      u32 cr0;
++} __packed;
++
++
++static inline void __check_smram32_offsets(void)
++{
++#define __CHECK_SMRAM32_OFFSET(field, offset) \
++      ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
++
++      __CHECK_SMRAM32_OFFSET(reserved1,       0xFE00);
++      __CHECK_SMRAM32_OFFSET(smbase,          0xFEF8);
++      __CHECK_SMRAM32_OFFSET(smm_revision,    0xFEFC);
++      __CHECK_SMRAM32_OFFSET(reserved2,       0xFF00);
++      __CHECK_SMRAM32_OFFSET(cr4,             0xFF14);
++      __CHECK_SMRAM32_OFFSET(reserved3,       0xFF18);
++      __CHECK_SMRAM32_OFFSET(ds,              0xFF2C);
++      __CHECK_SMRAM32_OFFSET(fs,              0xFF38);
++      __CHECK_SMRAM32_OFFSET(gs,              0xFF44);
++      __CHECK_SMRAM32_OFFSET(idtr,            0xFF50);
++      __CHECK_SMRAM32_OFFSET(tr,              0xFF5C);
++      __CHECK_SMRAM32_OFFSET(gdtr,            0xFF6C);
++      __CHECK_SMRAM32_OFFSET(ldtr,            0xFF78);
++      __CHECK_SMRAM32_OFFSET(es,              0xFF84);
++      __CHECK_SMRAM32_OFFSET(cs,              0xFF90);
++      __CHECK_SMRAM32_OFFSET(ss,              0xFF9C);
++      __CHECK_SMRAM32_OFFSET(es_sel,          0xFFA8);
++      __CHECK_SMRAM32_OFFSET(cs_sel,          0xFFAC);
++      __CHECK_SMRAM32_OFFSET(ss_sel,          0xFFB0);
++      __CHECK_SMRAM32_OFFSET(ds_sel,          0xFFB4);
++      __CHECK_SMRAM32_OFFSET(fs_sel,          0xFFB8);
++      __CHECK_SMRAM32_OFFSET(gs_sel,          0xFFBC);
++      __CHECK_SMRAM32_OFFSET(ldtr_sel,        0xFFC0);
++      __CHECK_SMRAM32_OFFSET(tr_sel,          0xFFC4);
++      __CHECK_SMRAM32_OFFSET(dr7,             0xFFC8);
++      __CHECK_SMRAM32_OFFSET(dr6,             0xFFCC);
++      __CHECK_SMRAM32_OFFSET(gprs,            0xFFD0);
++      __CHECK_SMRAM32_OFFSET(eip,             0xFFF0);
++      __CHECK_SMRAM32_OFFSET(eflags,          0xFFF4);
++      __CHECK_SMRAM32_OFFSET(cr3,             0xFFF8);
++      __CHECK_SMRAM32_OFFSET(cr0,             0xFFFC);
++#undef __CHECK_SMRAM32_OFFSET
++}
++
++
++/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
++
++struct kvm_smm_seg_state_64 {
++      u16 selector;
++      u16 attributes;
++      u32 limit;
++      u64 base;
++};
++
++struct kvm_smram_state_64 {
++
++      struct kvm_smm_seg_state_64 es;
++      struct kvm_smm_seg_state_64 cs;
++      struct kvm_smm_seg_state_64 ss;
++      struct kvm_smm_seg_state_64 ds;
++      struct kvm_smm_seg_state_64 fs;
++      struct kvm_smm_seg_state_64 gs;
++      struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
++      struct kvm_smm_seg_state_64 ldtr;
++      struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
++      struct kvm_smm_seg_state_64 tr;
++
++      /* I/O restart and auto halt restart are not implemented by KVM */
++      u64 io_restart_rip;
++      u64 io_restart_rcx;
++      u64 io_restart_rsi;
++      u64 io_restart_rdi;
++      u32 io_restart_dword;
++      u32 reserved1;
++      u8 io_inst_restart;
++      u8 auto_hlt_restart;
++      u8 reserved2[6];
++
++      u64 efer;
++
++      /*
++       * Two fields below are implemented on AMD only, to store
++       * SVM guest vmcb address if the #SMI was received while in the guest mode.
++       */
++      u64 svm_guest_flag;
++      u64 svm_guest_vmcb_gpa;
++      u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
++
++      u32 reserved3[3];
++      u32 smm_revison;
++      u32 smbase;
++      u32 reserved4[5];
++
++      /* ssp and svm_* fields below are not implemented by KVM */
++      u64 ssp;
++      u64 svm_guest_pat;
++      u64 svm_host_efer;
++      u64 svm_host_cr4;
++      u64 svm_host_cr3;
++      u64 svm_host_cr0;
++
++      u64 cr4;
++      u64 cr3;
++      u64 cr0;
++      u64 dr7;
++      u64 dr6;
++      u64 rflags;
++      u64 rip;
++      u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
++};
++
++
++static inline void __check_smram64_offsets(void)
++{
++#define __CHECK_SMRAM64_OFFSET(field, offset) \
++      ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
++
++      __CHECK_SMRAM64_OFFSET(es,                      0xFE00);
++      __CHECK_SMRAM64_OFFSET(cs,                      0xFE10);
++      __CHECK_SMRAM64_OFFSET(ss,                      0xFE20);
++      __CHECK_SMRAM64_OFFSET(ds,                      0xFE30);
++      __CHECK_SMRAM64_OFFSET(fs,                      0xFE40);
++      __CHECK_SMRAM64_OFFSET(gs,                      0xFE50);
++      __CHECK_SMRAM64_OFFSET(gdtr,                    0xFE60);
++      __CHECK_SMRAM64_OFFSET(ldtr,                    0xFE70);
++      __CHECK_SMRAM64_OFFSET(idtr,                    0xFE80);
++      __CHECK_SMRAM64_OFFSET(tr,                      0xFE90);
++      __CHECK_SMRAM64_OFFSET(io_restart_rip,          0xFEA0);
++      __CHECK_SMRAM64_OFFSET(io_restart_rcx,          0xFEA8);
++      __CHECK_SMRAM64_OFFSET(io_restart_rsi,          0xFEB0);
++      __CHECK_SMRAM64_OFFSET(io_restart_rdi,          0xFEB8);
++      __CHECK_SMRAM64_OFFSET(io_restart_dword,        0xFEC0);
++      __CHECK_SMRAM64_OFFSET(reserved1,               0xFEC4);
++      __CHECK_SMRAM64_OFFSET(io_inst_restart,         0xFEC8);
++      __CHECK_SMRAM64_OFFSET(auto_hlt_restart,        0xFEC9);
++      __CHECK_SMRAM64_OFFSET(reserved2,               0xFECA);
++      __CHECK_SMRAM64_OFFSET(efer,                    0xFED0);
++      __CHECK_SMRAM64_OFFSET(svm_guest_flag,          0xFED8);
++      __CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,      0xFEE0);
++      __CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,   0xFEE8);
++      __CHECK_SMRAM64_OFFSET(reserved3,               0xFEF0);
++      __CHECK_SMRAM64_OFFSET(smm_revison,             0xFEFC);
++      __CHECK_SMRAM64_OFFSET(smbase,                  0xFF00);
++      __CHECK_SMRAM64_OFFSET(reserved4,               0xFF04);
++      __CHECK_SMRAM64_OFFSET(ssp,                     0xFF18);
++      __CHECK_SMRAM64_OFFSET(svm_guest_pat,           0xFF20);
++      __CHECK_SMRAM64_OFFSET(svm_host_efer,           0xFF28);
++      __CHECK_SMRAM64_OFFSET(svm_host_cr4,            0xFF30);
++      __CHECK_SMRAM64_OFFSET(svm_host_cr3,            0xFF38);
++      __CHECK_SMRAM64_OFFSET(svm_host_cr0,            0xFF40);
++      __CHECK_SMRAM64_OFFSET(cr4,                     0xFF48);
++      __CHECK_SMRAM64_OFFSET(cr3,                     0xFF50);
++      __CHECK_SMRAM64_OFFSET(cr0,                     0xFF58);
++      __CHECK_SMRAM64_OFFSET(dr7,                     0xFF60);
++      __CHECK_SMRAM64_OFFSET(dr6,                     0xFF68);
++      __CHECK_SMRAM64_OFFSET(rflags,                  0xFF70);
++      __CHECK_SMRAM64_OFFSET(rip,                     0xFF78);
++      __CHECK_SMRAM64_OFFSET(gprs,                    0xFF80);
++#undef __CHECK_SMRAM64_OFFSET
++}
++
++union kvm_smram {
++      struct kvm_smram_state_64 smram64;
++      struct kvm_smram_state_32 smram32;
++      u8 bytes[512];
++};
++
++void  __init kvm_emulator_init(void);
++
++
+ /* Host execution mode. */
+ #if defined(CONFIG_X86_32)
+ #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index acca85b10545..20aec64e3521 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -13730,6 +13730,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
+ static int __init kvm_x86_init(void)
+ {
+       kvm_mmu_x86_module_init();
++      kvm_emulator_init();
+       return 0;
+ }
+ module_init(kvm_x86_init);
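
The union kvm_smram added above is the central interface: one 512-byte save
area that can be zeroed as raw bytes and accessed through either typed layout.
A minimal standalone sketch of the pattern, with simplified hypothetical
stand-ins for the real structs (the sizeof check mirrors the BUILD_BUG_ON
added later in this series):

/* sketch of the union-over-bytes pattern behind kvm_smram; state32/state64
 * are simplified hypothetical stand-ins for the real SMRAM layouts. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct state32 { uint32_t eip; uint32_t eflags; };
struct state64 { uint64_t rip; uint64_t rflags; };

union smram {
	struct state64 smram64;
	struct state32 smram32;
	uint8_t bytes[512];
};

/* mirrors the BUILD_BUG_ON(sizeof(smram) != 512) added later in the series */
_Static_assert(sizeof(union smram) == 512, "SMRAM image must stay 512 bytes");

int main(void)
{
	union smram s;

	memset(s.bytes, 0, sizeof(s.bytes));	/* zeroed as raw bytes */
	s.smram32.eip = 0x1234;			/* written via a typed view */
	printf("eip = %#x\n", s.smram32.eip);
	return 0;
}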
diff --git a/patches/kernel/0008-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch b/patches/kernel/0008-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch
deleted file mode 100644 (file)
index 9c225a8..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:02 +0300
-Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm
-
-This ensures that RIP will be correctly written back,
-because the RSM instruction can switch the CPU mode from
-32 bit (or less) to 64 bit.
-
-This fixes a guest crash in case the #SMI is received
-while the guest runs a code from an address > 32 bit.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/emulate.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 4a43261d25a2..4f7f5117ec7a 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2654,6 +2654,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
-       if (ret != X86EMUL_CONTINUE)
-               goto emulate_shutdown;
-+
-+      ret = emulator_recalc_and_set_mode(ctxt);
-+      if (ret != X86EMUL_CONTINUE)
-+              goto emulate_shutdown;
-+
-       /*
-        * Note, the ctxt->ops callbacks are responsible for handling side
-        * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
diff --git a/patches/kernel/0009-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch b/patches/kernel/0009-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch
deleted file mode 100644 (file)
index 546eee7..0000000
+++ /dev/null
@@ -1,280 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:05 +0300
-Subject: [PATCH] KVM: x86: emulator/smm: add structs for KVM's smram layout
-
-Those structs will be used to read/write the smram state image.
-
-Also document the differences between KVM's SMRAM layout and SMRAM
-layout that is used by real Intel/AMD cpus.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/emulate.c     |   6 +
- arch/x86/kvm/kvm_emulate.h | 218 +++++++++++++++++++++++++++++++++++++
- arch/x86/kvm/x86.c         |   1 +
- 3 files changed, 225 insertions(+)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 4f7f5117ec7a..470dd4453b01 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -5856,3 +5856,9 @@ bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
-       return true;
- }
-+
-+void  __init kvm_emulator_init(void)
-+{
-+      __check_smram32_offsets();
-+      __check_smram64_offsets();
-+}
-diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
-index 89246446d6aa..dd0ae61e44a1 100644
---- a/arch/x86/kvm/kvm_emulate.h
-+++ b/arch/x86/kvm/kvm_emulate.h
-@@ -13,6 +13,7 @@
- #define _ASM_X86_KVM_X86_EMULATE_H
- #include <asm/desc_defs.h>
-+#include <linux/build_bug.h>
- #include "fpu.h"
- struct x86_emulate_ctxt;
-@@ -503,6 +504,223 @@ enum x86_intercept {
-       nr_x86_intercepts
- };
-+
-+/* 32 bit KVM's emulated SMM layout. Loosely based on Intel's layout */
-+
-+struct kvm_smm_seg_state_32 {
-+      u32 flags;
-+      u32 limit;
-+      u32 base;
-+} __packed;
-+
-+struct kvm_smram_state_32 {
-+      u32 reserved1[62];
-+      u32 smbase;
-+      u32 smm_revision;
-+      u32 reserved2[5];
-+      u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
-+      u32 reserved3[5];
-+
-+      /*
-+       * Segment state is not present/documented in the Intel/AMD SMRAM image
-+       * Instead this area on Intel/AMD contains IO/HLT restart flags.
-+       */
-+      struct kvm_smm_seg_state_32 ds;
-+      struct kvm_smm_seg_state_32 fs;
-+      struct kvm_smm_seg_state_32 gs;
-+      struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
-+      struct kvm_smm_seg_state_32 tr;
-+      u32 reserved;
-+      struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
-+      struct kvm_smm_seg_state_32 ldtr;
-+      struct kvm_smm_seg_state_32 es;
-+      struct kvm_smm_seg_state_32 cs;
-+      struct kvm_smm_seg_state_32 ss;
-+
-+      u32 es_sel;
-+      u32 cs_sel;
-+      u32 ss_sel;
-+      u32 ds_sel;
-+      u32 fs_sel;
-+      u32 gs_sel;
-+      u32 ldtr_sel;
-+      u32 tr_sel;
-+
-+      u32 dr7;
-+      u32 dr6;
-+      u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
-+      u32 eip;
-+      u32 eflags;
-+      u32 cr3;
-+      u32 cr0;
-+} __packed;
-+
-+
-+static inline void __check_smram32_offsets(void)
-+{
-+#define __CHECK_SMRAM32_OFFSET(field, offset) \
-+      ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
-+
-+      __CHECK_SMRAM32_OFFSET(reserved1,       0xFE00);
-+      __CHECK_SMRAM32_OFFSET(smbase,          0xFEF8);
-+      __CHECK_SMRAM32_OFFSET(smm_revision,    0xFEFC);
-+      __CHECK_SMRAM32_OFFSET(reserved2,       0xFF00);
-+      __CHECK_SMRAM32_OFFSET(cr4,             0xFF14);
-+      __CHECK_SMRAM32_OFFSET(reserved3,       0xFF18);
-+      __CHECK_SMRAM32_OFFSET(ds,              0xFF2C);
-+      __CHECK_SMRAM32_OFFSET(fs,              0xFF38);
-+      __CHECK_SMRAM32_OFFSET(gs,              0xFF44);
-+      __CHECK_SMRAM32_OFFSET(idtr,            0xFF50);
-+      __CHECK_SMRAM32_OFFSET(tr,              0xFF5C);
-+      __CHECK_SMRAM32_OFFSET(gdtr,            0xFF6C);
-+      __CHECK_SMRAM32_OFFSET(ldtr,            0xFF78);
-+      __CHECK_SMRAM32_OFFSET(es,              0xFF84);
-+      __CHECK_SMRAM32_OFFSET(cs,              0xFF90);
-+      __CHECK_SMRAM32_OFFSET(ss,              0xFF9C);
-+      __CHECK_SMRAM32_OFFSET(es_sel,          0xFFA8);
-+      __CHECK_SMRAM32_OFFSET(cs_sel,          0xFFAC);
-+      __CHECK_SMRAM32_OFFSET(ss_sel,          0xFFB0);
-+      __CHECK_SMRAM32_OFFSET(ds_sel,          0xFFB4);
-+      __CHECK_SMRAM32_OFFSET(fs_sel,          0xFFB8);
-+      __CHECK_SMRAM32_OFFSET(gs_sel,          0xFFBC);
-+      __CHECK_SMRAM32_OFFSET(ldtr_sel,        0xFFC0);
-+      __CHECK_SMRAM32_OFFSET(tr_sel,          0xFFC4);
-+      __CHECK_SMRAM32_OFFSET(dr7,             0xFFC8);
-+      __CHECK_SMRAM32_OFFSET(dr6,             0xFFCC);
-+      __CHECK_SMRAM32_OFFSET(gprs,            0xFFD0);
-+      __CHECK_SMRAM32_OFFSET(eip,             0xFFF0);
-+      __CHECK_SMRAM32_OFFSET(eflags,          0xFFF4);
-+      __CHECK_SMRAM32_OFFSET(cr3,             0xFFF8);
-+      __CHECK_SMRAM32_OFFSET(cr0,             0xFFFC);
-+#undef __CHECK_SMRAM32_OFFSET
-+}
-+
-+
-+/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
-+
-+struct kvm_smm_seg_state_64 {
-+      u16 selector;
-+      u16 attributes;
-+      u32 limit;
-+      u64 base;
-+};
-+
-+struct kvm_smram_state_64 {
-+
-+      struct kvm_smm_seg_state_64 es;
-+      struct kvm_smm_seg_state_64 cs;
-+      struct kvm_smm_seg_state_64 ss;
-+      struct kvm_smm_seg_state_64 ds;
-+      struct kvm_smm_seg_state_64 fs;
-+      struct kvm_smm_seg_state_64 gs;
-+      struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
-+      struct kvm_smm_seg_state_64 ldtr;
-+      struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
-+      struct kvm_smm_seg_state_64 tr;
-+
-+      /* I/O restart and auto halt restart are not implemented by KVM */
-+      u64 io_restart_rip;
-+      u64 io_restart_rcx;
-+      u64 io_restart_rsi;
-+      u64 io_restart_rdi;
-+      u32 io_restart_dword;
-+      u32 reserved1;
-+      u8 io_inst_restart;
-+      u8 auto_hlt_restart;
-+      u8 reserved2[6];
-+
-+      u64 efer;
-+
-+      /*
-+       * Two fields below are implemented on AMD only, to store
-+       * SVM guest vmcb address if the #SMI was received while in the guest mode.
-+       */
-+      u64 svm_guest_flag;
-+      u64 svm_guest_vmcb_gpa;
-+      u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
-+
-+      u32 reserved3[3];
-+      u32 smm_revison;
-+      u32 smbase;
-+      u32 reserved4[5];
-+
-+      /* ssp and svm_* fields below are not implemented by KVM */
-+      u64 ssp;
-+      u64 svm_guest_pat;
-+      u64 svm_host_efer;
-+      u64 svm_host_cr4;
-+      u64 svm_host_cr3;
-+      u64 svm_host_cr0;
-+
-+      u64 cr4;
-+      u64 cr3;
-+      u64 cr0;
-+      u64 dr7;
-+      u64 dr6;
-+      u64 rflags;
-+      u64 rip;
-+      u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
-+};
-+
-+
-+static inline void __check_smram64_offsets(void)
-+{
-+#define __CHECK_SMRAM64_OFFSET(field, offset) \
-+      ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
-+
-+      __CHECK_SMRAM64_OFFSET(es,                      0xFE00);
-+      __CHECK_SMRAM64_OFFSET(cs,                      0xFE10);
-+      __CHECK_SMRAM64_OFFSET(ss,                      0xFE20);
-+      __CHECK_SMRAM64_OFFSET(ds,                      0xFE30);
-+      __CHECK_SMRAM64_OFFSET(fs,                      0xFE40);
-+      __CHECK_SMRAM64_OFFSET(gs,                      0xFE50);
-+      __CHECK_SMRAM64_OFFSET(gdtr,                    0xFE60);
-+      __CHECK_SMRAM64_OFFSET(ldtr,                    0xFE70);
-+      __CHECK_SMRAM64_OFFSET(idtr,                    0xFE80);
-+      __CHECK_SMRAM64_OFFSET(tr,                      0xFE90);
-+      __CHECK_SMRAM64_OFFSET(io_restart_rip,          0xFEA0);
-+      __CHECK_SMRAM64_OFFSET(io_restart_rcx,          0xFEA8);
-+      __CHECK_SMRAM64_OFFSET(io_restart_rsi,          0xFEB0);
-+      __CHECK_SMRAM64_OFFSET(io_restart_rdi,          0xFEB8);
-+      __CHECK_SMRAM64_OFFSET(io_restart_dword,        0xFEC0);
-+      __CHECK_SMRAM64_OFFSET(reserved1,               0xFEC4);
-+      __CHECK_SMRAM64_OFFSET(io_inst_restart,         0xFEC8);
-+      __CHECK_SMRAM64_OFFSET(auto_hlt_restart,        0xFEC9);
-+      __CHECK_SMRAM64_OFFSET(reserved2,               0xFECA);
-+      __CHECK_SMRAM64_OFFSET(efer,                    0xFED0);
-+      __CHECK_SMRAM64_OFFSET(svm_guest_flag,          0xFED8);
-+      __CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,      0xFEE0);
-+      __CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,   0xFEE8);
-+      __CHECK_SMRAM64_OFFSET(reserved3,               0xFEF0);
-+      __CHECK_SMRAM64_OFFSET(smm_revison,             0xFEFC);
-+      __CHECK_SMRAM64_OFFSET(smbase,                  0xFF00);
-+      __CHECK_SMRAM64_OFFSET(reserved4,               0xFF04);
-+      __CHECK_SMRAM64_OFFSET(ssp,                     0xFF18);
-+      __CHECK_SMRAM64_OFFSET(svm_guest_pat,           0xFF20);
-+      __CHECK_SMRAM64_OFFSET(svm_host_efer,           0xFF28);
-+      __CHECK_SMRAM64_OFFSET(svm_host_cr4,            0xFF30);
-+      __CHECK_SMRAM64_OFFSET(svm_host_cr3,            0xFF38);
-+      __CHECK_SMRAM64_OFFSET(svm_host_cr0,            0xFF40);
-+      __CHECK_SMRAM64_OFFSET(cr4,                     0xFF48);
-+      __CHECK_SMRAM64_OFFSET(cr3,                     0xFF50);
-+      __CHECK_SMRAM64_OFFSET(cr0,                     0xFF58);
-+      __CHECK_SMRAM64_OFFSET(dr7,                     0xFF60);
-+      __CHECK_SMRAM64_OFFSET(dr6,                     0xFF68);
-+      __CHECK_SMRAM64_OFFSET(rflags,                  0xFF70);
-+      __CHECK_SMRAM64_OFFSET(rip,                     0xFF78);
-+      __CHECK_SMRAM64_OFFSET(gprs,                    0xFF80);
-+#undef __CHECK_SMRAM64_OFFSET
-+}
-+
-+union kvm_smram {
-+      struct kvm_smram_state_64 smram64;
-+      struct kvm_smram_state_32 smram32;
-+      u8 bytes[512];
-+};
-+
-+void  __init kvm_emulator_init(void);
-+
-+
- /* Host execution mode. */
- #if defined(CONFIG_X86_32)
- #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index acca85b10545..20aec64e3521 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -13730,6 +13730,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
- static int __init kvm_x86_init(void)
- {
-       kvm_mmu_x86_module_init();
-+      kvm_emulator_init();
-       return 0;
- }
- module_init(kvm_x86_init);
diff --git a/patches/kernel/0009-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch b/patches/kernel/0009-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch
new file mode 100644 (file)
index 0000000..cabb73e
--- /dev/null
@@ -0,0 +1,218 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:06 +0300
+Subject: [PATCH] KVM: x86: emulator/smm: use smram structs in the common code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Switch from using a raw array to 'union kvm_smram'.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h |  5 +++--
+ arch/x86/kvm/emulate.c          | 12 +++++++-----
+ arch/x86/kvm/kvm_emulate.h      |  3 ++-
+ arch/x86/kvm/svm/svm.c          |  8 ++++++--
+ arch/x86/kvm/vmx/vmx.c          |  4 ++--
+ arch/x86/kvm/x86.c              | 16 ++++++++--------
+ 6 files changed, 28 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index f05ebaa26f0f..6885f3839e25 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -204,6 +204,7 @@ typedef enum exit_fastpath_completion fastpath_t;
+ struct x86_emulate_ctxt;
+ struct x86_exception;
++union kvm_smram;
+ enum x86_intercept;
+ enum x86_intercept_stage;
+@@ -1613,8 +1614,8 @@ struct kvm_x86_ops {
+       void (*setup_mce)(struct kvm_vcpu *vcpu);
+       int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+-      int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+-      int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
++      int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
++      int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
+       void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+       int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 470dd4453b01..7294dffa794a 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2582,16 +2582,18 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+ {
+       unsigned long cr0, cr4, efer;
+-      char buf[512];
++      const union kvm_smram smram;
+       u64 smbase;
+       int ret;
++      BUILD_BUG_ON(sizeof(smram) != 512);
++
+       if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
+               return emulate_ud(ctxt);
+       smbase = ctxt->ops->get_smbase(ctxt);
+-      ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
++      ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, (void *)&smram, sizeof(smram));
+       if (ret != X86EMUL_CONTINUE)
+               return X86EMUL_UNHANDLEABLE;
+@@ -2641,15 +2643,15 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+        * state (e.g. enter guest mode) before loading state from the SMM
+        * state-save area.
+        */
+-      if (ctxt->ops->leave_smm(ctxt, buf))
++      if (ctxt->ops->leave_smm(ctxt, &smram))
+               goto emulate_shutdown;
+ #ifdef CONFIG_X86_64
+       if (emulator_has_longmode(ctxt))
+-              ret = rsm_load_state_64(ctxt, buf);
++              ret = rsm_load_state_64(ctxt, (const char *)&smram);
+       else
+ #endif
+-              ret = rsm_load_state_32(ctxt, buf);
++              ret = rsm_load_state_32(ctxt, (const char *)&smram);
+       if (ret != X86EMUL_CONTINUE)
+               goto emulate_shutdown;
+diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
+index dd0ae61e44a1..76c0b8e7890b 100644
+--- a/arch/x86/kvm/kvm_emulate.h
++++ b/arch/x86/kvm/kvm_emulate.h
+@@ -19,6 +19,7 @@
+ struct x86_emulate_ctxt;
+ enum x86_intercept;
+ enum x86_intercept_stage;
++union kvm_smram;
+ struct x86_exception {
+       u8 vector;
+@@ -236,7 +237,7 @@ struct x86_emulate_ops {
+       unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
+       void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
+-      int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
++      int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const union kvm_smram *smram);
+       void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
+       int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
+ };
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index ce362e88a567..45c4def86cd3 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4385,12 +4385,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+       return 1;
+ }
+-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
++static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_host_map map_save;
+       int ret;
++      char *smstate = (char *)smram;
++
+       if (!is_guest_mode(vcpu))
+               return 0;
+@@ -4432,7 +4434,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+       return 0;
+ }
+-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
++static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_host_map map, map_save;
+@@ -4440,6 +4442,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+       struct vmcb *vmcb12;
+       int ret;
++      const char *smstate = (const char *)smram;
++
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+               return 0;
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 63247c57c72c..4319f65181f7 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -7914,7 +7914,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+       return !is_smm(vcpu);
+ }
+-static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
++static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -7935,7 +7935,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+       return 0;
+ }
+-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
++static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int ret;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 20aec64e3521..94c29391b065 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8186,9 +8186,9 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
+ }
+ static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
+-                                const char *smstate)
++                            const union kvm_smram *smram)
+ {
+-      return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
++      return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smram);
+ }
+ static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
+@@ -10246,25 +10246,25 @@ static void enter_smm(struct kvm_vcpu *vcpu)
+       struct kvm_segment cs, ds;
+       struct desc_ptr dt;
+       unsigned long cr0;
+-      char buf[512];
++      union kvm_smram smram;
+-      memset(buf, 0, 512);
++      memset(smram.bytes, 0, sizeof(smram.bytes));
+ #ifdef CONFIG_X86_64
+       if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+-              enter_smm_save_state_64(vcpu, buf);
++              enter_smm_save_state_64(vcpu, (char *)&smram);
+       else
+ #endif
+-              enter_smm_save_state_32(vcpu, buf);
++              enter_smm_save_state_32(vcpu, (char *)&smram);
+       /*
+        * Give enter_smm() a chance to make ISA-specific changes to the vCPU
+        * state (e.g. leave guest mode) after we've saved the state into the
+        * SMM state-save area.
+        */
+-      static_call(kvm_x86_enter_smm)(vcpu, buf);
++      static_call(kvm_x86_enter_smm)(vcpu, &smram);
+       kvm_smm_changed(vcpu, true);
+-      kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
++      kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram));
+       if (static_call(kvm_x86_get_nmi_mask)(vcpu))
+               vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
diff --git a/patches/kernel/0010-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch b/patches/kernel/0010-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch
new file mode 100644 (file)
index 0000000..31847cf
--- /dev/null
@@ -0,0 +1,272 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:07 +0300
+Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 32 bit smram
+ load/restore
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
+(used when X86_FEATURE_LM is not present in the guest CPUID).
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c | 81 +++++++++++++++---------------------------
+ arch/x86/kvm/x86.c     | 75 +++++++++++++++++---------------------
+ 2 files changed, 60 insertions(+), 96 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 7294dffa794a..65d82292ccec 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2359,25 +2359,17 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+       desc->type = (flags >>  8) & 15;
+ }
+-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
++static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
++                         const struct kvm_smm_seg_state_32 *state,
++                         u16 selector,
+                          int n)
+ {
+       struct desc_struct desc;
+-      int offset;
+-      u16 selector;
+-
+-      selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+-
+-      if (n < 3)
+-              offset = 0x7f84 + n * 12;
+-      else
+-              offset = 0x7f2c + (n - 3) * 12;
+-      set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
+-      set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
+-      rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
++      set_desc_base(&desc,      state->base);
++      set_desc_limit(&desc,     state->limit);
++      rsm_set_desc_flags(&desc, state->flags);
+       ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+-      return X86EMUL_CONTINUE;
+ }
+ #ifdef CONFIG_X86_64
+@@ -2448,63 +2440,46 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+ }
+ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+-                           const char *smstate)
++                           const struct kvm_smram_state_32 *smstate)
+ {
+-      struct desc_struct desc;
+       struct desc_ptr dt;
+-      u16 selector;
+-      u32 val, cr0, cr3, cr4;
+       int i;
+-      cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
+-      cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
+-      ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+-      ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
++      ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
++      ctxt->_eip =  smstate->eip;
+       for (i = 0; i < 8; i++)
+-              *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
+-
+-      val = GET_SMSTATE(u32, smstate, 0x7fcc);
++              *reg_write(ctxt, i) = smstate->gprs[i];
+-      if (ctxt->ops->set_dr(ctxt, 6, val))
++      if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
+               return X86EMUL_UNHANDLEABLE;
+-
+-      val = GET_SMSTATE(u32, smstate, 0x7fc8);
+-
+-      if (ctxt->ops->set_dr(ctxt, 7, val))
++      if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
+               return X86EMUL_UNHANDLEABLE;
+-      selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
+-      set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
+-      set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
+-      rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
+-      ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
++      rsm_load_seg_32(ctxt, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
++      rsm_load_seg_32(ctxt, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
+-      selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
+-      set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
+-      set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
+-      rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
+-      ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+-      dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
+-      dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
++      dt.address =               smstate->gdtr.base;
++      dt.size =                  smstate->gdtr.limit;
+       ctxt->ops->set_gdt(ctxt, &dt);
+-      dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
+-      dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
++      dt.address =               smstate->idtr.base;
++      dt.size =                  smstate->idtr.limit;
+       ctxt->ops->set_idt(ctxt, &dt);
+-      for (i = 0; i < 6; i++) {
+-              int r = rsm_load_seg_32(ctxt, smstate, i);
+-              if (r != X86EMUL_CONTINUE)
+-                      return r;
+-      }
++      rsm_load_seg_32(ctxt, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
++      rsm_load_seg_32(ctxt, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
++      rsm_load_seg_32(ctxt, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
+-      cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
++      rsm_load_seg_32(ctxt, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
++      rsm_load_seg_32(ctxt, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
++      rsm_load_seg_32(ctxt, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
+-      ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
++      ctxt->ops->set_smbase(ctxt, smstate->smbase);
+-      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
++      return rsm_enter_protected_mode(ctxt, smstate->cr0,
++                                      smstate->cr3, smstate->cr4);
+ }
+ #ifdef CONFIG_X86_64
+@@ -2651,7 +2626,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+               ret = rsm_load_state_64(ctxt, (const char *)&smram);
+       else
+ #endif
+-              ret = rsm_load_state_32(ctxt, (const char *)&smram);
++              ret = rsm_load_state_32(ctxt, &smram.smram32);
+       if (ret != X86EMUL_CONTINUE)
+               goto emulate_shutdown;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 94c29391b065..579a1cb6a7c8 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10100,22 +10100,18 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
+       return flags;
+ }
+-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
++static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
++                                       struct kvm_smm_seg_state_32 *state,
++                                       u32 *selector,
++                                       int n)
+ {
+       struct kvm_segment seg;
+-      int offset;
+       kvm_get_segment(vcpu, &seg, n);
+-      put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+-
+-      if (n < 3)
+-              offset = 0x7f84 + n * 12;
+-      else
+-              offset = 0x7f2c + (n - 3) * 12;
+-
+-      put_smstate(u32, buf, offset + 8, seg.base);
+-      put_smstate(u32, buf, offset + 4, seg.limit);
+-      put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
++      *selector = seg.selector;
++      state->base = seg.base;
++      state->limit = seg.limit;
++      state->flags = enter_smm_get_segment_flags(&seg);
+ }
+ #ifdef CONFIG_X86_64
+@@ -10136,54 +10132,47 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+ }
+ #endif
+-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
++static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_state_32 *smram)
+ {
+       struct desc_ptr dt;
+-      struct kvm_segment seg;
+       unsigned long val;
+       int i;
+-      put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+-      put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+-      put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+-      put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
++      smram->cr0     = kvm_read_cr0(vcpu);
++      smram->cr3     = kvm_read_cr3(vcpu);
++      smram->eflags  = kvm_get_rflags(vcpu);
++      smram->eip     = kvm_rip_read(vcpu);
+       for (i = 0; i < 8; i++)
+-              put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
++              smram->gprs[i] = kvm_register_read_raw(vcpu, i);
+       kvm_get_dr(vcpu, 6, &val);
+-      put_smstate(u32, buf, 0x7fcc, (u32)val);
++      smram->dr6     = (u32)val;
+       kvm_get_dr(vcpu, 7, &val);
+-      put_smstate(u32, buf, 0x7fc8, (u32)val);
++      smram->dr7     = (u32)val;
+-      kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+-      put_smstate(u32, buf, 0x7fc4, seg.selector);
+-      put_smstate(u32, buf, 0x7f64, seg.base);
+-      put_smstate(u32, buf, 0x7f60, seg.limit);
+-      put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+-
+-      kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+-      put_smstate(u32, buf, 0x7fc0, seg.selector);
+-      put_smstate(u32, buf, 0x7f80, seg.base);
+-      put_smstate(u32, buf, 0x7f7c, seg.limit);
+-      put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
++      enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
++      enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
+       static_call(kvm_x86_get_gdt)(vcpu, &dt);
+-      put_smstate(u32, buf, 0x7f74, dt.address);
+-      put_smstate(u32, buf, 0x7f70, dt.size);
++      smram->gdtr.base = dt.address;
++      smram->gdtr.limit = dt.size;
+       static_call(kvm_x86_get_idt)(vcpu, &dt);
+-      put_smstate(u32, buf, 0x7f58, dt.address);
+-      put_smstate(u32, buf, 0x7f54, dt.size);
++      smram->idtr.base = dt.address;
++      smram->idtr.limit = dt.size;
+-      for (i = 0; i < 6; i++)
+-              enter_smm_save_seg_32(vcpu, buf, i);
++      enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
++      enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
++      enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
+-      put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
++      enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
++      enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
++      enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
+-      /* revision id */
+-      put_smstate(u32, buf, 0x7efc, 0x00020000);
+-      put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
++      smram->cr4 = kvm_read_cr4(vcpu);
++      smram->smm_revision = 0x00020000;
++      smram->smbase = vcpu->arch.smbase;
+ }
+ #ifdef CONFIG_X86_64
+@@ -10254,7 +10243,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
+               enter_smm_save_state_64(vcpu, (char *)&smram);
+       else
+ #endif
+-              enter_smm_save_state_32(vcpu, (char *)&smram);
++              enter_smm_save_state_32(vcpu, &smram.smram32);
+       /*
+        * Give enter_smm() a chance to make ISA-specific changes to the vCPU
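
The refactoring pattern this patch applies throughout: magic save-area offsets
decoded by hand are replaced by named struct fields, so each offset is encoded
once, in the layout itself. A standalone sketch with a simplified hypothetical
GET_SMSTATE-style macro:

/* sketch: raw magic-offset access vs. named struct fields; GET_STATE is a
 * simplified hypothetical stand-in, not the kernel's GET_SMSTATE macro. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GET_STATE(type, buf, offset) (*(const type *)((buf) + (offset)))

struct seg_sel {
	uint32_t es_sel;	/* byte offset 0 */
	uint32_t cs_sel;	/* byte offset 4 */
};

union image {
	uint8_t bytes[sizeof(struct seg_sel)];
	struct seg_sel typed;
};

int main(void)
{
	union image img;

	memset(img.bytes, 0, sizeof(img.bytes));
	img.bytes[4] = 0x08;	/* place a CS selector at byte offset 4 */

	/* old style: the reader must know 4 means cs_sel */
	printf("raw:   cs_sel = %#x\n", GET_STATE(uint32_t, img.bytes, 4));
	/* new style: the offset lives once, in the struct definition */
	printf("typed: cs_sel = %#x\n", img.typed.cs_sel);
	return 0;
}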
diff --git a/patches/kernel/0010-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch b/patches/kernel/0010-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch
deleted file mode 100644 (file)
index 6a46cc5..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:06 +0300
-Subject: [PATCH] KVM: x86: emulator/smm: use smram structs in the common code
-
-Switch from using a raw array to 'union kvm_smram'.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/kvm_host.h |  5 +++--
- arch/x86/kvm/emulate.c          | 12 +++++++-----
- arch/x86/kvm/kvm_emulate.h      |  3 ++-
- arch/x86/kvm/svm/svm.c          |  8 ++++++--
- arch/x86/kvm/vmx/vmx.c          |  4 ++--
- arch/x86/kvm/x86.c              | 16 ++++++++--------
- 6 files changed, 28 insertions(+), 20 deletions(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index f05ebaa26f0f..6885f3839e25 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -204,6 +204,7 @@ typedef enum exit_fastpath_completion fastpath_t;
- struct x86_emulate_ctxt;
- struct x86_exception;
-+union kvm_smram;
- enum x86_intercept;
- enum x86_intercept_stage;
-@@ -1613,8 +1614,8 @@ struct kvm_x86_ops {
-       void (*setup_mce)(struct kvm_vcpu *vcpu);
-       int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
--      int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
--      int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
-+      int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
-+      int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
-       void (*enable_smi_window)(struct kvm_vcpu *vcpu);
-       int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 470dd4453b01..7294dffa794a 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2582,16 +2582,18 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- static int em_rsm(struct x86_emulate_ctxt *ctxt)
- {
-       unsigned long cr0, cr4, efer;
--      char buf[512];
-+      const union kvm_smram smram;
-       u64 smbase;
-       int ret;
-+      BUILD_BUG_ON(sizeof(smram) != 512);
-+
-       if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
-               return emulate_ud(ctxt);
-       smbase = ctxt->ops->get_smbase(ctxt);
--      ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
-+      ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, (void *)&smram, sizeof(smram));
-       if (ret != X86EMUL_CONTINUE)
-               return X86EMUL_UNHANDLEABLE;
-@@ -2641,15 +2643,15 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
-        * state (e.g. enter guest mode) before loading state from the SMM
-        * state-save area.
-        */
--      if (ctxt->ops->leave_smm(ctxt, buf))
-+      if (ctxt->ops->leave_smm(ctxt, &smram))
-               goto emulate_shutdown;
- #ifdef CONFIG_X86_64
-       if (emulator_has_longmode(ctxt))
--              ret = rsm_load_state_64(ctxt, buf);
-+              ret = rsm_load_state_64(ctxt, (const char *)&smram);
-       else
- #endif
--              ret = rsm_load_state_32(ctxt, buf);
-+              ret = rsm_load_state_32(ctxt, (const char *)&smram);
-       if (ret != X86EMUL_CONTINUE)
-               goto emulate_shutdown;
-diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
-index dd0ae61e44a1..76c0b8e7890b 100644
---- a/arch/x86/kvm/kvm_emulate.h
-+++ b/arch/x86/kvm/kvm_emulate.h
-@@ -19,6 +19,7 @@
- struct x86_emulate_ctxt;
- enum x86_intercept;
- enum x86_intercept_stage;
-+union kvm_smram;
- struct x86_exception {
-       u8 vector;
-@@ -236,7 +237,7 @@ struct x86_emulate_ops {
-       unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
-       void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
--      int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
-+      int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const union kvm_smram *smram);
-       void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
-       int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
- };
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index ce362e88a567..45c4def86cd3 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -4385,12 +4385,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
-       return 1;
- }
--static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
-+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
-       struct kvm_host_map map_save;
-       int ret;
-+      char *smstate = (char *)smram;
-+
-       if (!is_guest_mode(vcpu))
-               return 0;
-@@ -4432,7 +4434,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
-       return 0;
- }
--static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
-+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
-       struct kvm_host_map map, map_save;
-@@ -4440,6 +4442,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
-       struct vmcb *vmcb12;
-       int ret;
-+      const char *smstate = (const char *)smram;
-+
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
-               return 0;
-diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
-index 63247c57c72c..4319f65181f7 100644
---- a/arch/x86/kvm/vmx/vmx.c
-+++ b/arch/x86/kvm/vmx/vmx.c
-@@ -7914,7 +7914,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
-       return !is_smm(vcpu);
- }
--static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
-+static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
- {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-@@ -7935,7 +7935,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
-       return 0;
- }
--static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
-+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
- {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int ret;
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 20aec64e3521..94c29391b065 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -8186,9 +8186,9 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
- }
- static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
--                                const char *smstate)
-+                            const union kvm_smram *smram)
- {
--      return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
-+      return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smram);
- }
- static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
-@@ -10246,25 +10246,25 @@ static void enter_smm(struct kvm_vcpu *vcpu)
-       struct kvm_segment cs, ds;
-       struct desc_ptr dt;
-       unsigned long cr0;
--      char buf[512];
-+      union kvm_smram smram;
--      memset(buf, 0, 512);
-+      memset(smram.bytes, 0, sizeof(smram.bytes));
- #ifdef CONFIG_X86_64
-       if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
--              enter_smm_save_state_64(vcpu, buf);
-+              enter_smm_save_state_64(vcpu, (char *)&smram);
-       else
- #endif
--              enter_smm_save_state_32(vcpu, buf);
-+              enter_smm_save_state_32(vcpu, (char *)&smram);
-       /*
-        * Give enter_smm() a chance to make ISA-specific changes to the vCPU
-        * state (e.g. leave guest mode) after we've saved the state into the
-        * SMM state-save area.
-        */
--      static_call(kvm_x86_enter_smm)(vcpu, buf);
-+      static_call(kvm_x86_enter_smm)(vcpu, &smram);
-       kvm_smm_changed(vcpu, true);
--      kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
-+      kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram));
-       if (static_call(kvm_x86_get_nmi_mask)(vcpu))
-               vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
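The definition of 'union kvm_smram' itself is not in these hunks; based on how it is used (the memset over smram.bytes, BUILD_BUG_ON(sizeof(smram) != 512) in em_rsm(), and the smram.smram32/smram.smram64 members the follow-up patches dereference), it presumably has this shape:

#include <stdint.h>

typedef uint8_t u8;

/* Placeholder bodies; the real structs are the 512-byte layouts this
 * series defines in kvm_emulate.h. */
struct kvm_smram_state_32 { u8 raw[512]; };
struct kvm_smram_state_64 { u8 raw[512]; };

/* Presumed shape, reconstructed from usage in the hunks above: */
union kvm_smram {
        struct kvm_smram_state_64 smram64;
        struct kvm_smram_state_32 smram32;
        u8 bytes[512];
};

_Static_assert(sizeof(union kvm_smram) == 512, "whole save area is 512 bytes");

The union keeps the raw 512-byte view for the read_phys()/write_guest() calls while giving the 32-bit and 64-bit paths typed views of the same storage.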
diff --git a/patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch b/patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch
deleted file mode 100644 (file)
index 40234cf..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:07 +0300
-Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 32 bit smram
- load/restore
-
-Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
-(used when X86_FEATURE_LM is not present in the guest CPUID).
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/emulate.c | 81 +++++++++++++++---------------------------
- arch/x86/kvm/x86.c     | 75 +++++++++++++++++---------------------
- 2 files changed, 60 insertions(+), 96 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 7294dffa794a..65d82292ccec 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2359,25 +2359,17 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
-       desc->type = (flags >>  8) & 15;
- }
--static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
-+static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
-+                         const struct kvm_smm_seg_state_32 *state,
-+                         u16 selector,
-                          int n)
- {
-       struct desc_struct desc;
--      int offset;
--      u16 selector;
--
--      selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
--
--      if (n < 3)
--              offset = 0x7f84 + n * 12;
--      else
--              offset = 0x7f2c + (n - 3) * 12;
--      set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
--      set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
--      rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
-+      set_desc_base(&desc,      state->base);
-+      set_desc_limit(&desc,     state->limit);
-+      rsm_set_desc_flags(&desc, state->flags);
-       ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
--      return X86EMUL_CONTINUE;
- }
- #ifdef CONFIG_X86_64
-@@ -2448,63 +2440,46 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- }
- static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
--                           const char *smstate)
-+                           const struct kvm_smram_state_32 *smstate)
- {
--      struct desc_struct desc;
-       struct desc_ptr dt;
--      u16 selector;
--      u32 val, cr0, cr3, cr4;
-       int i;
--      cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
--      cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
--      ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
--      ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
-+      ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
-+      ctxt->_eip =  smstate->eip;
-       for (i = 0; i < 8; i++)
--              *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
--
--      val = GET_SMSTATE(u32, smstate, 0x7fcc);
-+              *reg_write(ctxt, i) = smstate->gprs[i];
--      if (ctxt->ops->set_dr(ctxt, 6, val))
-+      if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
-               return X86EMUL_UNHANDLEABLE;
--
--      val = GET_SMSTATE(u32, smstate, 0x7fc8);
--
--      if (ctxt->ops->set_dr(ctxt, 7, val))
-+      if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
-               return X86EMUL_UNHANDLEABLE;
--      selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
--      set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
--      set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
--      rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
--      ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
-+      rsm_load_seg_32(ctxt, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
-+      rsm_load_seg_32(ctxt, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
--      selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
--      set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
--      set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
--      rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
--      ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
--      dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
--      dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
-+      dt.address =               smstate->gdtr.base;
-+      dt.size =                  smstate->gdtr.limit;
-       ctxt->ops->set_gdt(ctxt, &dt);
--      dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
--      dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
-+      dt.address =               smstate->idtr.base;
-+      dt.size =                  smstate->idtr.limit;
-       ctxt->ops->set_idt(ctxt, &dt);
--      for (i = 0; i < 6; i++) {
--              int r = rsm_load_seg_32(ctxt, smstate, i);
--              if (r != X86EMUL_CONTINUE)
--                      return r;
--      }
-+      rsm_load_seg_32(ctxt, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
-+      rsm_load_seg_32(ctxt, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
-+      rsm_load_seg_32(ctxt, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
--      cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
-+      rsm_load_seg_32(ctxt, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
-+      rsm_load_seg_32(ctxt, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
-+      rsm_load_seg_32(ctxt, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
--      ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
-+      ctxt->ops->set_smbase(ctxt, smstate->smbase);
--      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-+      return rsm_enter_protected_mode(ctxt, smstate->cr0,
-+                                      smstate->cr3, smstate->cr4);
- }
- #ifdef CONFIG_X86_64
-@@ -2651,7 +2626,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
-               ret = rsm_load_state_64(ctxt, (const char *)&smram);
-       else
- #endif
--              ret = rsm_load_state_32(ctxt, (const char *)&smram);
-+              ret = rsm_load_state_32(ctxt, &smram.smram32);
-       if (ret != X86EMUL_CONTINUE)
-               goto emulate_shutdown;
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 94c29391b065..579a1cb6a7c8 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -10100,22 +10100,18 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
-       return flags;
- }
--static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
-+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
-+                                       struct kvm_smm_seg_state_32 *state,
-+                                       u32 *selector,
-+                                       int n)
- {
-       struct kvm_segment seg;
--      int offset;
-       kvm_get_segment(vcpu, &seg, n);
--      put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
--
--      if (n < 3)
--              offset = 0x7f84 + n * 12;
--      else
--              offset = 0x7f2c + (n - 3) * 12;
--
--      put_smstate(u32, buf, offset + 8, seg.base);
--      put_smstate(u32, buf, offset + 4, seg.limit);
--      put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
-+      *selector = seg.selector;
-+      state->base = seg.base;
-+      state->limit = seg.limit;
-+      state->flags = enter_smm_get_segment_flags(&seg);
- }
- #ifdef CONFIG_X86_64
-@@ -10136,54 +10132,47 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
- }
- #endif
--static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
-+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_state_32 *smram)
- {
-       struct desc_ptr dt;
--      struct kvm_segment seg;
-       unsigned long val;
-       int i;
--      put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
--      put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
--      put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
--      put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
-+      smram->cr0     = kvm_read_cr0(vcpu);
-+      smram->cr3     = kvm_read_cr3(vcpu);
-+      smram->eflags  = kvm_get_rflags(vcpu);
-+      smram->eip     = kvm_rip_read(vcpu);
-       for (i = 0; i < 8; i++)
--              put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
-+              smram->gprs[i] = kvm_register_read_raw(vcpu, i);
-       kvm_get_dr(vcpu, 6, &val);
--      put_smstate(u32, buf, 0x7fcc, (u32)val);
-+      smram->dr6     = (u32)val;
-       kvm_get_dr(vcpu, 7, &val);
--      put_smstate(u32, buf, 0x7fc8, (u32)val);
-+      smram->dr7     = (u32)val;
--      kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
--      put_smstate(u32, buf, 0x7fc4, seg.selector);
--      put_smstate(u32, buf, 0x7f64, seg.base);
--      put_smstate(u32, buf, 0x7f60, seg.limit);
--      put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
--
--      kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
--      put_smstate(u32, buf, 0x7fc0, seg.selector);
--      put_smstate(u32, buf, 0x7f80, seg.base);
--      put_smstate(u32, buf, 0x7f7c, seg.limit);
--      put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
-+      enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
-+      enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
-       static_call(kvm_x86_get_gdt)(vcpu, &dt);
--      put_smstate(u32, buf, 0x7f74, dt.address);
--      put_smstate(u32, buf, 0x7f70, dt.size);
-+      smram->gdtr.base = dt.address;
-+      smram->gdtr.limit = dt.size;
-       static_call(kvm_x86_get_idt)(vcpu, &dt);
--      put_smstate(u32, buf, 0x7f58, dt.address);
--      put_smstate(u32, buf, 0x7f54, dt.size);
-+      smram->idtr.base = dt.address;
-+      smram->idtr.limit = dt.size;
--      for (i = 0; i < 6; i++)
--              enter_smm_save_seg_32(vcpu, buf, i);
-+      enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
-+      enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
-+      enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
--      put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
-+      enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
-+      enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
-+      enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
--      /* revision id */
--      put_smstate(u32, buf, 0x7efc, 0x00020000);
--      put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
-+      smram->cr4 = kvm_read_cr4(vcpu);
-+      smram->smm_revision = 0x00020000;
-+      smram->smbase = vcpu->arch.smbase;
- }
- #ifdef CONFIG_X86_64
-@@ -10254,7 +10243,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
-               enter_smm_save_state_64(vcpu, (char *)&smram);
-       else
- #endif
--              enter_smm_save_state_32(vcpu, (char *)&smram);
-+              enter_smm_save_state_32(vcpu, &smram.smram32);
-       /*
-        * Give enter_smm() a chance to make ISA-specific changes to the vCPU
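The shape of kvm_smm_seg_state_32 follows directly from the offsets the deleted code used: flags at offset + 0, limit at offset + 4, base at offset + 8, with a 12-byte stride per descriptor, and selectors kept apart in a u32 array at 0x7fa8 + n * 4 (which is why the struct carries separate tr_sel, ldtr_sel, es_sel, ... members). A sketch under those assumptions:

#include <stdint.h>

typedef uint32_t u32;

/* Presumed layout, 12 bytes per descriptor (old stride: n * 12): */
struct kvm_smm_seg_state_32_sketch {
        u32 flags; /* the old code read this at offset + 0 */
        u32 limit; /* offset + 4 */
        u32 base;  /* offset + 8 */
};

_Static_assert(sizeof(struct kvm_smm_seg_state_32_sketch) == 12,
               "must match the 12-byte stride of the old layout");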
diff --git a/patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch b/patches/kernel/0011-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch
new file mode 100644 (file)
index 0000000..ee7b219
--- /dev/null
@@ -0,0 +1,283 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:08 +0300
+Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 64 bit smram
+ load/restore
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
+(used when X86_FEATURE_LM is present in the guest CPUID,
+regardless of 32-bitness of the guest).
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c | 88 ++++++++++++++----------------------------
+ arch/x86/kvm/x86.c     | 75 ++++++++++++++++-------------------
+ 2 files changed, 62 insertions(+), 101 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 65d82292ccec..03f9e5aa036e 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2373,24 +2373,16 @@ static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
+ }
+ #ifdef CONFIG_X86_64
+-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+-                         int n)
++static void rsm_load_seg_64(struct x86_emulate_ctxt *ctxt,
++                          const struct kvm_smm_seg_state_64 *state,
++                          int n)
+ {
+       struct desc_struct desc;
+-      int offset;
+-      u16 selector;
+-      u32 base3;
+-
+-      offset = 0x7e00 + n * 16;
+-
+-      selector =                GET_SMSTATE(u16, smstate, offset);
+-      rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+-      set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
+-      set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
+-      base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
+-      ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+-      return X86EMUL_CONTINUE;
++      rsm_set_desc_flags(&desc, state->attributes << 8);
++      set_desc_limit(&desc,     state->limit);
++      set_desc_base(&desc,      (u32)state->base);
++      ctxt->ops->set_segment(ctxt, state->selector, &desc, state->base >> 32, n);
+ }
+ #endif
+@@ -2484,71 +2476,49 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ #ifdef CONFIG_X86_64
+ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+-                           const char *smstate)
++                           const struct kvm_smram_state_64 *smstate)
+ {
+-      struct desc_struct desc;
+       struct desc_ptr dt;
+-      u64 val, cr0, cr3, cr4;
+-      u32 base3;
+-      u16 selector;
+       int i, r;
+       for (i = 0; i < 16; i++)
+-              *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
++              *reg_write(ctxt, i) = smstate->gprs[15 - i];
+-      ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
+-      ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
++      ctxt->_eip   = smstate->rip;
++      ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
+-      val = GET_SMSTATE(u64, smstate, 0x7f68);
+-
+-      if (ctxt->ops->set_dr(ctxt, 6, val))
++      if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
+               return X86EMUL_UNHANDLEABLE;
+-
+-      val = GET_SMSTATE(u64, smstate, 0x7f60);
+-
+-      if (ctxt->ops->set_dr(ctxt, 7, val))
++      if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
+               return X86EMUL_UNHANDLEABLE;
+-      cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
+-      cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
+-      cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
+-      ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+-      val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
++      ctxt->ops->set_smbase(ctxt, smstate->smbase);
+-      if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
++      if (ctxt->ops->set_msr(ctxt, MSR_EFER, smstate->efer & ~EFER_LMA))
+               return X86EMUL_UNHANDLEABLE;
+-      selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
+-      rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+-      set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
+-      set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
+-      base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
+-      ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
++      rsm_load_seg_64(ctxt, &smstate->tr, VCPU_SREG_TR);
+-      dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
+-      dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
++      dt.size =                   smstate->idtr.limit;
++      dt.address =                smstate->idtr.base;
+       ctxt->ops->set_idt(ctxt, &dt);
+-      selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
+-      rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+-      set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
+-      set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
+-      base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
+-      ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
++      rsm_load_seg_64(ctxt, &smstate->ldtr, VCPU_SREG_LDTR);
+-      dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
+-      dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
++      dt.size =                   smstate->gdtr.limit;
++      dt.address =                smstate->gdtr.base;
+       ctxt->ops->set_gdt(ctxt, &dt);
+-      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
++      r = rsm_enter_protected_mode(ctxt, smstate->cr0, smstate->cr3, smstate->cr4);
+       if (r != X86EMUL_CONTINUE)
+               return r;
+-      for (i = 0; i < 6; i++) {
+-              r = rsm_load_seg_64(ctxt, smstate, i);
+-              if (r != X86EMUL_CONTINUE)
+-                      return r;
+-      }
++      rsm_load_seg_64(ctxt, &smstate->es, VCPU_SREG_ES);
++      rsm_load_seg_64(ctxt, &smstate->cs, VCPU_SREG_CS);
++      rsm_load_seg_64(ctxt, &smstate->ss, VCPU_SREG_SS);
++      rsm_load_seg_64(ctxt, &smstate->ds, VCPU_SREG_DS);
++      rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
++      rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
+       return X86EMUL_CONTINUE;
+ }
+@@ -2623,7 +2593,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
+ #ifdef CONFIG_X86_64
+       if (emulator_has_longmode(ctxt))
+-              ret = rsm_load_state_64(ctxt, (const char *)&smram);
++              ret = rsm_load_state_64(ctxt, &smram.smram64);
+       else
+ #endif
+               ret = rsm_load_state_32(ctxt, &smram.smram32);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 579a1cb6a7c8..7a4d86f9bdcd 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10115,20 +10115,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
+ }
+ #ifdef CONFIG_X86_64
+-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
++static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
++                                struct kvm_smm_seg_state_64 *state,
++                                int n)
+ {
+       struct kvm_segment seg;
+-      int offset;
+-      u16 flags;
+       kvm_get_segment(vcpu, &seg, n);
+-      offset = 0x7e00 + n * 16;
+-
+-      flags = enter_smm_get_segment_flags(&seg) >> 8;
+-      put_smstate(u16, buf, offset, seg.selector);
+-      put_smstate(u16, buf, offset + 2, flags);
+-      put_smstate(u32, buf, offset + 4, seg.limit);
+-      put_smstate(u64, buf, offset + 8, seg.base);
++      state->selector = seg.selector;
++      state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
++      state->limit = seg.limit;
++      state->base = seg.base;
+ }
+ #endif
+@@ -10176,57 +10173,51 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
+ }
+ #ifdef CONFIG_X86_64
+-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
++static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_state_64 *smram)
+ {
+       struct desc_ptr dt;
+-      struct kvm_segment seg;
+       unsigned long val;
+       int i;
+       for (i = 0; i < 16; i++)
+-              put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
++              smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
++
++      smram->rip    = kvm_rip_read(vcpu);
++      smram->rflags = kvm_get_rflags(vcpu);
+-      put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+-      put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+       kvm_get_dr(vcpu, 6, &val);
+-      put_smstate(u64, buf, 0x7f68, val);
++      smram->dr6 = val;
+       kvm_get_dr(vcpu, 7, &val);
+-      put_smstate(u64, buf, 0x7f60, val);
+-
+-      put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+-      put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+-      put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
++      smram->dr7 = val;
+-      put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
++      smram->cr0 = kvm_read_cr0(vcpu);
++      smram->cr3 = kvm_read_cr3(vcpu);
++      smram->cr4 = kvm_read_cr4(vcpu);
+-      /* revision id */
+-      put_smstate(u32, buf, 0x7efc, 0x00020064);
++      smram->smbase = vcpu->arch.smbase;
++      smram->smm_revison = 0x00020064;
+-      put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
++      smram->efer = vcpu->arch.efer;
+-      kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+-      put_smstate(u16, buf, 0x7e90, seg.selector);
+-      put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+-      put_smstate(u32, buf, 0x7e94, seg.limit);
+-      put_smstate(u64, buf, 0x7e98, seg.base);
++      enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
+       static_call(kvm_x86_get_idt)(vcpu, &dt);
+-      put_smstate(u32, buf, 0x7e84, dt.size);
+-      put_smstate(u64, buf, 0x7e88, dt.address);
++      smram->idtr.limit = dt.size;
++      smram->idtr.base = dt.address;
+-      kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+-      put_smstate(u16, buf, 0x7e70, seg.selector);
+-      put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+-      put_smstate(u32, buf, 0x7e74, seg.limit);
+-      put_smstate(u64, buf, 0x7e78, seg.base);
++      enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
+       static_call(kvm_x86_get_gdt)(vcpu, &dt);
+-      put_smstate(u32, buf, 0x7e64, dt.size);
+-      put_smstate(u64, buf, 0x7e68, dt.address);
++      smram->gdtr.limit = dt.size;
++      smram->gdtr.base = dt.address;
+-      for (i = 0; i < 6; i++)
+-              enter_smm_save_seg_64(vcpu, buf, i);
++      enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
++      enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
++      enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
++      enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
++      enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
++      enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
+ }
+ #endif
+@@ -10240,7 +10231,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
+       memset(smram.bytes, 0, sizeof(smram.bytes));
+ #ifdef CONFIG_X86_64
+       if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+-              enter_smm_save_state_64(vcpu, (char *)&smram);
++              enter_smm_save_state_64(vcpu, &smram.smram64);
+       else
+ #endif
+               enter_smm_save_state_32(vcpu, &smram.smram32);
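Here the per-segment record is 16 bytes, matching the deleted code's 0x7e00 + n * 16 stride, and the new accessors fold the old separate base3 word into the high half of a single u64 base (note the state->base >> 32 passed as base3 in rsm_load_seg_64()). A sketch of the implied kvm_smm_seg_state_64:

#include <stdint.h>

typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Presumed layout, 16 bytes per segment (old stride: 0x7e00 + n * 16): */
struct kvm_smm_seg_state_64_sketch {
        u16 selector;   /* old offset + 0 */
        u16 attributes; /* offset + 2; stored as flags >> 8 */
        u32 limit;      /* offset + 4 */
        u64 base;       /* offset + 8; low half is the desc base, high half base3 */
};

_Static_assert(sizeof(struct kvm_smm_seg_state_64_sketch) == 16,
               "must match the 16-byte architectural stride");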
diff --git a/patches/kernel/0012-KVM-x86-SVM-use-smram-structs.patch b/patches/kernel/0012-KVM-x86-SVM-use-smram-structs.patch
new file mode 100644 (file)
index 0000000..ac298b5
--- /dev/null
@@ -0,0 +1,102 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:09 +0300
+Subject: [PATCH] KVM: x86: SVM: use smram structs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This removes the last user of put_smstate/GET_SMSTATE, so
+remove these functions as well.
+
+Also add a sanity check that we don't attempt to enter SMM
+on a non-long-mode-capable guest CPU while a nested guest is running.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/include/asm/kvm_host.h |  6 ------
+ arch/x86/kvm/svm/svm.c          | 21 ++++++---------------
+ 2 files changed, 6 insertions(+), 21 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 6885f3839e25..f5b82b6f4f84 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -2090,12 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
+ #endif
+ }
+-#define put_smstate(type, buf, offset, val)                      \
+-      *(type *)((buf) + (offset) - 0x7e00) = val
+-
+-#define GET_SMSTATE(type, buf, offset)                \
+-      (*(type *)((buf) + (offset) - 0x7e00))
+-
+ int kvm_cpu_dirty_log_size(void);
+ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 45c4def86cd3..bfacbef667d7 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4391,15 +4391,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
+       struct kvm_host_map map_save;
+       int ret;
+-      char *smstate = (char *)smram;
+-
+       if (!is_guest_mode(vcpu))
+               return 0;
+-      /* FED8h - SVM Guest */
+-      put_smstate(u64, smstate, 0x7ed8, 1);
+-      /* FEE0h - SVM Guest VMCB Physical Address */
+-      put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
++      smram->smram64.svm_guest_flag = 1;
++      smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
+       svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+       svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+@@ -4438,28 +4434,23 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_host_map map, map_save;
+-      u64 saved_efer, vmcb12_gpa;
+       struct vmcb *vmcb12;
+       int ret;
+-      const char *smstate = (const char *)smram;
+-
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+               return 0;
+       /* Non-zero if SMI arrived while vCPU was in guest mode. */
+-      if (!GET_SMSTATE(u64, smstate, 0x7ed8))
++      if (!smram->smram64.svm_guest_flag)
+               return 0;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+               return 1;
+-      saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+-      if (!(saved_efer & EFER_SVME))
++      if (!(smram->smram64.efer & EFER_SVME))
+               return 1;
+-      vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+-      if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
++      if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram->smram64.svm_guest_vmcb_gpa), &map) == -EINVAL)
+               return 1;
+       ret = 1;
+@@ -4485,7 +4476,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
+       vmcb12 = map.hva;
+       nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+       nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
+-      ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
++      ret = enter_svm_guest_mode(vcpu, smram->smram64.svm_guest_vmcb_gpa, vmcb12, false);
+       if (ret)
+               goto unmap_save;
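The two AMD-defined fields this patch names were previously written through magic offsets whose comments gave the architectural addresses: FED8h ("SVM Guest") and FEE0h ("SVM Guest VMCB Physical Address"), with EFER, which svm_leave_smm() checks for EFER_SVME, at FED0h. A sketch of the corresponding kvm_smram_state_64 fragment, with surrounding fields elided:

#include <stdint.h>

typedef uint64_t u64;

/* Presumed fragment of kvm_smram_state_64; offsets come from the magic
 * numbers the deleted code used (0x7ed0/0x7ed8/0x7ee0, i.e. FED0h/FED8h/FEE0h): */
struct smram64_svm_fragment_sketch {
        u64 efer;               /* FED0h */
        u64 svm_guest_flag;     /* FED8h, non-zero if the SMI arrived in guest mode */
        u64 svm_guest_vmcb_gpa; /* FEE0h */
};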
diff --git a/patches/kernel/0012-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch b/patches/kernel/0012-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch
deleted file mode 100644 (file)
index aa962a5..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:08 +0300
-Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 64 bit smram
- load/restore
-
-Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
-(used when X86_FEATURE_LM is present in the guest CPUID,
-regardless of 32-bitness of the guest).
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/emulate.c | 88 ++++++++++++++----------------------------
- arch/x86/kvm/x86.c     | 75 ++++++++++++++++-------------------
- 2 files changed, 62 insertions(+), 101 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 65d82292ccec..03f9e5aa036e 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2373,24 +2373,16 @@ static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
- }
- #ifdef CONFIG_X86_64
--static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
--                         int n)
-+static void rsm_load_seg_64(struct x86_emulate_ctxt *ctxt,
-+                          const struct kvm_smm_seg_state_64 *state,
-+                          int n)
- {
-       struct desc_struct desc;
--      int offset;
--      u16 selector;
--      u32 base3;
--
--      offset = 0x7e00 + n * 16;
--
--      selector =                GET_SMSTATE(u16, smstate, offset);
--      rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
--      set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
--      set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
--      base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
--      ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
--      return X86EMUL_CONTINUE;
-+      rsm_set_desc_flags(&desc, state->attributes << 8);
-+      set_desc_limit(&desc,     state->limit);
-+      set_desc_base(&desc,      (u32)state->base);
-+      ctxt->ops->set_segment(ctxt, state->selector, &desc, state->base >> 32, n);
- }
- #endif
-@@ -2484,71 +2476,49 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- #ifdef CONFIG_X86_64
- static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
--                           const char *smstate)
-+                           const struct kvm_smram_state_64 *smstate)
- {
--      struct desc_struct desc;
-       struct desc_ptr dt;
--      u64 val, cr0, cr3, cr4;
--      u32 base3;
--      u16 selector;
-       int i, r;
-       for (i = 0; i < 16; i++)
--              *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-+              *reg_write(ctxt, i) = smstate->gprs[15 - i];
--      ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
--      ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
-+      ctxt->_eip   = smstate->rip;
-+      ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
--      val = GET_SMSTATE(u64, smstate, 0x7f68);
--
--      if (ctxt->ops->set_dr(ctxt, 6, val))
-+      if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
-               return X86EMUL_UNHANDLEABLE;
--
--      val = GET_SMSTATE(u64, smstate, 0x7f60);
--
--      if (ctxt->ops->set_dr(ctxt, 7, val))
-+      if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
-               return X86EMUL_UNHANDLEABLE;
--      cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
--      cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
--      cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
--      ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
--      val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
-+      ctxt->ops->set_smbase(ctxt, smstate->smbase);
--      if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
-+      if (ctxt->ops->set_msr(ctxt, MSR_EFER, smstate->efer & ~EFER_LMA))
-               return X86EMUL_UNHANDLEABLE;
--      selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
--      rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
--      set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
--      set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
--      base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
--      ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
-+      rsm_load_seg_64(ctxt, &smstate->tr, VCPU_SREG_TR);
--      dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
--      dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
-+      dt.size =                   smstate->idtr.limit;
-+      dt.address =                smstate->idtr.base;
-       ctxt->ops->set_idt(ctxt, &dt);
--      selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
--      rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
--      set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
--      set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
--      base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
--      ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
-+      rsm_load_seg_64(ctxt, &smstate->ldtr, VCPU_SREG_LDTR);
--      dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
--      dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
-+      dt.size =                   smstate->gdtr.limit;
-+      dt.address =                smstate->gdtr.base;
-       ctxt->ops->set_gdt(ctxt, &dt);
--      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-+      r = rsm_enter_protected_mode(ctxt, smstate->cr0, smstate->cr3, smstate->cr4);
-       if (r != X86EMUL_CONTINUE)
-               return r;
--      for (i = 0; i < 6; i++) {
--              r = rsm_load_seg_64(ctxt, smstate, i);
--              if (r != X86EMUL_CONTINUE)
--                      return r;
--      }
-+      rsm_load_seg_64(ctxt, &smstate->es, VCPU_SREG_ES);
-+      rsm_load_seg_64(ctxt, &smstate->cs, VCPU_SREG_CS);
-+      rsm_load_seg_64(ctxt, &smstate->ss, VCPU_SREG_SS);
-+      rsm_load_seg_64(ctxt, &smstate->ds, VCPU_SREG_DS);
-+      rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
-+      rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
-       return X86EMUL_CONTINUE;
- }
-@@ -2623,7 +2593,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
- #ifdef CONFIG_X86_64
-       if (emulator_has_longmode(ctxt))
--              ret = rsm_load_state_64(ctxt, (const char *)&smram);
-+              ret = rsm_load_state_64(ctxt, &smram.smram64);
-       else
- #endif
-               ret = rsm_load_state_32(ctxt, &smram.smram32);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 579a1cb6a7c8..7a4d86f9bdcd 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -10115,20 +10115,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
- }
- #ifdef CONFIG_X86_64
--static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
-+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
-+                                struct kvm_smm_seg_state_64 *state,
-+                                int n)
- {
-       struct kvm_segment seg;
--      int offset;
--      u16 flags;
-       kvm_get_segment(vcpu, &seg, n);
--      offset = 0x7e00 + n * 16;
--
--      flags = enter_smm_get_segment_flags(&seg) >> 8;
--      put_smstate(u16, buf, offset, seg.selector);
--      put_smstate(u16, buf, offset + 2, flags);
--      put_smstate(u32, buf, offset + 4, seg.limit);
--      put_smstate(u64, buf, offset + 8, seg.base);
-+      state->selector = seg.selector;
-+      state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
-+      state->limit = seg.limit;
-+      state->base = seg.base;
- }
- #endif
-@@ -10176,57 +10173,51 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
- }
- #ifdef CONFIG_X86_64
--static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
-+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_state_64 *smram)
- {
-       struct desc_ptr dt;
--      struct kvm_segment seg;
-       unsigned long val;
-       int i;
-       for (i = 0; i < 16; i++)
--              put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
-+              smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
-+
-+      smram->rip    = kvm_rip_read(vcpu);
-+      smram->rflags = kvm_get_rflags(vcpu);
--      put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
--      put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
-       kvm_get_dr(vcpu, 6, &val);
--      put_smstate(u64, buf, 0x7f68, val);
-+      smram->dr6 = val;
-       kvm_get_dr(vcpu, 7, &val);
--      put_smstate(u64, buf, 0x7f60, val);
--
--      put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
--      put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
--      put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
-+      smram->dr7 = val;
--      put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
-+      smram->cr0 = kvm_read_cr0(vcpu);
-+      smram->cr3 = kvm_read_cr3(vcpu);
-+      smram->cr4 = kvm_read_cr4(vcpu);
--      /* revision id */
--      put_smstate(u32, buf, 0x7efc, 0x00020064);
-+      smram->smbase = vcpu->arch.smbase;
-+      smram->smm_revison = 0x00020064;
--      put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
-+      smram->efer = vcpu->arch.efer;
--      kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
--      put_smstate(u16, buf, 0x7e90, seg.selector);
--      put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
--      put_smstate(u32, buf, 0x7e94, seg.limit);
--      put_smstate(u64, buf, 0x7e98, seg.base);
-+      enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
-       static_call(kvm_x86_get_idt)(vcpu, &dt);
--      put_smstate(u32, buf, 0x7e84, dt.size);
--      put_smstate(u64, buf, 0x7e88, dt.address);
-+      smram->idtr.limit = dt.size;
-+      smram->idtr.base = dt.address;
--      kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
--      put_smstate(u16, buf, 0x7e70, seg.selector);
--      put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
--      put_smstate(u32, buf, 0x7e74, seg.limit);
--      put_smstate(u64, buf, 0x7e78, seg.base);
-+      enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
-       static_call(kvm_x86_get_gdt)(vcpu, &dt);
--      put_smstate(u32, buf, 0x7e64, dt.size);
--      put_smstate(u64, buf, 0x7e68, dt.address);
-+      smram->gdtr.limit = dt.size;
-+      smram->gdtr.base = dt.address;
--      for (i = 0; i < 6; i++)
--              enter_smm_save_seg_64(vcpu, buf, i);
-+      enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
-+      enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
-+      enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
-+      enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
-+      enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
-+      enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
- }
- #endif
-@@ -10240,7 +10231,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
-       memset(smram.bytes, 0, sizeof(smram.bytes));
- #ifdef CONFIG_X86_64
-       if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
--              enter_smm_save_state_64(vcpu, (char *)&smram);
-+              enter_smm_save_state_64(vcpu, &smram.smram64);
-       else
- #endif
-               enter_smm_save_state_32(vcpu, &smram.smram32);
diff --git a/patches/kernel/0013-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch b/patches/kernel/0013-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch
new file mode 100644 (file)
index 0000000..efbeab1
--- /dev/null
@@ -0,0 +1,44 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:10 +0300
+Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not
+ long mode capable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When the guest CPUID doesn't have support for long mode, the 32 bit SMRAM
+layout is used, which has no support for preserving EFER and/or SVM
+state.
+
+Note that this isn't relevant to running 32 bit guests on a VM which is
+long mode capable - such a VM can still run 32 bit guests in compatibility
+mode.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/svm/svm.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index bfacbef667d7..6b02f99fe70c 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4394,6 +4394,15 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
+       if (!is_guest_mode(vcpu))
+               return 0;
++      /*
++       * 32 bit SMRAM format doesn't preserve EFER and SVM state.
++       * SVM should not be enabled by the userspace without marking
++       * the CPU as at least long mode capable.
++       */
++
++      if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
++              return 1;
++
+       smram->smram64.svm_guest_flag = 1;
+       smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
diff --git a/patches/kernel/0013-KVM-x86-SVM-use-smram-structs.patch b/patches/kernel/0013-KVM-x86-SVM-use-smram-structs.patch
deleted file mode 100644 (file)
index 0cbb708..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:09 +0300
-Subject: [PATCH] KVM: x86: SVM: use smram structs
-
-This removes the last user of put_smstate/GET_SMSTATE so
-remove these functions as well.
-
-Also add a sanity check that we don't attempt to enter the SMM
-on non long mode capable guest CPU with a running nested guest.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/include/asm/kvm_host.h |  6 ------
- arch/x86/kvm/svm/svm.c          | 21 ++++++---------------
- 2 files changed, 6 insertions(+), 21 deletions(-)
-
-diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
-index 6885f3839e25..f5b82b6f4f84 100644
---- a/arch/x86/include/asm/kvm_host.h
-+++ b/arch/x86/include/asm/kvm_host.h
-@@ -2090,12 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
- #endif
- }
--#define put_smstate(type, buf, offset, val)                      \
--      *(type *)((buf) + (offset) - 0x7e00) = val
--
--#define GET_SMSTATE(type, buf, offset)                \
--      (*(type *)((buf) + (offset) - 0x7e00))
--
- int kvm_cpu_dirty_log_size(void);
- int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index 45c4def86cd3..bfacbef667d7 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -4391,15 +4391,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
-       struct kvm_host_map map_save;
-       int ret;
--      char *smstate = (char *)smram;
--
-       if (!is_guest_mode(vcpu))
-               return 0;
--      /* FED8h - SVM Guest */
--      put_smstate(u64, smstate, 0x7ed8, 1);
--      /* FEE0h - SVM Guest VMCB Physical Address */
--      put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
-+      smram->smram64.svm_guest_flag = 1;
-+      smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
-       svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
-       svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
-@@ -4438,28 +4434,23 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
-       struct kvm_host_map map, map_save;
--      u64 saved_efer, vmcb12_gpa;
-       struct vmcb *vmcb12;
-       int ret;
--      const char *smstate = (const char *)smram;
--
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
-               return 0;
-       /* Non-zero if SMI arrived while vCPU was in guest mode. */
--      if (!GET_SMSTATE(u64, smstate, 0x7ed8))
-+      if (!smram->smram64.svm_guest_flag)
-               return 0;
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
-               return 1;
--      saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
--      if (!(saved_efer & EFER_SVME))
-+      if (!(smram->smram64.efer & EFER_SVME))
-               return 1;
--      vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
--      if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
-+      if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram->smram64.svm_guest_vmcb_gpa), &map) == -EINVAL)
-               return 1;
-       ret = 1;
-@@ -4485,7 +4476,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
-       vmcb12 = map.hva;
-       nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
-       nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
--      ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
-+      ret = enter_svm_guest_mode(vcpu, smram->smram64.svm_guest_vmcb_gpa, vmcb12, false);
-       if (ret)
-               goto unmap_save;
diff --git a/patches/kernel/0014-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch b/patches/kernel/0014-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch
deleted file mode 100644 (file)
index fedb9bd..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:10 +0300
-Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not
- long mode capable
-
-When the guest CPUID doesn't have support for long mode, 32 bit SMRAM
-layout is used and it has no support for preserving EFER and/or SVM
-state.
-
-Note that this isn't relevant to running 32 bit guests on VM which is
-long mode capable - such VM can still run 32 bit guests in compatibility
-mode.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/svm/svm.c | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
-index bfacbef667d7..6b02f99fe70c 100644
---- a/arch/x86/kvm/svm/svm.c
-+++ b/arch/x86/kvm/svm/svm.c
-@@ -4394,6 +4394,15 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
-       if (!is_guest_mode(vcpu))
-               return 0;
-+      /*
-+       * 32 bit SMRAM format doesn't preserve EFER and SVM state.
-+       * SVM should not be enabled by the userspace without marking
-+       * the CPU as at least long mode capable.
-+       */
-+
-+      if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
-+              return 1;
-+
-       smram->smram64.svm_guest_flag = 1;
-       smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
diff --git a/patches/kernel/0014-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch b/patches/kernel/0014-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch
new file mode 100644 (file)
index 0000000..a5e3101
--- /dev/null
@@ -0,0 +1,184 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Wed, 3 Aug 2022 18:50:11 +0300
+Subject: [PATCH] KVM: x86: emulator/smm: preserve interrupt shadow in SMRAM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When #SMI is asserted, the CPU can be in an interrupt shadow
+due to STI or MOV SS.
+
+It is not mandatory per the Intel/AMD PRM to have the #SMI
+blocked during the shadow, and on top of that, since neither
+SVM nor VMX has true support for an SMI window, waiting for
+one instruction would mean single-stepping the guest.
+
+Instead, allow #SMI in this case, but both reset the interrupt
+shadow and stash its value in SMRAM to restore it on exit
+from SMM.
+
+This fixes rare failures seen mostly on Windows guests on VMX,
+when #SMI falls on the STI instruction, which manifests as a
+VM entry failure due to EFLAGS.IF not being set but the STI
+interrupt window still being set in the VMCS.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/emulate.c     | 17 ++++++++++++++---
+ arch/x86/kvm/kvm_emulate.h | 10 ++++++----
+ arch/x86/kvm/x86.c         | 12 ++++++++++++
+ 3 files changed, 32 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 03f9e5aa036e..bb008a5be539 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2435,7 +2435,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+                            const struct kvm_smram_state_32 *smstate)
+ {
+       struct desc_ptr dt;
+-      int i;
++      int i, r;
+       ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
+       ctxt->_eip =  smstate->eip;
+@@ -2470,8 +2470,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+       ctxt->ops->set_smbase(ctxt, smstate->smbase);
+-      return rsm_enter_protected_mode(ctxt, smstate->cr0,
+-                                      smstate->cr3, smstate->cr4);
++      r = rsm_enter_protected_mode(ctxt, smstate->cr0,
++                                   smstate->cr3, smstate->cr4);
++
++      if (r != X86EMUL_CONTINUE)
++              return r;
++
++      ctxt->ops->set_int_shadow(ctxt, 0);
++      ctxt->interruptibility = (u8)smstate->int_shadow;
++
++      return X86EMUL_CONTINUE;
+ }
+ #ifdef CONFIG_X86_64
+@@ -2520,6 +2528,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+       rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
+       rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
++      ctxt->ops->set_int_shadow(ctxt, 0);
++      ctxt->interruptibility = (u8)smstate->int_shadow;
++
+       return X86EMUL_CONTINUE;
+ }
+ #endif
+diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
+index 76c0b8e7890b..a7313add0f2a 100644
+--- a/arch/x86/kvm/kvm_emulate.h
++++ b/arch/x86/kvm/kvm_emulate.h
+@@ -234,6 +234,7 @@ struct x86_emulate_ops {
+       bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
+       void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
++      void (*set_int_shadow)(struct x86_emulate_ctxt *ctxt, u8 shadow);
+       unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
+       void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
+@@ -518,7 +519,8 @@ struct kvm_smram_state_32 {
+       u32 reserved1[62];
+       u32 smbase;
+       u32 smm_revision;
+-      u32 reserved2[5];
++      u32 reserved2[4];
++      u32 int_shadow; /* KVM extension */
+       u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
+       u32 reserved3[5];
+@@ -566,6 +568,7 @@ static inline void __check_smram32_offsets(void)
+       __CHECK_SMRAM32_OFFSET(smbase,          0xFEF8);
+       __CHECK_SMRAM32_OFFSET(smm_revision,    0xFEFC);
+       __CHECK_SMRAM32_OFFSET(reserved2,       0xFF00);
++      __CHECK_SMRAM32_OFFSET(int_shadow,      0xFF10);
+       __CHECK_SMRAM32_OFFSET(cr4,             0xFF14);
+       __CHECK_SMRAM32_OFFSET(reserved3,       0xFF18);
+       __CHECK_SMRAM32_OFFSET(ds,              0xFF2C);
+@@ -625,7 +628,7 @@ struct kvm_smram_state_64 {
+       u64 io_restart_rsi;
+       u64 io_restart_rdi;
+       u32 io_restart_dword;
+-      u32 reserved1;
++      u32 int_shadow;
+       u8 io_inst_restart;
+       u8 auto_hlt_restart;
+       u8 reserved2[6];
+@@ -663,7 +666,6 @@ struct kvm_smram_state_64 {
+       u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
+ };
+-
+ static inline void __check_smram64_offsets(void)
+ {
+ #define __CHECK_SMRAM64_OFFSET(field, offset) \
+@@ -684,7 +686,7 @@ static inline void __check_smram64_offsets(void)
+       __CHECK_SMRAM64_OFFSET(io_restart_rsi,          0xFEB0);
+       __CHECK_SMRAM64_OFFSET(io_restart_rdi,          0xFEB8);
+       __CHECK_SMRAM64_OFFSET(io_restart_dword,        0xFEC0);
+-      __CHECK_SMRAM64_OFFSET(reserved1,               0xFEC4);
++      __CHECK_SMRAM64_OFFSET(int_shadow,              0xFEC4);
+       __CHECK_SMRAM64_OFFSET(io_inst_restart,         0xFEC8);
+       __CHECK_SMRAM64_OFFSET(auto_hlt_restart,        0xFEC9);
+       __CHECK_SMRAM64_OFFSET(reserved2,               0xFECA);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 7a4d86f9bdcd..609829ec1d13 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8173,6 +8173,11 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
+       static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
+ }
++static void emulator_set_int_shadow(struct x86_emulate_ctxt *ctxt, u8 shadow)
++{
++       static_call(kvm_x86_set_interrupt_shadow)(emul_to_vcpu(ctxt), shadow);
++}
++
+ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+ {
+       return emul_to_vcpu(ctxt)->arch.hflags;
+@@ -8253,6 +8258,7 @@ static const struct x86_emulate_ops emulate_ops = {
+       .guest_has_fxsr      = emulator_guest_has_fxsr,
+       .guest_has_rdpid     = emulator_guest_has_rdpid,
+       .set_nmi_mask        = emulator_set_nmi_mask,
++      .set_int_shadow      = emulator_set_int_shadow,
+       .get_hflags          = emulator_get_hflags,
+       .exiting_smm         = emulator_exiting_smm,
+       .leave_smm           = emulator_leave_smm,
+@@ -10170,6 +10176,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
+       smram->cr4 = kvm_read_cr4(vcpu);
+       smram->smm_revision = 0x00020000;
+       smram->smbase = vcpu->arch.smbase;
++
++      smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
+ }
+ #ifdef CONFIG_X86_64
+@@ -10218,6 +10226,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_stat
+       enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
+       enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
+       enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
++
++      smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
+ }
+ #endif
+@@ -10254,6 +10264,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
+       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+       kvm_rip_write(vcpu, 0x8000);
++      static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
++
+       cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+       static_call(kvm_x86_set_cr0)(vcpu, cr0);
+       vcpu->arch.cr0 = cr0;
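For context, the patch above reduces to one round trip: stash the interrupt shadow in SMRAM and clear it on SMM entry, then restore it on RSM. Below is a self-contained C sketch of that round trip; get_shadow()/set_shadow() and the one-field struct are hypothetical stand-ins for the kvm_x86_{get,set}_interrupt_shadow static calls and the real SMRAM layout.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the vCPU's interrupt-shadow state. */
    static uint8_t vcpu_shadow;
    static uint8_t get_shadow(void) { return vcpu_shadow; }
    static void set_shadow(uint8_t s) { vcpu_shadow = s; }

    /* int_shadow sits at 0xFEC4 in the real 64-bit layout. */
    struct smram_sketch { uint32_t int_shadow; };

    static void smm_enter(struct smram_sketch *smram)
    {
            smram->int_shadow = get_shadow(); /* stash it in SMRAM */
            set_shadow(0);                    /* SMM handler starts clean */
    }

    static void smm_exit(const struct smram_sketch *smram)
    {
            set_shadow((uint8_t)smram->int_shadow); /* restore on RSM */
    }

    int main(void)
    {
            struct smram_sketch smram = { 0 };
            vcpu_shadow = 1;   /* e.g. an #SMI landing right after sti */
            smm_enter(&smram); /* shadow saved, then cleared */
            smm_exit(&smram);  /* shadow restored */
            printf("shadow after RSM: %d\n", vcpu_shadow);
            return 0;
    }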
diff --git a/patches/kernel/0015-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch b/patches/kernel/0015-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch
deleted file mode 100644 (file)
index 7248b78..0000000
+++ /dev/null
@@ -1,180 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Maxim Levitsky <mlevitsk@redhat.com>
-Date: Wed, 3 Aug 2022 18:50:11 +0300
-Subject: [PATCH] KVM: x86: emulator/smm: preserve interrupt shadow in SMRAM
-
-When #SMI is asserted, the CPU can be in interrupt shadow
-due to sti or mov ss.
-
-It is not mandatory per the Intel/AMD PRM to have the #SMI
-blocked during the shadow, and on top of
-that, since neither SVM nor VMX has true support for an SMI
-window, waiting for one instruction would mean single-stepping
-the guest.
-
-Instead, allow #SMI in this case, but both reset the interrupt
-window and stash its value in SMRAM to restore it on exit
-from SMM.
-
-This fixes rare failures seen mostly on Windows guests on VMX,
-when the #SMI falls on the sti instruction, which manifests in a
-VM entry failure due to EFLAGS.IF not being set but the STI interrupt
-window still being set in the VMCS.
-
-Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- arch/x86/kvm/emulate.c     | 17 ++++++++++++++---
- arch/x86/kvm/kvm_emulate.h | 10 ++++++----
- arch/x86/kvm/x86.c         | 12 ++++++++++++
- 3 files changed, 32 insertions(+), 7 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index 03f9e5aa036e..bb008a5be539 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -2435,7 +2435,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
-                            const struct kvm_smram_state_32 *smstate)
- {
-       struct desc_ptr dt;
--      int i;
-+      int i, r;
-       ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
-       ctxt->_eip =  smstate->eip;
-@@ -2470,8 +2470,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
-       ctxt->ops->set_smbase(ctxt, smstate->smbase);
--      return rsm_enter_protected_mode(ctxt, smstate->cr0,
--                                      smstate->cr3, smstate->cr4);
-+      r = rsm_enter_protected_mode(ctxt, smstate->cr0,
-+                                   smstate->cr3, smstate->cr4);
-+
-+      if (r != X86EMUL_CONTINUE)
-+              return r;
-+
-+      ctxt->ops->set_int_shadow(ctxt, 0);
-+      ctxt->interruptibility = (u8)smstate->int_shadow;
-+
-+      return X86EMUL_CONTINUE;
- }
- #ifdef CONFIG_X86_64
-@@ -2520,6 +2528,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
-       rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
-       rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
-+      ctxt->ops->set_int_shadow(ctxt, 0);
-+      ctxt->interruptibility = (u8)smstate->int_shadow;
-+
-       return X86EMUL_CONTINUE;
- }
- #endif
-diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
-index 76c0b8e7890b..a7313add0f2a 100644
---- a/arch/x86/kvm/kvm_emulate.h
-+++ b/arch/x86/kvm/kvm_emulate.h
-@@ -234,6 +234,7 @@ struct x86_emulate_ops {
-       bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
-       void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
-+      void (*set_int_shadow)(struct x86_emulate_ctxt *ctxt, u8 shadow);
-       unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
-       void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
-@@ -518,7 +519,8 @@ struct kvm_smram_state_32 {
-       u32 reserved1[62];
-       u32 smbase;
-       u32 smm_revision;
--      u32 reserved2[5];
-+      u32 reserved2[4];
-+      u32 int_shadow; /* KVM extension */
-       u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
-       u32 reserved3[5];
-@@ -566,6 +568,7 @@ static inline void __check_smram32_offsets(void)
-       __CHECK_SMRAM32_OFFSET(smbase,          0xFEF8);
-       __CHECK_SMRAM32_OFFSET(smm_revision,    0xFEFC);
-       __CHECK_SMRAM32_OFFSET(reserved2,       0xFF00);
-+      __CHECK_SMRAM32_OFFSET(int_shadow,      0xFF10);
-       __CHECK_SMRAM32_OFFSET(cr4,             0xFF14);
-       __CHECK_SMRAM32_OFFSET(reserved3,       0xFF18);
-       __CHECK_SMRAM32_OFFSET(ds,              0xFF2C);
-@@ -625,7 +628,7 @@ struct kvm_smram_state_64 {
-       u64 io_restart_rsi;
-       u64 io_restart_rdi;
-       u32 io_restart_dword;
--      u32 reserved1;
-+      u32 int_shadow;
-       u8 io_inst_restart;
-       u8 auto_hlt_restart;
-       u8 reserved2[6];
-@@ -663,7 +666,6 @@ struct kvm_smram_state_64 {
-       u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
- };
--
- static inline void __check_smram64_offsets(void)
- {
- #define __CHECK_SMRAM64_OFFSET(field, offset) \
-@@ -684,7 +686,7 @@ static inline void __check_smram64_offsets(void)
-       __CHECK_SMRAM64_OFFSET(io_restart_rsi,          0xFEB0);
-       __CHECK_SMRAM64_OFFSET(io_restart_rdi,          0xFEB8);
-       __CHECK_SMRAM64_OFFSET(io_restart_dword,        0xFEC0);
--      __CHECK_SMRAM64_OFFSET(reserved1,               0xFEC4);
-+      __CHECK_SMRAM64_OFFSET(int_shadow,              0xFEC4);
-       __CHECK_SMRAM64_OFFSET(io_inst_restart,         0xFEC8);
-       __CHECK_SMRAM64_OFFSET(auto_hlt_restart,        0xFEC9);
-       __CHECK_SMRAM64_OFFSET(reserved2,               0xFECA);
-diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index 7a4d86f9bdcd..609829ec1d13 100644
---- a/arch/x86/kvm/x86.c
-+++ b/arch/x86/kvm/x86.c
-@@ -8173,6 +8173,11 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
-       static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
- }
-+static void emulator_set_int_shadow(struct x86_emulate_ctxt *ctxt, u8 shadow)
-+{
-+       static_call(kvm_x86_set_interrupt_shadow)(emul_to_vcpu(ctxt), shadow);
-+}
-+
- static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
- {
-       return emul_to_vcpu(ctxt)->arch.hflags;
-@@ -8253,6 +8258,7 @@ static const struct x86_emulate_ops emulate_ops = {
-       .guest_has_fxsr      = emulator_guest_has_fxsr,
-       .guest_has_rdpid     = emulator_guest_has_rdpid,
-       .set_nmi_mask        = emulator_set_nmi_mask,
-+      .set_int_shadow      = emulator_set_int_shadow,
-       .get_hflags          = emulator_get_hflags,
-       .exiting_smm         = emulator_exiting_smm,
-       .leave_smm           = emulator_leave_smm,
-@@ -10170,6 +10176,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
-       smram->cr4 = kvm_read_cr4(vcpu);
-       smram->smm_revision = 0x00020000;
-       smram->smbase = vcpu->arch.smbase;
-+
-+      smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
- }
- #ifdef CONFIG_X86_64
-@@ -10218,6 +10226,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_stat
-       enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
-       enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
-       enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
-+
-+      smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
- }
- #endif
-@@ -10254,6 +10264,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
-       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
-       kvm_rip_write(vcpu, 0x8000);
-+      static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
-+
-       cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
-       static_call(kvm_x86_set_cr0)(vcpu, cr0);
-       vcpu->arch.cr0 = cr0;
diff --git a/patches/kernel/0015-Revert-RDMA-irdma-Report-the-correct-link-speed.patch b/patches/kernel/0015-Revert-RDMA-irdma-Report-the-correct-link-speed.patch
new file mode 100644 (file)
index 0000000..18a6565
--- /dev/null
@@ -0,0 +1,72 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Date: Sat, 7 Jan 2023 13:48:41 +0100
+Subject: [PATCH] Revert "RDMA/irdma: Report the correct link speed"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Seems to cause a regression with some NICs:
+https://lore.kernel.org/netdev/CAK8fFZ6A_Gphw_3-QMGKEFQk=sfCw1Qmq0TVZK3rtAi7vb621A@mail.gmail.com/
+
+This reverts commit e8553504e366c8a47d1f6156c30d6eb9778cda13.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/infiniband/hw/irdma/verbs.c | 35 ++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
+index f6973ea55eda..132fe91bb799 100644
+--- a/drivers/infiniband/hw/irdma/verbs.c
++++ b/drivers/infiniband/hw/irdma/verbs.c
+@@ -63,6 +63,36 @@ static int irdma_query_device(struct ib_device *ibdev,
+       return 0;
+ }
++/**
++ * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
++ * @link_speed: netdev phy link speed
++ * @active_speed: IB port speed
++ * @active_width: IB port width
++ */
++static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
++                                        u8 *active_width)
++{
++      if (link_speed <= SPEED_1000) {
++              *active_width = IB_WIDTH_1X;
++              *active_speed = IB_SPEED_SDR;
++      } else if (link_speed <= SPEED_10000) {
++              *active_width = IB_WIDTH_1X;
++              *active_speed = IB_SPEED_FDR10;
++      } else if (link_speed <= SPEED_20000) {
++              *active_width = IB_WIDTH_4X;
++              *active_speed = IB_SPEED_DDR;
++      } else if (link_speed <= SPEED_25000) {
++              *active_width = IB_WIDTH_1X;
++              *active_speed = IB_SPEED_EDR;
++      } else if (link_speed <= SPEED_40000) {
++              *active_width = IB_WIDTH_4X;
++              *active_speed = IB_SPEED_FDR10;
++      } else {
++              *active_width = IB_WIDTH_4X;
++              *active_speed = IB_SPEED_EDR;
++      }
++}
++
+ /**
+  * irdma_query_port - get port attributes
+  * @ibdev: device pointer from stack
+@@ -90,9 +120,8 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
+               props->state = IB_PORT_DOWN;
+               props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+       }
+-
+-      ib_get_eth_speed(ibdev, port, &props->active_speed,
+-                       &props->active_width);
++      irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
++                                    &props->active_width);
+       if (rdma_protocol_roce(ibdev, 1)) {
+               props->gid_tbl_len = 32;
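The reinstated helper above is a pure threshold ladder from a netdev speed (in Mb/s) to an IB speed/width pair, and the revert pins the port query to SPEED_100000, i.e. the final branch. The same ladder restated as a standalone sketch; the string labels stand in for the IB_SPEED_*/IB_WIDTH_* enums.

    #include <stdio.h>

    /* Sketch of irdma_get_eth_speed_and_width's thresholds; link_speed
     * is in Mb/s like the kernel's SPEED_* constants. */
    static const char *ib_speed_for(unsigned int link_speed, const char **width)
    {
            if (link_speed <= 1000)        { *width = "1X"; return "SDR"; }
            else if (link_speed <= 10000)  { *width = "1X"; return "FDR10"; }
            else if (link_speed <= 20000)  { *width = "4X"; return "DDR"; }
            else if (link_speed <= 25000)  { *width = "1X"; return "EDR"; }
            else if (link_speed <= 40000)  { *width = "4X"; return "FDR10"; }
            *width = "4X";
            return "EDR";
    }

    int main(void)
    {
            const char *width;
            /* the revert hardcodes SPEED_100000, so every port lands here */
            const char *speed = ib_speed_for(100000, &width);
            printf("100G -> IB_SPEED_%s, IB_WIDTH_%s\n", speed, width);
            return 0;
    }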
diff --git a/patches/kernel/0016-Revert-RDMA-irdma-Report-the-correct-link-speed.patch b/patches/kernel/0016-Revert-RDMA-irdma-Report-the-correct-link-speed.patch
deleted file mode 100644 (file)
index accfb6c..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Date: Sat, 7 Jan 2023 13:48:41 +0100
-Subject: [PATCH] Revert "RDMA/irdma: Report the correct link speed"
-
-Seems to cause a regression with some NICs:
-https://lore.kernel.org/netdev/CAK8fFZ6A_Gphw_3-QMGKEFQk=sfCw1Qmq0TVZK3rtAi7vb621A@mail.gmail.com/
-
-This reverts commit e8553504e366c8a47d1f6156c30d6eb9778cda13.
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- drivers/infiniband/hw/irdma/verbs.c | 35 ++++++++++++++++++++++++++---
- 1 file changed, 32 insertions(+), 3 deletions(-)
-
-diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
-index f6973ea55eda..132fe91bb799 100644
---- a/drivers/infiniband/hw/irdma/verbs.c
-+++ b/drivers/infiniband/hw/irdma/verbs.c
-@@ -63,6 +63,36 @@ static int irdma_query_device(struct ib_device *ibdev,
-       return 0;
- }
-+/**
-+ * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
-+ * @link_speed: netdev phy link speed
-+ * @active_speed: IB port speed
-+ * @active_width: IB port width
-+ */
-+static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
-+                                        u8 *active_width)
-+{
-+      if (link_speed <= SPEED_1000) {
-+              *active_width = IB_WIDTH_1X;
-+              *active_speed = IB_SPEED_SDR;
-+      } else if (link_speed <= SPEED_10000) {
-+              *active_width = IB_WIDTH_1X;
-+              *active_speed = IB_SPEED_FDR10;
-+      } else if (link_speed <= SPEED_20000) {
-+              *active_width = IB_WIDTH_4X;
-+              *active_speed = IB_SPEED_DDR;
-+      } else if (link_speed <= SPEED_25000) {
-+              *active_width = IB_WIDTH_1X;
-+              *active_speed = IB_SPEED_EDR;
-+      } else if (link_speed <= SPEED_40000) {
-+              *active_width = IB_WIDTH_4X;
-+              *active_speed = IB_SPEED_FDR10;
-+      } else {
-+              *active_width = IB_WIDTH_4X;
-+              *active_speed = IB_SPEED_EDR;
-+      }
-+}
-+
- /**
-  * irdma_query_port - get port attributes
-  * @ibdev: device pointer from stack
-@@ -90,9 +120,8 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
-               props->state = IB_PORT_DOWN;
-               props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
-       }
--
--      ib_get_eth_speed(ibdev, port, &props->active_speed,
--                       &props->active_width);
-+      irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
-+                                    &props->active_width);
-       if (rdma_protocol_roce(ibdev, 1)) {
-               props->gid_tbl_len = 32;
diff --git a/patches/kernel/0016-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch b/patches/kernel/0016-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch
new file mode 100644 (file)
index 0000000..a18708e
--- /dev/null
@@ -0,0 +1,122 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Date: Sat, 7 Jan 2023 13:50:22 +0100
+Subject: [PATCH] Revert "gro: add support of (hw)gro packets to gro stack"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Seems to be the cause of a regression in network performance:
+https://lore.kernel.org/netdev/CAK8fFZ5pzMaw3U1KXgC_OK4shKGsN=HDcR62cfPOuL0umXE1Ww@mail.gmail.com/
+
+This reverts commit 5eddb24901ee49eee23c0bfce6af2e83fd5679bd.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ net/core/gro.c         | 18 ++++--------------
+ net/ipv4/tcp_offload.c | 17 ++---------------
+ 2 files changed, 6 insertions(+), 29 deletions(-)
+
+diff --git a/net/core/gro.c b/net/core/gro.c
+index bc9451743307..b4190eb08467 100644
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -160,7 +160,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
+       unsigned int gro_max_size;
+       unsigned int new_truesize;
+       struct sk_buff *lp;
+-      int segs;
+       /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
+       gro_max_size = READ_ONCE(p->dev->gro_max_size);
+@@ -176,7 +175,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
+                       return -E2BIG;
+       }
+-      segs = NAPI_GRO_CB(skb)->count;
+       lp = NAPI_GRO_CB(p)->last;
+       pinfo = skb_shinfo(lp);
+@@ -267,7 +265,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
+       lp = p;
+ done:
+-      NAPI_GRO_CB(p)->count += segs;
++      NAPI_GRO_CB(p)->count++;
+       p->data_len += len;
+       p->truesize += delta_truesize;
+       p->len += len;
+@@ -498,15 +496,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
+               BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+                                        sizeof(u32))); /* Avoid slow unaligned acc */
+               *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
+-              NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
++              NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
+               NAPI_GRO_CB(skb)->is_atomic = 1;
+-              NAPI_GRO_CB(skb)->count = 1;
+-              if (unlikely(skb_is_gso(skb))) {
+-                      NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+-                      /* Only support TCP at the moment. */
+-                      if (!skb_is_gso_tcp(skb))
+-                              NAPI_GRO_CB(skb)->flush = 1;
+-              }
+               /* Setup for GRO checksum validation */
+               switch (skb->ip_summed) {
+@@ -554,10 +545,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
+       else
+               gro_list->count++;
++      NAPI_GRO_CB(skb)->count = 1;
+       NAPI_GRO_CB(skb)->age = jiffies;
+       NAPI_GRO_CB(skb)->last = skb;
+-      if (!skb_is_gso(skb))
+-              skb_shinfo(skb)->gso_size = skb_gro_len(skb);
++      skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+       list_add(&skb->list, &gro_list->list);
+       ret = GRO_HELD;
+@@ -669,7 +660,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+       skb->encapsulation = 0;
+       skb_shinfo(skb)->gso_type = 0;
+-      skb_shinfo(skb)->gso_size = 0;
+       if (unlikely(skb->slow_gro)) {
+               skb_orphan(skb);
+               skb_ext_reset(skb);
+diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
+index 45dda7889387..a844a0d38482 100644
+--- a/net/ipv4/tcp_offload.c
++++ b/net/ipv4/tcp_offload.c
+@@ -255,15 +255,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+       mss = skb_shinfo(p)->gso_size;
+-      /* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
+-       * If it is a single frame, do not aggregate it if its length
+-       * is bigger than our mss.
+-       */
+-      if (unlikely(skb_is_gso(skb)))
+-              flush |= (mss != skb_shinfo(skb)->gso_size);
+-      else
+-              flush |= (len - 1) >= mss;
+-
++      flush |= (len - 1) >= mss;
+       flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+ #ifdef CONFIG_TLS_DEVICE
+       flush |= p->decrypted ^ skb->decrypted;
+@@ -277,12 +269,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+       tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+ out_check_final:
+-      /* Force a flush if last segment is smaller than mss. */
+-      if (unlikely(skb_is_gso(skb)))
+-              flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
+-      else
+-              flush = len < mss;
+-
++      flush = len < mss;
+       flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
+                                       TCP_FLAG_RST | TCP_FLAG_SYN |
+                                       TCP_FLAG_FIN));
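Behaviorally, the revert above makes the GRO engine treat already-aggregated (GSO) packets as unmergeable again and returns the final-segment check to a plain length-vs-mss test. Below is a toy model that collapses both flush sites (dev_gro_receive and tcp_gro_receive's out_check_final) into one predicate for illustration; the struct fields are stand-ins for skb state, not kernel APIs.

    #include <stdbool.h>
    #include <stdio.h>

    struct pkt_sketch {
            unsigned int len;  /* TCP payload length */
            unsigned int mss;
            bool is_gso;       /* already hw-GRO/GSO aggregated */
    };

    /* With the revert: GSO packets never enter the GRO merge path, and a
     * segment shorter than the mss forces a flush of the flow. */
    static bool flush_reverted(const struct pkt_sketch *p)
    {
            return p->is_gso || p->len < p->mss;
    }

    int main(void)
    {
            struct pkt_sketch hw_gro = { .len = 5840, .mss = 1460, .is_gso = true };
            struct pkt_sketch plain  = { .len = 1460, .mss = 1460, .is_gso = false };
            printf("hw-gro flushed: %d\n", flush_reverted(&hw_gro)); /* 1 */
            printf("plain flushed:  %d\n", flush_reverted(&plain));  /* 0 */
            return 0;
    }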
diff --git a/patches/kernel/0017-Revert-fortify-Do-not-cast-to-unsigned-char.patch b/patches/kernel/0017-Revert-fortify-Do-not-cast-to-unsigned-char.patch
new file mode 100644 (file)
index 0000000..892e153
--- /dev/null
@@ -0,0 +1,29 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Date: Tue, 10 Jan 2023 08:52:40 +0100
+Subject: [PATCH] Revert "fortify: Do not cast to "unsigned char""
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 106b7a61c488d2022f44e3531ce33461c7c0685f.
+
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ include/linux/fortify-string.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
+index 5001a11258e4..1067a8450826 100644
+--- a/include/linux/fortify-string.h
++++ b/include/linux/fortify-string.h
+@@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
+ #define __compiletime_strlen(p)                                       \
+ ({                                                            \
+-      char *__p = (char *)(p);                                \
++      unsigned char *__p = (unsigned char *)(p);              \
+       size_t __ret = SIZE_MAX;                                \
+       size_t __p_size = __member_size(p);                     \
+       if (__p_size != SIZE_MAX &&                             \
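The one-line change above restores the unsigned char cast inside __compiletime_strlen. As general C background (the revert itself states no rationale): plain char has implementation-defined signedness, so the same byte can compare differently depending on which type walks the buffer. A minimal illustration:

    #include <stdio.h>

    int main(void)
    {
            char c = (char)0xE9;                   /* e.g. a UTF-8 lead byte */
            unsigned char u = (unsigned char)0xE9;
            /* on signed-char targets (x86 Linux), c promotes to -23 */
            printf("as char:          %d\n", c);
            printf("as unsigned char: %d\n", u);
            return 0;
    }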
diff --git a/patches/kernel/0017-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch b/patches/kernel/0017-Revert-gro-add-support-of-hw-gro-packets-to-gro-stac.patch
deleted file mode 100644 (file)
index da34516..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Date: Sat, 7 Jan 2023 13:50:22 +0100
-Subject: [PATCH] Revert "gro: add support of (hw)gro packets to gro stack"
-
-Seems to be the cause of a regression in network performance:
-https://lore.kernel.org/netdev/CAK8fFZ5pzMaw3U1KXgC_OK4shKGsN=HDcR62cfPOuL0umXE1Ww@mail.gmail.com/
-
-This reverts commit 5eddb24901ee49eee23c0bfce6af2e83fd5679bd.
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- net/core/gro.c         | 18 ++++--------------
- net/ipv4/tcp_offload.c | 17 ++---------------
- 2 files changed, 6 insertions(+), 29 deletions(-)
-
-diff --git a/net/core/gro.c b/net/core/gro.c
-index bc9451743307..b4190eb08467 100644
---- a/net/core/gro.c
-+++ b/net/core/gro.c
-@@ -160,7 +160,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
-       unsigned int gro_max_size;
-       unsigned int new_truesize;
-       struct sk_buff *lp;
--      int segs;
-       /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
-       gro_max_size = READ_ONCE(p->dev->gro_max_size);
-@@ -176,7 +175,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
-                       return -E2BIG;
-       }
--      segs = NAPI_GRO_CB(skb)->count;
-       lp = NAPI_GRO_CB(p)->last;
-       pinfo = skb_shinfo(lp);
-@@ -267,7 +265,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
-       lp = p;
- done:
--      NAPI_GRO_CB(p)->count += segs;
-+      NAPI_GRO_CB(p)->count++;
-       p->data_len += len;
-       p->truesize += delta_truesize;
-       p->len += len;
-@@ -498,15 +496,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
-               BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
-                                        sizeof(u32))); /* Avoid slow unaligned acc */
-               *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
--              NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
-+              NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
-               NAPI_GRO_CB(skb)->is_atomic = 1;
--              NAPI_GRO_CB(skb)->count = 1;
--              if (unlikely(skb_is_gso(skb))) {
--                      NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
--                      /* Only support TCP at the moment. */
--                      if (!skb_is_gso_tcp(skb))
--                              NAPI_GRO_CB(skb)->flush = 1;
--              }
-               /* Setup for GRO checksum validation */
-               switch (skb->ip_summed) {
-@@ -554,10 +545,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
-       else
-               gro_list->count++;
-+      NAPI_GRO_CB(skb)->count = 1;
-       NAPI_GRO_CB(skb)->age = jiffies;
-       NAPI_GRO_CB(skb)->last = skb;
--      if (!skb_is_gso(skb))
--              skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-+      skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-       list_add(&skb->list, &gro_list->list);
-       ret = GRO_HELD;
-@@ -669,7 +660,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
-       skb->encapsulation = 0;
-       skb_shinfo(skb)->gso_type = 0;
--      skb_shinfo(skb)->gso_size = 0;
-       if (unlikely(skb->slow_gro)) {
-               skb_orphan(skb);
-               skb_ext_reset(skb);
-diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
-index 45dda7889387..a844a0d38482 100644
---- a/net/ipv4/tcp_offload.c
-+++ b/net/ipv4/tcp_offload.c
-@@ -255,15 +255,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
-       mss = skb_shinfo(p)->gso_size;
--      /* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
--       * If it is a single frame, do not aggregate it if its length
--       * is bigger than our mss.
--       */
--      if (unlikely(skb_is_gso(skb)))
--              flush |= (mss != skb_shinfo(skb)->gso_size);
--      else
--              flush |= (len - 1) >= mss;
--
-+      flush |= (len - 1) >= mss;
-       flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
- #ifdef CONFIG_TLS_DEVICE
-       flush |= p->decrypted ^ skb->decrypted;
-@@ -277,12 +269,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
-       tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
- out_check_final:
--      /* Force a flush if last segment is smaller than mss. */
--      if (unlikely(skb_is_gso(skb)))
--              flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
--      else
--              flush = len < mss;
--
-+      flush = len < mss;
-       flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
-                                       TCP_FLAG_RST | TCP_FLAG_SYN |
-                                       TCP_FLAG_FIN));
diff --git a/patches/kernel/0018-Revert-fortify-Do-not-cast-to-unsigned-char.patch b/patches/kernel/0018-Revert-fortify-Do-not-cast-to-unsigned-char.patch
deleted file mode 100644 (file)
index 6d7f479..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Thomas Lamprecht <t.lamprecht@proxmox.com>
-Date: Tue, 10 Jan 2023 08:52:40 +0100
-Subject: [PATCH] Revert "fortify: Do not cast to "unsigned char""
-
-This reverts commit 106b7a61c488d2022f44e3531ce33461c7c0685f.
-
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- include/linux/fortify-string.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
-index 5001a11258e4..1067a8450826 100644
---- a/include/linux/fortify-string.h
-+++ b/include/linux/fortify-string.h
-@@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
- #define __compiletime_strlen(p)                                       \
- ({                                                            \
--      char *__p = (char *)(p);                                \
-+      unsigned char *__p = (unsigned char *)(p);              \
-       size_t __ret = SIZE_MAX;                                \
-       size_t __p_size = __member_size(p);                     \
-       if (__p_size != SIZE_MAX &&                             \