]> git.proxmox.com Git - pve-kernel.git/commitdiff
backport constraining guest-supported xfeatures only at KVM_GET_XSAVE{2}
authorThomas Lamprecht <t.lamprecht@proxmox.com>
Sat, 21 Oct 2023 13:16:25 +0000 (15:16 +0200)
committerThomas Lamprecht <t.lamprecht@proxmox.com>
Sat, 21 Oct 2023 13:16:56 +0000 (15:16 +0200)
This improves compatibility for guests w.r.t. live-migration, or live
snapshot rollback, to hosts with less (FPU) xfeatures supported, as
long as the set of features that was actually exposed to the guest is
still supported.

This improves on the ad856280ddea ("x86/kvm/fpu: Limit guest
user_xfeatures to supported bits of XCR0") bug fix.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch [new file with mode: 0644]
patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch [new file with mode: 0644]

diff --git a/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch b/patches/kernel/0016-x86-fpu-Allow-caller-to-constrain-xfeatures-when-cop.patch
new file mode 100644 (file)
index 0000000..83a64ce
--- /dev/null
@@ -0,0 +1,164 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:52 -0700
+Subject: [PATCH] x86/fpu: Allow caller to constrain xfeatures when copying to
+ uabi buffer
+
+Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can
+constrain which xfeatures are saved into the userspace buffer without
+having to modify the user_xfeatures field in KVM's guest_fpu state.
+
+KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to
+guest must not show up in the effective xstate_bv field of the buffer.
+Saving only the guest-supported xfeatures allows userspace to load the
+saved state on a different host with a fewer xfeatures, so long as the
+target host supports the xfeatures that are exposed to the guest.
+
+KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to
+the set of guest-supported xfeatures, but doing so broke KVM's historical
+ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that
+are supported by the *host*.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230928001956.924301-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 18164f66e6c59fda15c198b371fa008431efdb22)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/include/asm/fpu/api.h |  3 ++-
+ arch/x86/kernel/fpu/core.c     |  5 +++--
+ arch/x86/kernel/fpu/xstate.c   |  7 +++++--
+ arch/x86/kernel/fpu/xstate.h   |  3 ++-
+ arch/x86/kvm/x86.c             | 21 +++++++++------------
+ 5 files changed, 21 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
+index b475d9a582b8..e829fa4c6788 100644
+--- a/arch/x86/include/asm/fpu/api.h
++++ b/arch/x86/include/asm/fpu/api.h
+@@ -148,7 +148,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
+ static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
+ #endif
+-extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
++extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
++                                         unsigned int size, u64 xfeatures, u32 pkru);
+ extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+ static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
+index caf33486dc5e..cddd5018e6a4 100644
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
+ EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+ void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+-                                  unsigned int size, u32 pkru)
++                                  unsigned int size, u64 xfeatures, u32 pkru)
+ {
+       struct fpstate *kstate = gfpu->fpstate;
+       union fpregs_state *ustate = buf;
+       struct membuf mb = { .p = buf, .left = size };
+       if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+-              __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
++              __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
++                                        XSTATE_COPY_XSAVE);
+       } else {
+               memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+                      sizeof(ustate->fxsave));
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 1afbc4866b10..463ec0cd0dab 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1053,6 +1053,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+  * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+  * @to:               membuf descriptor
+  * @fpstate:  The fpstate buffer from which to copy
++ * @xfeatures:        The mask of xfeatures to save (XSAVE mode only)
+  * @pkru_val: The PKRU value to store in the PKRU component
+  * @copy_mode:        The requested copy mode
+  *
+@@ -1063,7 +1064,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+  * It supports partial copy but @to.pos always starts from zero.
+  */
+ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+-                             u32 pkru_val, enum xstate_copy_mode copy_mode)
++                             u64 xfeatures, u32 pkru_val,
++                             enum xstate_copy_mode copy_mode)
+ {
+       const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+       struct xregs_state *xinit = &init_fpstate.regs.xsave;
+@@ -1087,7 +1089,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+               break;
+       case XSTATE_COPY_XSAVE:
+-              header.xfeatures &= fpstate->user_xfeatures;
++              header.xfeatures &= fpstate->user_xfeatures & xfeatures;
+               break;
+       }
+@@ -1189,6 +1191,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+                            enum xstate_copy_mode copy_mode)
+ {
+       __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
++                                tsk->thread.fpu.fpstate->user_xfeatures,
+                                 tsk->thread.pkru, copy_mode);
+ }
+diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
+index a4ecb04d8d64..3518fb26d06b 100644
+--- a/arch/x86/kernel/fpu/xstate.h
++++ b/arch/x86/kernel/fpu/xstate.h
+@@ -43,7 +43,8 @@ enum xstate_copy_mode {
+ struct membuf;
+ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+-                                    u32 pkru_val, enum xstate_copy_mode copy_mode);
++                                    u64 xfeatures, u32 pkru_val,
++                                    enum xstate_copy_mode copy_mode);
+ extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+                                   enum xstate_copy_mode mode);
+ extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index ff92ff41d5ce..a43a950d04cb 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5314,26 +5314,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+       return 0;
+ }
+-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+-                                       struct kvm_xsave *guest_xsave)
++
++static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
++                                        u8 *state, unsigned int size)
+ {
+       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+               return;
+-      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+-                                     guest_xsave->region,
+-                                     sizeof(guest_xsave->region),
++      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
++                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+                                      vcpu->arch.pkru);
+ }
+-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+-                                        u8 *state, unsigned int size)
++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
++                                       struct kvm_xsave *guest_xsave)
+ {
+-      if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+-              return;
+-
+-      fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+-                                     state, size, vcpu->arch.pkru);
++      return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
++                                           sizeof(guest_xsave->region));
+ }
+ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
diff --git a/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch b/patches/kernel/0017-KVM-x86-Constrain-guest-supported-xfeatures-only-at-.patch
new file mode 100644 (file)
index 0000000..9154817
--- /dev/null
@@ -0,0 +1,119 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 27 Sep 2023 17:19:53 -0700
+Subject: [PATCH] KVM: x86: Constrain guest-supported xfeatures only at
+ KVM_GET_XSAVE{2}
+
+Mask off xfeatures that aren't exposed to the guest only when saving guest
+state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly.
+Preserving the maximal set of xfeatures in user_xfeatures restores KVM's
+ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu:
+Limit guest user_xfeatures to supported bits of XCR0") allowed userspace
+to load xfeatures that are supported by the host, irrespective of what
+xfeatures are exposed to the guest.
+
+There is no known use case where userspace *intentionally* loads xfeatures
+that aren't exposed to the guest, but the bug fixed by commit ad856280ddea
+was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't
+exposed to the guest, e.g. would lead to userspace unintentionally loading
+guest-unsupported xfeatures when live migrating a VM.
+
+Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially
+problematic for QEMU-based setups, as QEMU has a bug where instead of
+terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops
+loading guest state, i.e. resumes the guest after live migration with
+incomplete guest state, and ultimately results in guest data corruption.
+
+Note, letting userspace restore all host-supported xfeatures does not fix
+setups where a VM is migrated from a host *without* commit ad856280ddea,
+to a target with a subset of host-supported xfeatures.  However there is
+no way to safely address that scenario, e.g. KVM could silently drop the
+unsupported features, but that would be a clear violation of KVM's ABI and
+so would require userspace to opt-in, at which point userspace could
+simply be updated to sanitize the to-be-loaded XSAVE state.
+
+Reported-by: Tyler Stachecki <stachecki.tyler@gmail.com>
+Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net
+Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0")
+Cc: stable@vger.kernel.org
+Cc: Leonardo Bras <leobras@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Message-Id: <20230928001956.924301-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 8647c52e9504c99752a39f1d44f6268f82c40a5c)
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kernel/fpu/xstate.c |  5 +----
+ arch/x86/kvm/cpuid.c         |  8 --------
+ arch/x86/kvm/x86.c           | 18 ++++++++++++++++--
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
+index 463ec0cd0dab..ebe698f8af73 100644
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -1543,10 +1543,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
+               fpregs_restore_userregs();
+       newfps->xfeatures = curfps->xfeatures | xfeatures;
+-
+-      if (!guest_fpu)
+-              newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+-
++      newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+       newfps->xfd = curfps->xfd & ~xfeatures;
+       /* Do the final updates within the locked region */
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 61aefeb3fdbc..e5393ee652ba 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -350,14 +350,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+       vcpu->arch.guest_supported_xcr0 =
+               cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+-      /*
+-       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
+-       * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
+-       * supported by the host.
+-       */
+-      vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
+-                                                     XFEATURE_MASK_FPSSE;
+-
+       kvm_update_pv_runtime(vcpu);
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index a43a950d04cb..a4a44adf7c72 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5318,12 +5318,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+                                         u8 *state, unsigned int size)
+ {
++      /*
++       * Only copy state for features that are enabled for the guest.  The
++       * state itself isn't problematic, but setting bits in the header for
++       * features that are supported in *this* host but not exposed to the
++       * guest can result in KVM_SET_XSAVE failing when live migrating to a
++       * compatible host without the features that are NOT exposed to the
++       * guest.
++       *
++       * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
++       * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
++       * supported by the host.
++       */
++      u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 |
++                           XFEATURE_MASK_FPSSE;
++
+       if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+               return;
+       fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
+-                                     vcpu->arch.guest_fpu.fpstate->user_xfeatures,
+-                                     vcpu->arch.pkru);
++                                     supported_xcr0, vcpu->arch.pkru);
+ }
+ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,