git.proxmox.com Git - pve-kernel.git/commitdiff
cherry-pick KVM fix for old CPUs
author Fabian Grünbichler <f.gruenbichler@proxmox.com>
Mon, 4 Dec 2017 08:26:27 +0000 (09:26 +0100)
committer Fabian Grünbichler <f.gruenbichler@proxmox.com>
Mon, 4 Dec 2017 08:36:58 +0000 (09:36 +0100)
patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch [new file with mode: 0644]

diff --git a/patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch b/patches/kernel/0013-kvm-vmx-Reinstate-support-for-CPUs-without-virtual-N.patch
new file mode 100644 (file)
index 0000000..d8cda5d
--- /dev/null
@@ -0,0 +1,300 @@
+From 6c42032f6917937fd440a6c21fe28781088aae5a Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 6 Nov 2017 13:31:12 +0100
+Subject: [PATCH 13/13] kvm: vmx: Reinstate support for CPUs without virtual
+ NMI
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 8a1b43922d0d1279e7936ba85c4c2a870403c95f upstream.
+
+This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
+virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
+only had virtual NMIs in some SKUs.
+
+The revert is not trivial because in the meanwhile there have been several
+fixes to nested NMI injection.  Therefore, the entire vNMI state is moved
+to struct loaded_vmcs.
+
+Another change compared to before the patch is a simplification here:
+
+       if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
+           !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
+                                       get_vmcs12(vcpu))))) {
+
+The final condition here is always true (because nested_cpu_has_virtual_nmis
+is always false) and is removed.
+
+Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
+Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 106 insertions(+), 44 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 118709e7597d..a2c95522ac99 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -202,6 +202,10 @@ struct loaded_vmcs {
+       bool nmi_known_unmasked;
+       unsigned long vmcs_host_cr3;    /* May not match real cr3 */
+       unsigned long vmcs_host_cr4;    /* May not match real cr4 */
++      /* Support for vnmi-less CPUs */
++      int soft_vnmi_blocked;
++      ktime_t entry_time;
++      s64 vnmi_blocked_time;
+       struct list_head loaded_vmcss_on_cpu_link;
+ };
+@@ -1288,6 +1292,11 @@ static inline bool cpu_has_vmx_invpcid(void)
+               SECONDARY_EXEC_ENABLE_INVPCID;
+ }
++static inline bool cpu_has_virtual_nmis(void)
++{
++      return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
++}
++
+ static inline bool cpu_has_vmx_wbinvd_exit(void)
+ {
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+@@ -1339,11 +1348,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
+               (vmcs12->secondary_vm_exec_control & bit);
+ }
+-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
+-{
+-      return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+-}
+-
+ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+ {
+       return vmcs12->pin_based_vm_exec_control &
+@@ -3676,9 +3680,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+                               &_vmexit_control) < 0)
+               return -EIO;
+-      min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+-              PIN_BASED_VIRTUAL_NMIS;
+-      opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
++      min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
++      opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
++               PIN_BASED_VMX_PREEMPTION_TIMER;
+       if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+                               &_pin_based_exec_control) < 0)
+               return -EIO;
+@@ -5538,7 +5542,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
+ static void enable_nmi_window(struct kvm_vcpu *vcpu)
+ {
+-      if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
++      if (!cpu_has_virtual_nmis() ||
++          vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+               enable_irq_window(vcpu);
+               return;
+       }
+@@ -5578,6 +5583,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
++      if (!cpu_has_virtual_nmis()) {
++              /*
++               * Tracking the NMI-blocked state in software is built upon
++               * finding the next open IRQ window. This, in turn, depends on
++               * well-behaving guests: They have to keep IRQs disabled at
++               * least as long as the NMI handler runs. Otherwise we may
++               * cause NMI nesting, maybe breaking the guest. But as this is
++               * highly unlikely, we can live with the residual risk.
++               */
++              vmx->loaded_vmcs->soft_vnmi_blocked = 1;
++              vmx->loaded_vmcs->vnmi_blocked_time = 0;
++      }
++
+       ++vcpu->stat.nmi_injections;
+       vmx->loaded_vmcs->nmi_known_unmasked = false;
+@@ -5596,6 +5614,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       bool masked;
++      if (!cpu_has_virtual_nmis())
++              return vmx->loaded_vmcs->soft_vnmi_blocked;
+       if (vmx->loaded_vmcs->nmi_known_unmasked)
+               return false;
+       masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
+@@ -5607,13 +5627,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+-      vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+-      if (masked)
+-              vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+-                            GUEST_INTR_STATE_NMI);
+-      else
+-              vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+-                              GUEST_INTR_STATE_NMI);
++      if (!cpu_has_virtual_nmis()) {
++              if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
++                      vmx->loaded_vmcs->soft_vnmi_blocked = masked;
++                      vmx->loaded_vmcs->vnmi_blocked_time = 0;
++              }
++      } else {
++              vmx->loaded_vmcs->nmi_known_unmasked = !masked;
++              if (masked)
++                      vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
++                                    GUEST_INTR_STATE_NMI);
++              else
++                      vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
++                                      GUEST_INTR_STATE_NMI);
++      }
+ }
+ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+@@ -5621,6 +5648,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
+       if (to_vmx(vcpu)->nested.nested_run_pending)
+               return 0;
++      if (!cpu_has_virtual_nmis() &&
++          to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
++              return 0;
++
+       return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
+                  | GUEST_INTR_STATE_NMI));
+@@ -6348,6 +6379,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
+        * AAK134, BY25.
+        */
+       if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
++                      cpu_has_virtual_nmis() &&
+                       (exit_qualification & INTR_INFO_UNBLOCK_NMI))
+               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+@@ -6820,7 +6852,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
+       }
+       /* Create a new VMCS */
+-      item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
++      item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+       if (!item)
+               return NULL;
+       item->vmcs02.vmcs = alloc_vmcs();
+@@ -7837,6 +7869,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
+        * "blocked by NMI" bit has to be set before next VM entry.
+        */
+       if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
++                      cpu_has_virtual_nmis() &&
+                       (exit_qualification & INTR_INFO_UNBLOCK_NMI))
+               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                               GUEST_INTR_STATE_NMI);
+@@ -8554,6 +8587,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+               return 0;
+       }
++      if (unlikely(!cpu_has_virtual_nmis() &&
++                   vmx->loaded_vmcs->soft_vnmi_blocked)) {
++              if (vmx_interrupt_allowed(vcpu)) {
++                      vmx->loaded_vmcs->soft_vnmi_blocked = 0;
++              } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
++                         vcpu->arch.nmi_pending) {
++                      /*
+                       * This CPU doesn't support us in finding the end of an
++                       * NMI-blocked window if the guest runs with IRQs
++                       * disabled. So we pull the trigger after 1 s of
++                       * futile waiting, but inform the user about this.
++                       */
++                      printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
++                             "state on VCPU %d after 1 s timeout\n",
++                             __func__, vcpu->vcpu_id);
++                      vmx->loaded_vmcs->soft_vnmi_blocked = 0;
++              }
++      }
++
+       if (exit_reason < kvm_vmx_max_exit_handlers
+           && kvm_vmx_exit_handlers[exit_reason])
+               return kvm_vmx_exit_handlers[exit_reason](vcpu);
+@@ -8837,33 +8889,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
+       idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+-      if (vmx->loaded_vmcs->nmi_known_unmasked)
+-              return;
+-      /*
+-       * Can't use vmx->exit_intr_info since we're not sure what
+-       * the exit reason is.
+-       */
+-      exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+-      unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+-      vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+-      /*
+-       * SDM 3: 27.7.1.2 (September 2008)
+-       * Re-set bit "block by NMI" before VM entry if vmexit caused by
+-       * a guest IRET fault.
+-       * SDM 3: 23.2.2 (September 2008)
+-       * Bit 12 is undefined in any of the following cases:
+-       *  If the VM exit sets the valid bit in the IDT-vectoring
+-       *   information field.
+-       *  If the VM exit is due to a double fault.
+-       */
+-      if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+-          vector != DF_VECTOR && !idtv_info_valid)
+-              vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+-                            GUEST_INTR_STATE_NMI);
+-      else
+-              vmx->loaded_vmcs->nmi_known_unmasked =
+-                      !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+-                        & GUEST_INTR_STATE_NMI);
++      if (cpu_has_virtual_nmis()) {
++              if (vmx->loaded_vmcs->nmi_known_unmasked)
++                      return;
++              /*
++               * Can't use vmx->exit_intr_info since we're not sure what
++               * the exit reason is.
++               */
++              exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
++              unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
++              vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
++              /*
++               * SDM 3: 27.7.1.2 (September 2008)
++               * Re-set bit "block by NMI" before VM entry if vmexit caused by
++               * a guest IRET fault.
++               * SDM 3: 23.2.2 (September 2008)
++               * Bit 12 is undefined in any of the following cases:
++               *  If the VM exit sets the valid bit in the IDT-vectoring
++               *   information field.
++               *  If the VM exit is due to a double fault.
++               */
++              if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
++                  vector != DF_VECTOR && !idtv_info_valid)
++                      vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
++                                    GUEST_INTR_STATE_NMI);
++              else
++                      vmx->loaded_vmcs->nmi_known_unmasked =
++                              !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
++                                & GUEST_INTR_STATE_NMI);
++      } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
++              vmx->loaded_vmcs->vnmi_blocked_time +=
++                      ktime_to_ns(ktime_sub(ktime_get(),
++                                            vmx->loaded_vmcs->entry_time));
+ }
+ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
+@@ -8980,6 +9037,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long debugctlmsr, cr3, cr4;
++      /* Record the guest's net vcpu time for enforced NMI injections. */
++      if (unlikely(!cpu_has_virtual_nmis() &&
++                   vmx->loaded_vmcs->soft_vnmi_blocked))
++              vmx->loaded_vmcs->entry_time = ktime_get();
++
+       /* Don't enter VMX if guest state is invalid, let the exit handler
+          start emulation until we arrive back to a valid state */
+       if (vmx->emulation_required)
+-- 
+2.14.2
+
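The fallback this patch reinstates for CPUs without virtual NMIs reduces to a small software state machine: vmx_inject_nmi() sets a "blocked" flag and zeroes a counter, vmx_vcpu_run() timestamps each VM entry made while blocked, vmx_recover_nmi_blocking() adds the elapsed guest time to the counter on exit, and vmx_handle_exit() clears the flag once an interrupt window opens or, as a last resort, after one second of accumulated blocking. Below is a minimal userspace C sketch of that scheme, for illustration only; struct soft_vnmi_state and the function names are hypothetical stand-ins, not KVM symbols.

/* Build with: cc -o soft_vnmi soft_vnmi.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for the three fields added to struct loaded_vmcs. */
struct soft_vnmi_state {
	bool    blocked;        /* software "NMI blocked" flag            */
	int64_t blocked_ns;     /* time accumulated while blocked         */
	int64_t entry_ns;       /* timestamp taken at simulated VM entry  */
};

static int64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Mirrors the vmx_inject_nmi() hunk: mark blocked, restart the clock. */
static void inject_nmi(struct soft_vnmi_state *s)
{
	s->blocked = true;
	s->blocked_ns = 0;
}

/* Mirrors the vmx_vcpu_run() hunk: timestamp each entry while blocked. */
static void on_vm_entry(struct soft_vnmi_state *s)
{
	if (s->blocked)
		s->entry_ns = now_ns();
}

/* Mirrors the vmx_recover_nmi_blocking() hunk: account elapsed time. */
static void on_vm_exit(struct soft_vnmi_state *s)
{
	if (s->blocked)
		s->blocked_ns += now_ns() - s->entry_ns;
}

/*
 * Mirrors the vmx_handle_exit() hunk: unblock once an IRQ window
 * opens, or force the issue after one second of futile waiting.
 */
static void check_nmi_window(struct soft_vnmi_state *s, bool irq_allowed,
			     bool nmi_pending)
{
	if (!s->blocked)
		return;
	if (irq_allowed) {
		s->blocked = false;
	} else if (s->blocked_ns > 1000000000LL && nmi_pending) {
		fprintf(stderr, "breaking out of NMI-blocked state after 1 s\n");
		s->blocked = false;
	}
}

int main(void)
{
	struct soft_vnmi_state s = { 0 };

	inject_nmi(&s);                    /* guest takes an NMI            */
	on_vm_entry(&s);                   /* enter the guest while blocked */
	on_vm_exit(&s);                    /* exit: accumulate blocked time */
	check_nmi_window(&s, true, false); /* IRQ window open: unblock      */
	printf("blocked=%d after %lld ns\n", s.blocked, (long long)s.blocked_ns);
	return 0;
}

The one-second escape hatch mirrors the 1000000000LL comparison in the vmx_handle_exit() hunk above: without hardware vNMI tracking the hypervisor never observes the guest's IRET, so accumulated blocked time is the only heuristic it has for deciding when further waiting is futile.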