KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry

author Paolo Bonzini <pbonzini@redhat.com>

Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)

committer Thomas Gleixner <tglx@linutronix.de>

Sun, 5 Aug 2018 15:10:20 +0000 (17:10 +0200)
author Paolo Bonzini <pbonzini@redhat.com>
Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Sun, 5 Aug 2018 15:10:20 +0000 (17:10 +0200)
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst

index 5dadb4503ec991860e70efb878869fb9957c71af..bae52b845de0b93af644ea55103d5a912dfca753 100644 (file)
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/l1tf.rst
@@ -546,6 +546,27 @@ available:
      EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
      parameter.
  
+3.4. Nested virtual machines
+""""""""""""""""""""""""""""
+
+When nested virtualization is in use, three operating systems are involved:
+the bare metal hypervisor, the nested hypervisor and the nested virtual
+machine.  VMENTER operations from the nested hypervisor into the nested
+guest will always be processed by the bare metal hypervisor. If KVM is the
+bare metal hypervisor it will:
+
+ - Flush the L1D cache on every switch from the nested hypervisor to the
+   nested virtual machine, so that the nested hypervisor's secrets are not
+   exposed to the nested virtual machine;
+
+ - Flush the L1D cache on every switch from the nested virtual machine to
+   the nested hypervisor; this is a complex operation, and flushing the L1D
+   cache prevents the bare metal hypervisor's secrets from being exposed to
+   the nested virtual machine;
+
+ - Instruct the nested hypervisor to not perform any L1D cache flush. This
+   is an optimization to avoid double L1D flushing.
+
  
  .. _default_mitigations:
  
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 37749429afd9d8403f91b7ee5be11cce42fbe8d8..acebb808c4b5f7f602e470ba1c4e5f890278dc09 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1418,6 +1418,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
  
+u64 kvm_get_arch_capabilities(void);
  void kvm_define_shared_msr(unsigned index, u32 msr);
  int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
  
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index 29d6eab6920e69bc2c64ad1c3f85585b7af4bd8c..58bba7a7572addee4679d30aea696f06fb128f3d 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6465,8 +6465,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
                 ++vmx->nmsrs;
         }
  
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+       vmx->arch_capabilities = kvm_get_arch_capabilities();
  
         vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
  
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 79c8ca2c2ad90301e00fff2fdf1f53f1b3fc8c1d..a5caa5e5480ca1bf659c7c045931aa4c22888a62 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1103,11 +1103,35 @@ static u32 msr_based_features[] = {
  
  static unsigned int num_msr_based_features;
  
+u64 kvm_get_arch_capabilities(void)
+{
+       u64 data;
+
+       rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+       /*
+        * If we're doing cache flushes (either "always" or "cond")
+        * we will do one whenever the guest does a vmlaunch/vmresume.
+        * If an outer hypervisor is doing the cache flush for us
+        * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
+        * capability to the guest too, and if EPT is disabled we're not
+        * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
+        * require a nested hypervisor to do a flush of its own.
+        */
+       if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+               data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+
+       return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
  static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
  {
         switch (msr->index) {
-       case MSR_IA32_UCODE_REV:
         case MSR_IA32_ARCH_CAPABILITIES:
+               msr->data = kvm_get_arch_capabilities();
+               break;
+       case MSR_IA32_UCODE_REV:
                 rdmsrl_safe(msr->index, &msr->data);
                 break;
         default:
author	Paolo Bonzini <pbonzini@redhat.com>
	Sun, 5 Aug 2018 14:07:47 +0000 (16:07 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
	Sun, 5 Aug 2018 15:10:20 +0000 (17:10 +0200)
Documentation/admin-guide/l1tf.rst		patch \| blob \| blame \| history
arch/x86/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/x86/kvm/vmx.c		patch \| blob \| blame \| history
arch/x86/kvm/x86.c		patch \| blob \| blame \| history