KVM: nVMX: generate MSR_IA32_CR{0,4}_FIXED1 from guest CPUID
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82462fcba28fed9a5064776cfb527e8eaa3..fa29585c63ff42470182c44f1f6eb474ad638012 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -132,6 +132,12 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 
 #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
 
+#define VMX_VPID_EXTENT_SUPPORTED_MASK         \
+       (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |  \
+       VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |    \
+       VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |    \
+       VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -446,23 +452,31 @@ struct nested_vmx {
        u16 vpid02;
        u16 last_vpid;
 
+       /*
+        * We only store the "true" versions of the VMX capability MSRs. We
+        * generate the "non-true" versions by setting the must-be-1 bits
+        * according to the SDM.
+        */
        u32 nested_vmx_procbased_ctls_low;
        u32 nested_vmx_procbased_ctls_high;
-       u32 nested_vmx_true_procbased_ctls_low;
        u32 nested_vmx_secondary_ctls_low;
        u32 nested_vmx_secondary_ctls_high;
        u32 nested_vmx_pinbased_ctls_low;
        u32 nested_vmx_pinbased_ctls_high;
        u32 nested_vmx_exit_ctls_low;
        u32 nested_vmx_exit_ctls_high;
-       u32 nested_vmx_true_exit_ctls_low;
        u32 nested_vmx_entry_ctls_low;
        u32 nested_vmx_entry_ctls_high;
-       u32 nested_vmx_true_entry_ctls_low;
        u32 nested_vmx_misc_low;
        u32 nested_vmx_misc_high;
        u32 nested_vmx_ept_caps;
        u32 nested_vmx_vpid_caps;
+       u64 nested_vmx_basic;
+       u64 nested_vmx_cr0_fixed0;
+       u64 nested_vmx_cr0_fixed1;
+       u64 nested_vmx_cr4_fixed0;
+       u64 nested_vmx_cr4_fixed1;
+       u64 nested_vmx_vmcs_enum;
 };
 
 #define POSTED_INTR_ON  0
@@ -520,6 +534,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc)
                        (unsigned long *)&pi_desc->control);
 }
 
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+       clear_bit(POSTED_INTR_ON,
+                 (unsigned long *)&pi_desc->control);
+}
+
 static inline int pi_test_on(struct pi_desc *pi_desc)
 {
        return test_bit(POSTED_INTR_ON,
@@ -920,16 +940,32 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
 static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+enum {
+       VMX_IO_BITMAP_A,
+       VMX_IO_BITMAP_B,
+       VMX_MSR_BITMAP_LEGACY,
+       VMX_MSR_BITMAP_LONGMODE,
+       VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+       VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+       VMX_MSR_BITMAP_LEGACY_X2APIC,
+       VMX_MSR_BITMAP_LONGMODE_X2APIC,
+       VMX_VMREAD_BITMAP,
+       VMX_VMWRITE_BITMAP,
+       VMX_BITMAP_NR
+};
+
+static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+
+#define vmx_io_bitmap_a                      (vmx_bitmap[VMX_IO_BITMAP_A])
+#define vmx_io_bitmap_b                      (vmx_bitmap[VMX_IO_BITMAP_B])
+#define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+#define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+#define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+#define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+#define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+#define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
+#define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -2529,14 +2565,14 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
                if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
                        if (is_long_mode(vcpu))
-                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
                        else
-                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
                } else {
                        if (is_long_mode(vcpu))
-                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
                        else
-                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
                }
        } else {
                if (is_long_mode(vcpu))
@@ -2712,9 +2748,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
        /* We support free control of debug control saving. */
-       vmx->nested.nested_vmx_true_exit_ctls_low =
-               vmx->nested.nested_vmx_exit_ctls_low &
-               ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+       vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2733,9 +2767,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
        /* We support free control of debug control loading. */
-       vmx->nested.nested_vmx_true_entry_ctls_low =
-               vmx->nested.nested_vmx_entry_ctls_low &
-               ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+       vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 
        /* cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2768,8 +2800,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                CPU_BASED_USE_MSR_BITMAPS;
 
        /* We support free control of CR3 access interception. */
-       vmx->nested.nested_vmx_true_procbased_ctls_low =
-               vmx->nested.nested_vmx_procbased_ctls_low &
+       vmx->nested.nested_vmx_procbased_ctls_low &=
                ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
        /* secondary cpu-based controls */
@@ -2780,6 +2811,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        vmx->nested.nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_RDTSCP |
+               SECONDARY_EXEC_DESC |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                SECONDARY_EXEC_ENABLE_VPID |
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -2811,8 +2843,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         */
        if (enable_vpid)
                vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
-                               VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
-                               VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+                       VMX_VPID_EXTENT_SUPPORTED_MASK;
        else
                vmx->nested.nested_vmx_vpid_caps = 0;
 
@@ -2829,14 +2860,52 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
                VMX_MISC_ACTIVITY_HLT;
        vmx->nested.nested_vmx_misc_high = 0;
+
+       /*
+        * This MSR reports some information about VMX support. We
+        * should return information about the VMX we emulate for the
+        * guest, and the VMCS structure we give it - not about the
+        * VMX support of the underlying hardware.
+        */
+       vmx->nested.nested_vmx_basic =
+               VMCS12_REVISION |
+               VMX_BASIC_TRUE_CTLS |
+               ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
+               (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+
+       if (cpu_has_vmx_basic_inout())
+               vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+
+       /*
+        * These MSRs specify bits which the guest must keep fixed on
+        * while L1 is in VMXON mode (in L1's root mode, or running an L2).
+        * We picked the standard core2 setting.
+        */
+#define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
+#define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
+       vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
+       vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+
+       /* These MSRs specify bits which the guest must keep fixed off. */
+       rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
+       rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+
+       /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+       vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+}
+
+/*
+ * if fixed0[i] == 1: val[i] must be 1
+ * if fixed1[i] == 0: val[i] must be 0
+ */
+static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
+{
+       return ((val & fixed1) | fixed0) == val;
 }
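
For intuition, the identity above enforces both constraints at once: or-ing in fixed0 only changes val when a must-be-1 bit is missing, and masking with fixed1 only changes val when a must-be-0 bit is set. A small standalone sketch (plain userspace C, not part of the patch):

    /* Standalone illustration of the fixed0/fixed1 check above. */
    #include <assert.h>
    #include <stdint.h>

    static int fixed_bits_valid(uint64_t val, uint64_t fixed0, uint64_t fixed1)
    {
        return ((val & fixed1) | fixed0) == val;
    }

    int main(void)
    {
        uint64_t fixed0 = 0x05;  /* bits 0 and 2 must be 1 */
        uint64_t fixed1 = 0x0f;  /* bits 4..63 must be 0   */

        assert(fixed_bits_valid(0x05, fixed0, fixed1));  /* exactly the required bits */
        assert(fixed_bits_valid(0x0f, fixed0, fixed1));  /* optional bits 1 and 3 set */
        assert(!fixed_bits_valid(0x04, fixed0, fixed1)); /* must-be-1 bit 0 cleared   */
        assert(!fixed_bits_valid(0x15, fixed0, fixed1)); /* must-be-0 bit 4 set       */
        return 0;
    }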
 
 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
 {
-       /*
-        * Bits 0 in high must be 0, and bits 1 in low must be 1.
-        */
-       return ((control & high) | low) == control;
+       return fixed_bits_valid(control, low, high);
 }
 
 static inline u64 vmx_control_msr(u32 low, u32 high)
@@ -2844,87 +2913,285 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
        return low | ((u64)high << 32);
 }
 
-/* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
+{
+       superset &= mask;
+       subset &= mask;
+
+       return (superset | subset) == superset;
+}
+
+static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+{
+       const u64 feature_and_reserved =
+               /* feature (except bit 48; see below) */
+               BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+               /* reserved */
+               BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+       u64 vmx_basic = vmx->nested.nested_vmx_basic;
+
+       if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+               return -EINVAL;
+
+       /*
+        * KVM does not emulate a version of VMX that constrains physical
+        * addresses of VMX structures (e.g. VMCS) to 32-bits.
+        */
+       if (data & BIT_ULL(48))
+               return -EINVAL;
+
+       if (vmx_basic_vmcs_revision_id(vmx_basic) !=
+           vmx_basic_vmcs_revision_id(data))
+               return -EINVAL;
+
+       if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
+               return -EINVAL;
+
+       vmx->nested.nested_vmx_basic = data;
+       return 0;
+}
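
For orientation, vmx_restore_vmx_basic() relies on the IA32_VMX_BASIC layout from the SDM: bits 30:0 revision, 44:32 VMCS size, 48 "32-bit physical addresses", 49 dual-monitor SMM, 53:50 memory type, 54 INS/OUTS exit reporting, 55 true-controls support, everything else reserved. A standalone decoder sketch (not part of the patch; the sample value only mimics the shape built by nested_vmx_setup_ctls_msrs()):

    /* Decode the IA32_VMX_BASIC fields referenced by the checks above. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* revision 1, 4 KiB VMCS, write-back memory type, true controls */
        uint64_t basic = 1ULL | (0x1000ULL << 32) | (6ULL << 50) | (1ULL << 55);

        printf("revision  : %u\n", (uint32_t)(basic & 0x7fffffff));     /* bits 30:0  */
        printf("vmcs size : %u\n", (uint32_t)((basic >> 32) & 0x1fff)); /* bits 44:32 */
        printf("32-bit PA : %u\n", (uint32_t)((basic >> 48) & 1));      /* bit 48     */
        printf("mem type  : %u\n", (uint32_t)((basic >> 50) & 0xf));    /* bits 53:50 */
        printf("ins/outs  : %u\n", (uint32_t)((basic >> 54) & 1));      /* bit 54     */
        printf("true ctls : %u\n", (uint32_t)((basic >> 55) & 1));      /* bit 55     */
        return 0;
    }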
+
+static int
+vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+       u64 supported;
+       u32 *lowp, *highp;
+
+       switch (msr_index) {
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+               lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
+               highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+               lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
+               highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+               lowp = &vmx->nested.nested_vmx_exit_ctls_low;
+               highp = &vmx->nested.nested_vmx_exit_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+               lowp = &vmx->nested.nested_vmx_entry_ctls_low;
+               highp = &vmx->nested.nested_vmx_entry_ctls_high;
+               break;
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+               lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
+               highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+               break;
+       default:
+               BUG();
+       }
+
+       supported = vmx_control_msr(*lowp, *highp);
+
+       /* Check must-be-1 bits are still 1. */
+       if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
+               return -EINVAL;
+
+       /* Check must-be-0 bits are still 0. */
+       if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+               return -EINVAL;
+
+       *lowp = data;
+       *highp = data >> 32;
+       return 0;
+}
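
The restore path only lets userspace tighten what KVM advertises: in these MSRs the low 32 bits carry the must-be-1 settings and the high 32 bits the allowed-1 settings, so the two subset checks reject any value that clears a must-be-1 bit or sets an allowed-1 bit KVM does not offer. A standalone sketch of the same logic (not kernel code):

    /* Mirror of the two is_bitwise_subset() checks in vmx_restore_control_msr(). */
    #include <assert.h>
    #include <stdint.h>

    #define LOW32  0x00000000ffffffffULL
    #define HIGH32 0xffffffff00000000ULL

    static int is_bitwise_subset(uint64_t superset, uint64_t subset, uint64_t mask)
    {
        superset &= mask;
        subset &= mask;
        return (superset | subset) == superset;
    }

    /* Returns 0 when "data" only tightens what "supported" advertises. */
    static int check_restore(uint64_t supported, uint64_t data)
    {
        if (!is_bitwise_subset(data, supported, LOW32))
            return -1; /* a must-be-1 bit was cleared */
        if (!is_bitwise_subset(supported, data, HIGH32))
            return -1; /* an unsupported allowed-1 bit was set */
        return 0;
    }

    int main(void)
    {
        uint64_t supported = 0x0000000f00000003ULL;

        assert(check_restore(supported, 0x0000000f00000003ULL) == 0); /* unchanged          */
        assert(check_restore(supported, 0x0000000300000007ULL) == 0); /* strictly tightened */
        assert(check_restore(supported, 0x0000000f00000001ULL) != 0); /* lost must-be-1 bit */
        assert(check_restore(supported, 0x0000001f00000003ULL) != 0); /* new allowed-1 bit  */
        return 0;
    }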
+
+static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+{
+       const u64 feature_and_reserved_bits =
+               /* feature */
+               BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
+               BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+               /* reserved */
+               GENMASK_ULL(13, 9) | BIT_ULL(31);
+       u64 vmx_misc;
+
+       vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
+                                  vmx->nested.nested_vmx_misc_high);
+
+       if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+               return -EINVAL;
+
+       if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+            PIN_BASED_VMX_PREEMPTION_TIMER) &&
+           vmx_misc_preemption_timer_rate(data) !=
+           vmx_misc_preemption_timer_rate(vmx_misc))
+               return -EINVAL;
+
+       if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
+               return -EINVAL;
+
+       if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
+               return -EINVAL;
+
+       if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
+               return -EINVAL;
+
+       vmx->nested.nested_vmx_misc_low = data;
+       vmx->nested.nested_vmx_misc_high = data >> 32;
+       return 0;
+}
+
+static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+{
+       u64 vmx_ept_vpid_cap;
+
+       vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
+                                          vmx->nested.nested_vmx_vpid_caps);
+
+       /* Every bit is either reserved or a feature bit. */
+       if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+               return -EINVAL;
+
+       vmx->nested.nested_vmx_ept_caps = data;
+       vmx->nested.nested_vmx_vpid_caps = data >> 32;
+       return 0;
+}
+
+static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+       u64 *msr;
+
+       switch (msr_index) {
+       case MSR_IA32_VMX_CR0_FIXED0:
+               msr = &vmx->nested.nested_vmx_cr0_fixed0;
+               break;
+       case MSR_IA32_VMX_CR4_FIXED0:
+               msr = &vmx->nested.nested_vmx_cr4_fixed0;
+               break;
+       default:
+               BUG();
+       }
+
+       /*
+        * 1 bits (which indicate bits that "must-be-1" during VMX operation)
+        * must be 1 in the restored value.
+        */
+       if (!is_bitwise_subset(data, *msr, -1ULL))
+               return -EINVAL;
+
+       *msr = data;
+       return 0;
+}
+
+/*
+ * Called when userspace is restoring VMX MSRs.
+ *
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
        switch (msr_index) {
        case MSR_IA32_VMX_BASIC:
+               return vmx_restore_vmx_basic(vmx, data);
+       case MSR_IA32_VMX_PINBASED_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS:
+       case MSR_IA32_VMX_EXIT_CTLS:
+       case MSR_IA32_VMX_ENTRY_CTLS:
                /*
-                * This MSR reports some information about VMX support. We
-                * should return information about the VMX we emulate for the
-                * guest, and the VMCS structure we give it - not about the
-                * VMX support of the underlying hardware.
+                * The "non-true" VMX capability MSRs are generated from the
+                * "true" MSRs, so we do not support restoring them directly.
+                *
+                * If userspace wants to emulate VMX_BASIC[55]=0, userspace
+                * should restore the "true" MSRs with the must-be-1 bits
+                * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
+                * DEFAULT SETTINGS".
                 */
-               *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
-                          ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
-                          (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
-               if (cpu_has_vmx_basic_inout())
-                       *pdata |= VMX_BASIC_INOUT;
+               return -EINVAL;
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+       case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+       case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+       case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+               return vmx_restore_control_msr(vmx, msr_index, data);
+       case MSR_IA32_VMX_MISC:
+               return vmx_restore_vmx_misc(vmx, data);
+       case MSR_IA32_VMX_CR0_FIXED0:
+       case MSR_IA32_VMX_CR4_FIXED0:
+               return vmx_restore_fixed0_msr(vmx, msr_index, data);
+       case MSR_IA32_VMX_CR0_FIXED1:
+       case MSR_IA32_VMX_CR4_FIXED1:
+               /*
+                * These MSRs are generated based on the vCPU's CPUID, so we
+                * do not support restoring them directly.
+                */
+               return -EINVAL;
+       case MSR_IA32_VMX_EPT_VPID_CAP:
+               return vmx_restore_vmx_ept_vpid_cap(vmx, data);
+       case MSR_IA32_VMX_VMCS_ENUM:
+               vmx->nested.nested_vmx_vmcs_enum = data;
+               return 0;
+       default:
+               /*
+                * The rest of the VMX capability MSRs do not support restore.
+                */
+               return -EINVAL;
+       }
+}
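
For context, the caller of this path is KVM_SET_MSRS, which is host-initiated. A hypothetical userspace sketch (not part of the patch): it assumes an existing vCPU file descriptor, kvm_intel loaded with nested=1, and a guest CPUID that advertises VMX; it defines the architectural MSR index itself since linux/kvm.h does not, and error handling is elided.

    /* Read a nested VMX capability MSR and write back a tightened value. */
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x48e /* architectural index */

    static int restrict_procbased_ctls(int vcpu_fd, __u64 feature_bit)
    {
        struct {
            struct kvm_msrs hdr;
            struct kvm_msr_entry entry;
        } msrs;

        memset(&msrs, 0, sizeof(msrs));
        msrs.hdr.nmsrs = 1;
        msrs.entry.index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;

        if (ioctl(vcpu_fd, KVM_GET_MSRS, &msrs) != 1)
            return -1;

        /* Clear an allowed-1 bit (bits 63:32) to hide the feature from L1. */
        msrs.entry.data &= ~(feature_bit << 32);

        return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) == 1 ? 0 : -1;
    }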
+
+/* Returns 0 on success, non-0 otherwise. */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       switch (msr_index) {
+       case MSR_IA32_VMX_BASIC:
+               *pdata = vmx->nested.nested_vmx_basic;
                break;
        case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
        case MSR_IA32_VMX_PINBASED_CTLS:
                *pdata = vmx_control_msr(
                        vmx->nested.nested_vmx_pinbased_ctls_low,
                        vmx->nested.nested_vmx_pinbased_ctls_high);
+               if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
+                       *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-               *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_true_procbased_ctls_low,
-                       vmx->nested.nested_vmx_procbased_ctls_high);
-               break;
        case MSR_IA32_VMX_PROCBASED_CTLS:
                *pdata = vmx_control_msr(
                        vmx->nested.nested_vmx_procbased_ctls_low,
                        vmx->nested.nested_vmx_procbased_ctls_high);
+               if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
+                       *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-               *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_true_exit_ctls_low,
-                       vmx->nested.nested_vmx_exit_ctls_high);
-               break;
        case MSR_IA32_VMX_EXIT_CTLS:
                *pdata = vmx_control_msr(
                        vmx->nested.nested_vmx_exit_ctls_low,
                        vmx->nested.nested_vmx_exit_ctls_high);
+               if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
+                       *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-               *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_true_entry_ctls_low,
-                       vmx->nested.nested_vmx_entry_ctls_high);
-               break;
        case MSR_IA32_VMX_ENTRY_CTLS:
                *pdata = vmx_control_msr(
                        vmx->nested.nested_vmx_entry_ctls_low,
                        vmx->nested.nested_vmx_entry_ctls_high);
+               if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
+                       *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_MISC:
                *pdata = vmx_control_msr(
                        vmx->nested.nested_vmx_misc_low,
                        vmx->nested.nested_vmx_misc_high);
                break;
-       /*
-        * These MSRs specify bits which the guest must keep fixed (on or off)
-        * while L1 is in VMXON mode (in L1's root mode, or running an L2).
-        * We picked the standard core2 setting.
-        */
-#define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
-#define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
        case MSR_IA32_VMX_CR0_FIXED0:
-               *pdata = VMXON_CR0_ALWAYSON;
+               *pdata = vmx->nested.nested_vmx_cr0_fixed0;
                break;
        case MSR_IA32_VMX_CR0_FIXED1:
-               *pdata = -1ULL;
+               *pdata = vmx->nested.nested_vmx_cr0_fixed1;
                break;
        case MSR_IA32_VMX_CR4_FIXED0:
-               *pdata = VMXON_CR4_ALWAYSON;
+               *pdata = vmx->nested.nested_vmx_cr4_fixed0;
                break;
        case MSR_IA32_VMX_CR4_FIXED1:
-               *pdata = -1ULL;
+               *pdata = vmx->nested.nested_vmx_cr4_fixed1;
                break;
        case MSR_IA32_VMX_VMCS_ENUM:
-               *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+               *pdata = vmx->nested.nested_vmx_vmcs_enum;
                break;
        case MSR_IA32_VMX_PROCBASED_CTLS2:
                *pdata = vmx_control_msr(
@@ -3107,7 +3374,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        vmx_leave_nested(vcpu);
                break;
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
-               return 1; /* they are read-only */
+               if (!msr_info->host_initiated)
+                       return 1; /* they are read-only */
+               if (!nested_vmx_allowed(vcpu))
+                       return 1;
+               return vmx_set_vmx_msr(vcpu, msr_index, data);
        case MSR_IA32_XSS:
                if (!vmx_xsaves_supported())
                        return 1;
@@ -3869,6 +4140,40 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
                  (unsigned long *)&vcpu->arch.regs_dirty);
 }
 
+static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+       if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+               SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+           nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+               fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
+
+       return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+
+       return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+
+       return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+/* No difference in the restrictions on guest and host CR4 in VMX operation. */
+#define nested_guest_cr4_valid nested_cr4_valid
+#define nested_host_cr4_valid  nested_cr4_valid
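
With the core2-style defaults set up earlier in the patch (cr0_fixed0 = PE|PG|NE, cr0_fixed1 read from hardware and typically all-ones), an unpaged CR0 only passes the guest-side check when both L0 and L1 expose "unrestricted guest", which is why PE and PG are dropped from fixed0 in that case. A standalone sketch (not kernel code):

    /* Illustrate why unrestricted guest relaxes the nested CR0 check. */
    #include <assert.h>
    #include <stdint.h>

    #define X86_CR0_PE (1ULL << 0)
    #define X86_CR0_NE (1ULL << 5)
    #define X86_CR0_PG (1ULL << 31)

    static int fixed_bits_valid(uint64_t val, uint64_t fixed0, uint64_t fixed1)
    {
        return ((val & fixed1) | fixed0) == val;
    }

    int main(void)
    {
        uint64_t fixed0 = X86_CR0_PE | X86_CR0_PG | X86_CR0_NE; /* VMXON_CR0_ALWAYSON */
        uint64_t fixed1 = ~0ULL;              /* CR0_FIXED1 is typically all-ones    */
        uint64_t unpaged = X86_CR0_PE | X86_CR0_NE;             /* PG cleared         */

        /* Without unrestricted guest, PG (and PE) must stay set. */
        assert(!fixed_bits_valid(unpaged, fixed0, fixed1));

        /* With unrestricted guest, PE and PG are removed from fixed0. */
        assert(fixed_bits_valid(unpaged, fixed0 & ~(X86_CR0_PE | X86_CR0_PG), fixed1));
        return 0;
    }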
+
 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -3997,8 +4302,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                if (!nested_vmx_allowed(vcpu))
                        return 1;
        }
-       if (to_vmx(vcpu)->nested.vmxon &&
-           ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
+
+       if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
                return 1;
 
        vcpu->arch.cr4 = cr4;
@@ -4575,41 +4880,6 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
        }
 }
 
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
-                                               u32 msr, int type)
-{
-       int f = sizeof(unsigned long);
-
-       if (!cpu_has_vmx_msr_bitmap())
-               return;
-
-       /*
-        * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-        * have the write-low and read-high bitmap offsets the wrong way round.
-        * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-        */
-       if (msr <= 0x1fff) {
-               if (type & MSR_TYPE_R)
-                       /* read-low */
-                       __set_bit(msr, msr_bitmap + 0x000 / f);
-
-               if (type & MSR_TYPE_W)
-                       /* write-low */
-                       __set_bit(msr, msr_bitmap + 0x800 / f);
-
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               if (type & MSR_TYPE_R)
-                       /* read-high */
-                       __set_bit(msr, msr_bitmap + 0x400 / f);
-
-               if (type & MSR_TYPE_W)
-                       /* write-high */
-                       __set_bit(msr, msr_bitmap + 0xc00 / f);
-
-       }
-}
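
The layout described in the comment above is still used by the remaining __vmx_disable_intercept_for_msr(): the 4 KiB bitmap holds four 1 KiB regions, read-low at 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00, covering MSRs 0x0-0x1fff and 0xc0000000-0xc0001fff. A standalone sketch (not kernel code) that computes where a given MSR lands:

    /* Map an MSR/access pair to its byte and bit inside the VMX MSR bitmap. */
    #include <stdint.h>
    #include <stdio.h>

    enum { READ_LOW = 0x000, READ_HIGH = 0x400, WRITE_LOW = 0x800, WRITE_HIGH = 0xc00 };

    static void locate(uint32_t msr, int write)
    {
        uint32_t base, bit;

        if (msr <= 0x1fff) {
            base = write ? WRITE_LOW : READ_LOW;
            bit = msr;
        } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
            base = write ? WRITE_HIGH : READ_HIGH;
            bit = msr & 0x1fff;
        } else {
            printf("MSR 0x%x is not covered by the bitmap\n", msr);
            return;
        }
        printf("MSR 0x%x %s: byte 0x%x, bit %u\n",
               msr, write ? "write" : "read", base + bit / 8, bit % 8);
    }

    int main(void)
    {
        locate(0x808, 0);      /* x2APIC TPR read */
        locate(0xc0000080, 1); /* IA32_EFER write */
        return 0;
    }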
-
 /*
  * If a msr is allowed by L0, we should check whether it is allowed by L1.
  * The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4665,48 +4935,18 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
                                                msr, MSR_TYPE_R | MSR_TYPE_W);
 }
 
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
 {
        if (apicv_active) {
-               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                               msr, MSR_TYPE_R);
-               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                               msr, MSR_TYPE_R);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+                               msr, type);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+                               msr, type);
        } else {
-               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_R);
-               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_R);
-       }
-}
-
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
-       if (apicv_active) {
                __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                               msr, MSR_TYPE_R);
+                               msr, type);
                __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                               msr, MSR_TYPE_R);
-       } else {
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_R);
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_R);
-       }
-}
-
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
-       if (apicv_active) {
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                               msr, MSR_TYPE_W);
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                               msr, MSR_TYPE_W);
-       } else {
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_W);
-               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
-                               msr, MSR_TYPE_W);
+                               msr, type);
        }
 }
 
@@ -4828,9 +5068,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (!pi_test_and_clear_on(&vmx->pi_desc))
+       if (!pi_test_on(&vmx->pi_desc))
                return;
 
+       pi_clear_on(&vmx->pi_desc);
+       /*
+        * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+        * But on x86 this is just a compiler barrier anyway.
+        */
+       smp_mb__after_atomic();
        kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
 }
 
@@ -5587,7 +5833,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
-       int size, in, string;
+       int size, in, string, ret;
        unsigned port;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -5601,9 +5847,14 @@ static int handle_io(struct kvm_vcpu *vcpu)
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
-       skip_emulated_instruction(vcpu);
 
-       return kvm_fast_pio_out(vcpu, size, port);
+       ret = kvm_skip_emulated_instruction(vcpu);
+
+       /*
+        * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+        * KVM_EXIT_DEBUG here.
+        */
+       return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static void
@@ -5617,18 +5868,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
        hypercall[2] = 0xc1;
 }
 
-static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
-{
-       unsigned long always_on = VMXON_CR0_ALWAYSON;
-       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
-       if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
-               SECONDARY_EXEC_UNRESTRICTED_GUEST &&
-           nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
-               always_on &= ~(X86_CR0_PE | X86_CR0_PG);
-       return (val & always_on) == always_on;
-}
-
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
@@ -5647,7 +5886,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-               if (!nested_cr0_valid(vcpu, val))
+               if (!nested_guest_cr0_valid(vcpu, val))
                        return 1;
 
                if (kvm_set_cr0(vcpu, val))
@@ -5656,8 +5895,9 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                return 0;
        } else {
                if (to_vmx(vcpu)->nested.vmxon &&
-                   ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+                   !nested_host_cr0_valid(vcpu, val))
                        return 1;
+
                return kvm_set_cr0(vcpu, val);
        }
 }
@@ -5701,6 +5941,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
        int cr;
        int reg;
        int err;
+       int ret;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        cr = exit_qualification & 15;
@@ -5712,25 +5953,27 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                switch (cr) {
                case 0:
                        err = handle_set_cr0(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 3:
                        err = kvm_set_cr3(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 4:
                        err = handle_set_cr4(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 8: {
                                u8 cr8_prev = kvm_get_cr8(vcpu);
                                u8 cr8 = (u8)val;
                                err = kvm_set_cr8(vcpu, cr8);
-                               kvm_complete_insn_gp(vcpu, err);
+                               ret = kvm_complete_insn_gp(vcpu, err);
                                if (lapic_in_kernel(vcpu))
-                                       return 1;
+                                       return ret;
                                if (cr8_prev <= cr8)
-                                       return 1;
+                                       return ret;
+                               /*
+                                * TODO: we might be squashing a
+                                * KVM_GUESTDBG_SINGLESTEP-triggered
+                                * KVM_EXIT_DEBUG here.
+                                */
                                vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
                                return 0;
                        }
@@ -5739,23 +5982,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
        case 2: /* clts */
                handle_clts(vcpu);
                trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
-               skip_emulated_instruction(vcpu);
                vmx_fpu_activate(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        case 1: /*mov from cr*/
                switch (cr) {
                case 3:
                        val = kvm_read_cr3(vcpu);
                        kvm_register_write(vcpu, reg, val);
                        trace_kvm_cr_read(cr, val);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                case 8:
                        val = kvm_get_cr8(vcpu);
                        kvm_register_write(vcpu, reg, val);
                        trace_kvm_cr_read(cr, val);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        case 3: /* lmsw */
@@ -5763,8 +6003,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
                kvm_lmsw(vcpu, val);
 
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        default:
                break;
        }
@@ -5835,8 +6074,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
                        return 1;
 
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
@@ -5868,8 +6106,7 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 
 static int handle_cpuid(struct kvm_vcpu *vcpu)
 {
-       kvm_emulate_cpuid(vcpu);
-       return 1;
+       return kvm_emulate_cpuid(vcpu);
 }
 
 static int handle_rdmsr(struct kvm_vcpu *vcpu)
@@ -5890,8 +6127,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
        /* FIXME: handling of bits 32:63 of rax, rdx */
        vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
        vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_wrmsr(struct kvm_vcpu *vcpu)
@@ -5911,8 +6147,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        }
 
        trace_kvm_msr_write(ecx, data);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
@@ -5956,8 +6191,7 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
        kvm_mmu_invlpg(vcpu, exit_qualification);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_rdpmc(struct kvm_vcpu *vcpu)
@@ -5965,15 +6199,12 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
        int err;
 
        err = kvm_rdpmc(vcpu);
-       kvm_complete_insn_gp(vcpu, err);
-
-       return 1;
+       return kvm_complete_insn_gp(vcpu, err);
 }
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-       kvm_emulate_wbinvd(vcpu);
-       return 1;
+       return kvm_emulate_wbinvd(vcpu);
 }
 
 static int handle_xsetbv(struct kvm_vcpu *vcpu)
@@ -5982,20 +6213,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
        u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
 
        if (kvm_set_xcr(vcpu, index, new_bv) == 0)
-               skip_emulated_instruction(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
        return 1;
 }
 
 static int handle_xsaves(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
+       kvm_skip_emulated_instruction(vcpu);
        WARN(1, "this should never happen\n");
        return 1;
 }
 
 static int handle_xrstors(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
+       kvm_skip_emulated_instruction(vcpu);
        WARN(1, "this should never happen\n");
        return 1;
 }
@@ -6016,8 +6247,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
                if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
                    (offset == APIC_EOI)) {
                        kvm_lapic_set_eoi(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
        }
        return emulate_instruction(vcpu, 0) == EMULATE_DONE;
@@ -6165,9 +6395,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
        if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
-               skip_emulated_instruction(vcpu);
                trace_kvm_fast_mmio(gpa);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        ret = handle_mmio_page_fault(vcpu, gpa, true);
@@ -6352,50 +6581,13 @@ static __init int hardware_setup(void)
        for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
                kvm_define_shared_msr(i, vmx_msr_index[i]);
 
-       vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_a)
-               return r;
+       for (i = 0; i < VMX_BITMAP_NR; i++) {
+               vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!vmx_bitmap[i])
+                       goto out;
+       }
 
        vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_b)
-               goto out;
-
-       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy)
-               goto out1;
-
-       vmx_msr_bitmap_legacy_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy_x2apic)
-               goto out2;
-
-       vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
-               goto out3;
-
-       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode)
-               goto out4;
-
-       vmx_msr_bitmap_longmode_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode_x2apic)
-               goto out5;
-
-       vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
-               goto out6;
-
-       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmread_bitmap)
-               goto out7;
-
-       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmwrite_bitmap)
-               goto out8;
-
        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 
@@ -6413,7 +6605,7 @@ static __init int hardware_setup(void)
 
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
-               goto out9;
+               goto out;
        }
 
        if (boot_cpu_has(X86_FEATURE_NX))
@@ -6476,39 +6668,34 @@ static __init int hardware_setup(void)
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
        vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
 
-       memcpy(vmx_msr_bitmap_legacy_x2apic,
+       memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic,
+       memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
                        vmx_msr_bitmap_longmode, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+       memcpy(vmx_msr_bitmap_legacy_x2apic,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+       memcpy(vmx_msr_bitmap_longmode_x2apic,
                        vmx_msr_bitmap_longmode, PAGE_SIZE);
 
        set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
+       for (msr = 0x800; msr <= 0x8ff; msr++) {
+               if (msr == 0x839 /* TMCCT */)
+                       continue;
+               vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+       }
+
        /*
-        * enable_apicv && kvm_vcpu_apicv_active()
+        * TPR reads and writes can be virtualized even if virtual interrupt
+        * delivery is not in use.
         */
-       for (msr = 0x800; msr <= 0x8ff; msr++)
-               vmx_disable_intercept_msr_read_x2apic(msr, true);
+       vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+       vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
 
-       /* TMCCT */
-       vmx_enable_intercept_msr_read_x2apic(0x839, true);
-       /* TPR */
-       vmx_disable_intercept_msr_write_x2apic(0x808, true);
        /* EOI */
-       vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+       vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
        /* SELF-IPI */
-       vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
-       /*
-        * (enable_apicv && !kvm_vcpu_apicv_active()) ||
-        *      !enable_apicv
-        */
-       /* TPR */
-       vmx_disable_intercept_msr_read_x2apic(0x808, false);
-       vmx_disable_intercept_msr_write_x2apic(0x808, false);
+       vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
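
The loop covers MSRs 0x800-0x8ff because the x2APIC maps each APIC register at MSR 0x800 + (xAPIC MMIO offset >> 4); that is how TPR (offset 0x80), EOI (0xb0), TMCCT (0x390) and SELF-IPI (0x3f0) become 0x808, 0x80b, 0x839 and 0x83f above. A standalone sketch of the mapping (not kernel code):

    /* xAPIC MMIO register offset -> x2APIC MSR number. */
    #include <assert.h>
    #include <stdint.h>

    static uint32_t x2apic_msr(uint32_t mmio_offset)
    {
        return 0x800 + (mmio_offset >> 4);
    }

    int main(void)
    {
        assert(x2apic_msr(0x080) == 0x808); /* TPR      */
        assert(x2apic_msr(0x0b0) == 0x80b); /* EOI      */
        assert(x2apic_msr(0x390) == 0x839); /* TMCCT    */
        assert(x2apic_msr(0x3f0) == 0x83f); /* SELF-IPI */
        return 0;
    }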
 
        if (enable_ept) {
                kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6555,42 +6742,19 @@ static __init int hardware_setup(void)
 
        return alloc_kvm_area();
 
-out9:
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
-       free_page((unsigned long)vmx_vmread_bitmap);
-out7:
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-out3:
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-       free_page((unsigned long)vmx_io_bitmap_b);
 out:
-       free_page((unsigned long)vmx_io_bitmap_a);
+       for (i = 0; i < VMX_BITMAP_NR; i++)
+               free_page((unsigned long)vmx_bitmap[i]);
 
        return r;
 }
 
 static __exit void hardware_unsetup(void)
 {
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-       free_page((unsigned long)vmx_io_bitmap_b);
-       free_page((unsigned long)vmx_io_bitmap_a);
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-       free_page((unsigned long)vmx_vmread_bitmap);
+       int i;
+
+       for (i = 0; i < VMX_BITMAP_NR; i++)
+               free_page((unsigned long)vmx_bitmap[i]);
 
        free_kvm_area();
 }
@@ -6604,16 +6768,13 @@ static int handle_pause(struct kvm_vcpu *vcpu)
        if (ple_gap)
                grow_ple_window(vcpu);
 
-       skip_emulated_instruction(vcpu);
        kvm_vcpu_on_spin(vcpu);
-
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_nop(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_mwait(struct kvm_vcpu *vcpu)
@@ -6920,8 +7081,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
                 */
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failInvalid(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                page = nested_get_page(vcpu, vmptr);
@@ -6929,8 +7089,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
                    *(u32 *)kmap(page) != VMCS12_REVISION) {
                        nested_vmx_failInvalid(vcpu);
                        kunmap(page);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                kunmap(page);
                vmx->nested.vmxon_ptr = vmptr;
@@ -6939,30 +7098,26 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_INVALID_ADDRESS);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                if (vmptr == vmx->nested.vmxon_ptr) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_VMXON_POINTER);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        case EXIT_REASON_VMPTRLD:
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMPTRLD_INVALID_ADDRESS);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                if (vmptr == vmx->nested.vmxon_ptr) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_VMXON_POINTER);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        default:
@@ -7018,8 +7173,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 
        if (vmx->nested.vmxon) {
                nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
@@ -7059,9 +7213,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 
        vmx->nested.vmxon = true;
 
-       skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 
 out_shadow_vmcs:
        kfree(vmx->nested.cached_vmcs12);
@@ -7180,9 +7333,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
        if (!nested_vmx_check_permission(vcpu))
                return 1;
        free_nested(to_vmx(vcpu));
-       skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMCLEAR instruction */
@@ -7221,9 +7373,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 
        nested_free_vmcs02(vmx, vmptr);
 
-       skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
@@ -7421,7 +7572,6 @@ static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        if (vmx->nested.current_vmptr == -1ull) {
                nested_vmx_failInvalid(vcpu);
-               skip_emulated_instruction(vcpu);
                return 0;
        }
        return 1;
@@ -7435,17 +7585,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        gva_t gva = 0;
 
-       if (!nested_vmx_check_permission(vcpu) ||
-           !nested_vmx_check_vmcs12(vcpu))
+       if (!nested_vmx_check_permission(vcpu))
                return 1;
 
+       if (!nested_vmx_check_vmcs12(vcpu))
+               return kvm_skip_emulated_instruction(vcpu);
+
        /* Decode instruction info and find the field to read */
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
        /* Read the field, zero-extended to a u64 field_value */
        if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
        /*
         * Now copy part of this value to register or memory, as requested.
@@ -7465,8 +7616,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 
@@ -7485,10 +7635,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
        u64 field_value = 0;
        struct x86_exception e;
 
-       if (!nested_vmx_check_permission(vcpu) ||
-           !nested_vmx_check_vmcs12(vcpu))
+       if (!nested_vmx_check_permission(vcpu))
                return 1;
 
+       if (!nested_vmx_check_vmcs12(vcpu))
+               return kvm_skip_emulated_instruction(vcpu);
+
        if (vmx_instruction_info & (1u << 10))
                field_value = kvm_register_readl(vcpu,
                        (((vmx_instruction_info) >> 3) & 0xf));
@@ -7508,19 +7660,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
        if (vmcs_field_readonly(field)) {
                nested_vmx_failValid(vcpu,
                        VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        if (vmcs12_write_any(vcpu, field, field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMPTRLD instruction */
@@ -7541,8 +7690,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                page = nested_get_page(vcpu, vmptr);
                if (page == NULL) {
                        nested_vmx_failInvalid(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                new_vmcs12 = kmap(page);
                if (new_vmcs12->revision_id != VMCS12_REVISION) {
@@ -7550,8 +7698,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                        nested_release_page_clean(page);
                        nested_vmx_failValid(vcpu,
                                VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                nested_release_vmcs12(vmx);
@@ -7575,8 +7722,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMPTRST instruction */
@@ -7601,8 +7747,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
                return 1;
        }
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the INVEPT instruction */
@@ -7640,8 +7785,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        /* According to the Intel VMX instruction reference, the memory
@@ -7672,8 +7816,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
                break;
        }
 
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
@@ -7698,13 +7841,13 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
-       types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+       types = (vmx->nested.nested_vmx_vpid_caps &
+                       VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
 
        if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                        VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        /* According to the Intel VMX instruction reference, the memory
@@ -7720,23 +7863,26 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        }
 
        switch (type) {
+       case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
-               /*
-                * Old versions of KVM use the single-context version so we
-                * have to support it; just treat it the same as all-context.
-                */
+       case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
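+               /*
+                * These extent types name a specific VPID; the SDM defines
+                * INVVPID to fail with an invalid operand when that VPID is
+                * zero.
+                */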
+               if (!vpid) {
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+                       return kvm_skip_emulated_instruction(vcpu);
+               }
+               break;
        case VMX_VPID_EXTENT_ALL_CONTEXT:
-               __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
-               nested_vmx_succeed(vcpu);
                break;
        default:
-               /* Trap individual address invalidation invvpid calls */
-               BUG_ON(1);
-               break;
+               WARN_ON_ONCE(1);
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
-       skip_emulated_instruction(vcpu);
-       return 1;
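+       /*
+        * All supported extent types are handled by flushing the entire
+        * vpid02 context; invalidating more than requested is always safe.
+        */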
+       __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+       nested_vmx_succeed(vcpu);
+
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_pml_full(struct kvm_vcpu *vcpu)
@@ -8075,6 +8221,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
        case EXIT_REASON_IO_INSTRUCTION:
                return nested_vmx_exit_handled_io(vcpu, vmcs12);
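+       /*
+        * Reflect descriptor-table exits to L1 only if L1 enabled the
+        * "descriptor-table exiting" secondary execution control.
+        */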
+       case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
        case EXIT_REASON_MSR_READ:
        case EXIT_REASON_MSR_WRITE:
                return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -8624,11 +8772,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
        u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
        register void *__sp asm(_ASM_SP);
 
-       /*
-        * If external interrupt exists, IF bit is set in rflags/eflags on the
-        * interrupt stack frame, and interrupt will be enabled on a return
-        * from interrupt handler.
-        */
        if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
                        == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
                unsigned int vector;
@@ -8813,7 +8956,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                        msrs[i].host);
 }
 
-void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 tscl;
@@ -9283,6 +9426,50 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
                     (new_ctl & ~mask) | (cur_ctl & mask));
 }
 
+/*
+ * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. A bit is set
+ * (indicating "allowed-1") only if the feature it controls is supported by
+ * the guest's CPUID.
+ */
+static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_cpuid_entry2 *entry;
+
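+       /*
+        * Every CR0 bit is reported as allowed-1. For CR4, start from PCE
+        * and set further allowed-1 bits below according to the guest's
+        * CPUID.
+        */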
+       vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
+       vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+
+#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {           \
+       if (entry && (entry->_reg & (_cpuid_mask)))                     \
+               vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);       \
+} while (0)
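+/*
+ * For example, cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP))
+ * marks CR4.SMEP as allowed-1 only when the guest's CPUID.(EAX=7,ECX=0):EBX
+ * advertises SMEP.
+ */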
+
+       entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+       cr4_fixed1_update(X86_CR4_VME,        edx, bit(X86_FEATURE_VME));
+       cr4_fixed1_update(X86_CR4_PVI,        edx, bit(X86_FEATURE_VME));
+       cr4_fixed1_update(X86_CR4_TSD,        edx, bit(X86_FEATURE_TSC));
+       cr4_fixed1_update(X86_CR4_DE,         edx, bit(X86_FEATURE_DE));
+       cr4_fixed1_update(X86_CR4_PSE,        edx, bit(X86_FEATURE_PSE));
+       cr4_fixed1_update(X86_CR4_PAE,        edx, bit(X86_FEATURE_PAE));
+       cr4_fixed1_update(X86_CR4_MCE,        edx, bit(X86_FEATURE_MCE));
+       cr4_fixed1_update(X86_CR4_PGE,        edx, bit(X86_FEATURE_PGE));
+       cr4_fixed1_update(X86_CR4_OSFXSR,     edx, bit(X86_FEATURE_FXSR));
+       cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
+       cr4_fixed1_update(X86_CR4_VMXE,       ecx, bit(X86_FEATURE_VMX));
+       cr4_fixed1_update(X86_CR4_SMXE,       ecx, bit(X86_FEATURE_SMX));
+       cr4_fixed1_update(X86_CR4_PCIDE,      ecx, bit(X86_FEATURE_PCID));
+       cr4_fixed1_update(X86_CR4_OSXSAVE,    ecx, bit(X86_FEATURE_XSAVE));
+
+       entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+       cr4_fixed1_update(X86_CR4_FSGSBASE,   ebx, bit(X86_FEATURE_FSGSBASE));
+       cr4_fixed1_update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
+       cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
+       cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
+       /*
+        * TODO: Use the X86_CR4_UMIP and X86_FEATURE_UMIP macros once they
+        * are available (CR4.UMIP is bit 11; UMIP is CPUID.(EAX=7,ECX=0):ECX
+        * bit 2).
+        */
+       cr4_fixed1_update(bit(11),            ecx, bit(2));
+
+#undef cr4_fixed1_update
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -9324,6 +9511,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
        else
                to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
                        ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+       if (nested_vmx_allowed(vcpu))
+               nested_vmx_cr_fixed1_bits_update(vcpu);
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9967,6 +10157,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         */
        vmx_set_constant_host_state(vmx);
 
+       /*
+        * Set the MSR load/store lists to match L0's settings, so that the
+        * MSRs L0 switches atomically around VM entry/exit (tracked in
+        * vmx->msr_autoload) keep being switched while L2 runs.
+        */
+       vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+       vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+       vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
        /*
         * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
         * entry, but only if the current (host) sp changed from the value
@@ -10130,11 +10329,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        bool ia32e;
        u32 msr_entry_idx;
 
-       if (!nested_vmx_check_permission(vcpu) ||
-           !nested_vmx_check_vmcs12(vcpu))
+       if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       skip_emulated_instruction(vcpu);
+       if (!nested_vmx_check_vmcs12(vcpu))
+               goto out;
+
        vmcs12 = get_vmcs12(vcpu);
 
        if (enable_shadow_vmcs)
@@ -10154,37 +10354,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                nested_vmx_failValid(vcpu,
                        launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
                               : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
-               return 1;
+               goto out;
        }
 
        if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
            vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
        if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
        if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
        if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
        if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
        if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-                               vmx->nested.nested_vmx_true_procbased_ctls_low,
+                               vmx->nested.nested_vmx_procbased_ctls_low,
                                vmx->nested.nested_vmx_procbased_ctls_high) ||
            !vmx_control_verify(vmcs12->secondary_vm_exec_control,
                                vmx->nested.nested_vmx_secondary_ctls_low,
@@ -10193,33 +10393,33 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                                vmx->nested.nested_vmx_pinbased_ctls_low,
                                vmx->nested.nested_vmx_pinbased_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_exit_controls,
-                               vmx->nested.nested_vmx_true_exit_ctls_low,
+                               vmx->nested.nested_vmx_exit_ctls_low,
                                vmx->nested.nested_vmx_exit_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_entry_controls,
-                               vmx->nested.nested_vmx_true_entry_ctls_low,
+                               vmx->nested.nested_vmx_entry_ctls_low,
                                vmx->nested.nested_vmx_entry_ctls_high))
        {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-               return 1;
+               goto out;
        }
 
-       if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
-           ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+       if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
+           !nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) {
                nested_vmx_failValid(vcpu,
                        VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
-               return 1;
+               goto out;
        }
 
-       if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
-           ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+       if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
+           !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                        EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-               return 1;
+               goto out;
        }
        if (vmcs12->vmcs_link_pointer != -1ull) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                        EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
-               return 1;
+               goto out;
        }
 
        /*
@@ -10239,7 +10439,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
                        nested_vmx_entry_failure(vcpu, vmcs12,
                                EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-                       return 1;
+                       goto out;
                }
        }
 
@@ -10257,7 +10457,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
                        nested_vmx_entry_failure(vcpu, vmcs12,
                                EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
-                       return 1;
+                       goto out;
                }
        }
 
@@ -10270,6 +10470,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        if (!vmcs02)
                return -ENOMEM;
 
+       /*
+        * After this point, the trap flag no longer triggers a singlestep trap
+        * on the vm entry instructions. Don't call
+        * kvm_skip_emulated_instruction.
+        */
+       skip_emulated_instruction(vcpu);
        enter_guest_mode(vcpu);
 
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -10311,6 +10517,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         * the success flag) when L2 exits (see nested_vmx_vmexit()).
         */
        return 1;
+
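+       /*
+        * All checks that fail before the VM entry is committed jump here,
+        * so that RIP is advanced past VMLAUNCH/VMRESUME exactly once.
+        */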
+out:
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /*
@@ -10755,6 +10964,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       u32 vm_inst_error = 0;
 
        /* trying to cancel vmlaunch/vmresume is a bug */
        WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -10767,6 +10977,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                                 vmcs12->vm_exit_msr_store_count))
                nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
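+       /*
+        * VM_INSTRUCTION_ERROR is read from the current VMCS, so if the
+        * launch of vmcs02 failed, capture the error below before
+        * vmx_load_vmcs01() switches back to vmcs01.
+        */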
 
+       if (unlikely(vmx->fail))
+               vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+
        vmx_load_vmcs01(vcpu);
 
        if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -10795,6 +11008,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        load_vmcs12_host_state(vcpu, vmcs12);
 
        /* Update any VMCS fields that might have changed while L2 ran */
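+       /* vmx->msr_autoload.nr may have changed while L2 ran. */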
+       vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+       vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
        vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
        if (vmx->hv_deadline_tsc == -1)
                vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -10843,7 +11058,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         */
        if (unlikely(vmx->fail)) {
                vmx->fail = 0;
-               nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+               nested_vmx_failValid(vcpu, vm_inst_error);
        } else
                nested_vmx_succeed(vcpu);
        if (enable_shadow_vmcs)