KVM: nVMX: Allow to disable VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 33e8c028842fb4b0b59bc269a973b195a104cdf8..b93e2ae2bb628adc5a23f2fcd3f5bce085f92e71 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -354,6 +354,7 @@ struct vmcs02_list {
 struct nested_vmx {
        /* Has the level1 guest done vmxon? */
        bool vmxon;
+       gpa_t vmxon_ptr;
 
        /* The guest-physical address of the current VMCS L1 keeps for L2 */
        gpa_t current_vmptr;
@@ -382,6 +383,9 @@ struct nested_vmx {
 
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
+
+       /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
+       u64 vmcs01_debugctl;
 };
 
 #define POSTED_INTR_ON  0
@@ -413,7 +417,6 @@ struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        unsigned long         host_rsp;
        u8                    fail;
-       u8                    cpl;
        bool                  nmi_known_unmasked;
        u32                   exit_intr_info;
        u32                   idt_vectoring_info;
@@ -2239,10 +2242,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
  * or other means.
  */
 static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
+static u32 nested_vmx_true_procbased_ctls_low;
 static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
+static u32 nested_vmx_true_exit_ctls_low;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_true_entry_ctls_low;
 static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
@@ -2265,25 +2271,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        /* pin-based controls */
        rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
              nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
-       /*
-        * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
-        * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
-        */
        nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
        nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
                PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
        nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
                PIN_BASED_VMX_PREEMPTION_TIMER;
 
-       /*
-        * Exit controls
-        * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
-        * 17 must be 1.
-        */
+       /* exit controls */
        rdmsr(MSR_IA32_VMX_EXIT_CTLS,
                nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
        nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
-       /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
+
        nested_vmx_exit_ctls_high &=
 #ifdef CONFIG_X86_64
                VM_EXIT_HOST_ADDR_SPACE_SIZE |
@@ -2291,14 +2289,18 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
        nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
                VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
-               VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+               VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
+
        if (vmx_mpx_supported())
                nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
+       /* We support free control of debug control saving. */
+       nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
+               ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
                nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
-       /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
        nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
        nested_vmx_entry_ctls_high &=
 #ifdef CONFIG_X86_64
@@ -2310,10 +2312,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        if (vmx_mpx_supported())
                nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
+       /* We support free control of debug control loading. */
+       nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
+               ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+
        /* cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
                nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
-       nested_vmx_procbased_ctls_low = 0;
+       nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
        nested_vmx_procbased_ctls_high &=
                CPU_BASED_VIRTUAL_INTR_PENDING |
                CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
@@ -2334,7 +2340,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
         * can use it to avoid exits to L1 - even when L0 runs L2
         * without MSR bitmaps.
         */
-       nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS;
+       nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
+               CPU_BASED_USE_MSR_BITMAPS;
+
+       /* We support free control of CR3 access interception. */
+       nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low &
+               ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
        /* secondary cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
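
The capability pairs built here follow the VMX convention: a bit set in the low word must be 1 in the corresponding control field, a bit set in the high word may be 1. The nested_vmx_true_*_low variants drop the default1 bits that the TRUE capability MSRs let a guest hypervisor clear, such as CR3-access exiting. A standalone userspace sketch of that derivation (bit values mirrored from arch/x86/include/asm/vmx.h; not kernel code):

#include <stdint.h>
#include <stdio.h>

#define CPU_BASED_CR3_LOAD_EXITING   0x00008000u
#define CPU_BASED_CR3_STORE_EXITING  0x00010000u
/* default1 ("must be 1") bits of the classic procbased capability MSR */
#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172u

int main(void)
{
	uint32_t procbased_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
	/* TRUE MSR variant: CR3 access interception becomes freely controllable */
	uint32_t true_procbased_low = procbased_low &
		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);

	printf("classic low mask: %#010x\n", procbased_low);      /* 0x0401e172 */
	printf("true    low mask: %#010x\n", true_procbased_low); /* 0x04006172 */
	return 0;
}
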
@@ -2353,12 +2364,11 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                         VMX_EPT_INVEPT_BIT;
                nested_vmx_ept_caps &= vmx_capability.ept;
                /*
-                * Since invept is completely emulated we support both global
-                * and context invalidation independent of what host cpu
-                * supports
+                * For nested guests, we don't do anything specific
+                * for single context invalidation. Hence, only advertise
+                * support for global context invalidation.
                 */
-               nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
-                       VMX_EPT_EXTENT_CONTEXT_BIT;
+               nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
        } else
                nested_vmx_ept_caps = 0;
 
@@ -2394,7 +2404,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                 * guest, and the VMCS structure we give it - not about the
                 * VMX support of the underlying hardware.
                 */
-               *pdata = VMCS12_REVISION |
+               *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
                           ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
                           (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
                break;
@@ -2404,16 +2414,25 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                                        nested_vmx_pinbased_ctls_high);
                break;
        case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+               *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low,
+                                       nested_vmx_procbased_ctls_high);
+               break;
        case MSR_IA32_VMX_PROCBASED_CTLS:
                *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
                                        nested_vmx_procbased_ctls_high);
                break;
        case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+               *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
+                                       nested_vmx_exit_ctls_high);
+               break;
        case MSR_IA32_VMX_EXIT_CTLS:
                *pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
                                        nested_vmx_exit_ctls_high);
                break;
        case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+               *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
+                                       nested_vmx_entry_ctls_high);
+               break;
        case MSR_IA32_VMX_ENTRY_CTLS:
                *pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
                                        nested_vmx_entry_ctls_high);
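
The handler above reports each control pair as one 64-bit MSR value: the "must be 1" settings in bits 31:0 and the "may be 1" settings in bits 63:32, which is what the vmx_control_msr() helper referenced here packs. A minimal sketch of that packing and of the split a guest hypervisor performs when reading the MSR (plain userspace C, assumed to mirror the in-file helper):

#include <stdint.h>
#include <stdio.h>

/* low = allowed-0 ("must be 1") bits, high = allowed-1 ("may be 1") bits */
static uint64_t vmx_control_msr(uint32_t low, uint32_t high)
{
	return low | ((uint64_t)high << 32);
}

int main(void)
{
	uint64_t msr = vmx_control_msr(0x00000016u, 0x0000007fu);
	uint32_t low = (uint32_t)msr, high = (uint32_t)(msr >> 32);

	printf("msr=%#018llx low=%#010x high=%#010x\n",
	       (unsigned long long)msr, low, high);
	return 0;
}
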
@@ -3186,10 +3205,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
        fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
        fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
        fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-
-       /* CPL is always 0 when CPU enters protected mode */
-       __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-       vmx->cpl = 0;
 }
 
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
@@ -3591,22 +3606,14 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (!is_protmode(vcpu))
+       if (unlikely(vmx->rmode.vm86_active))
                return 0;
-
-       if (!is_long_mode(vcpu)
-           && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
-               return 3;
-
-       if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
-               __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
-               vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
+       else {
+               int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
+               return AR_DPL(ar);
        }
-
-       return vmx->cpl;
 }
 
-
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
        u32 ar;
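
vmx_get_cpl() now takes the CPL from SS.DPL in the cached access-rights field instead of tracking it separately: in the VMX segment AR format the DPL sits in bits 6:5, which is all AR_DPL() extracts. A tiny illustration of that extraction (the sample AR value is made up):

#include <stdio.h>

#define AR_DPL_SHIFT 5
#define AR_DPL(ar)   (((ar) >> AR_DPL_SHIFT) & 3)

int main(void)
{
	unsigned int ss_ar = 0xc093;    /* example: present data segment, DPL 0 */
	printf("CPL = %u\n", AR_DPL(ss_ar));
	return 0;
}
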
@@ -3634,8 +3641,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
        const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
        vmx_segment_cache_clear(vmx);
-       if (seg == VCPU_SREG_CS)
-               __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 
        if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
                vmx->rmode.segs[seg] = *var;
@@ -4564,6 +4569,16 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
                PIN_BASED_EXT_INTR_MASK;
 }
 
+/*
+ * In nested virtualization, check if L1 has set
+ * VM_EXIT_ACK_INTR_ON_EXIT
+ */
+static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
+{
+       return get_vmcs12(vcpu)->vm_exit_controls &
+               VM_EXIT_ACK_INTR_ON_EXIT;
+}
+
 static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
 {
        return get_vmcs12(vcpu)->pin_based_vm_exec_control &
@@ -4878,6 +4893,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
                        vcpu->arch.dr6 &= ~15;
                        vcpu->arch.dr6 |= dr6;
+                       if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+                               skip_emulated_instruction(vcpu);
+
                        kvm_queue_exception(vcpu, DB_VECTOR);
                        return 1;
                }
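
For reference, the reasoning behind the icebp test above: the #DB exit qualification carries the DR6 status bits, and if none of B0-B3/BD/BS/BT are set the exception was not raised by a breakpoint or single-step, so it must have come from ICEBP (INT1), a trap-like event whose instruction has to be skipped before the #DB is queued. A rough sketch of that classification; the status-bit mask below is spelled out by hand as an assumption, not the kernel's DR6_RESERVED definition:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* DR6 status bits: B0-B3 (bits 0-3), BD (13), BS (14), BT (15) */
#define DR6_STATUS_BITS (0xfu | (1u << 13) | (1u << 14) | (1u << 15))

static bool db_came_from_icebp(uint64_t exit_qual_dr6)
{
	return (exit_qual_dr6 & DR6_STATUS_BITS) == 0;
}

int main(void)
{
	printf("%d\n", db_came_from_icebp(0x0));      /* 1: icebp */
	printf("%d\n", db_came_from_icebp(1u << 14)); /* 0: single-step (BS) */
	return 0;
}
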
@@ -5166,7 +5184,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                        return 1;
                kvm_register_write(vcpu, reg, val);
        } else
-               if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
+               if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg)))
                        return 1;
 
        skip_emulated_instruction(vcpu);
@@ -5439,7 +5457,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
        }
 
        /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
 
        /*
         * TODO: What about debug traps on tss switch?
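
The hunk above fixes a literal slip: the local breakpoint enable flags L0-L3 live in DR7 bits 0, 2, 4 and 6, so the mask to clear them is 0x55; the old decimal 55 (0x37) cleared some global enables and missed L3 entirely. A one-line check of the mask:

#include <stdio.h>

int main(void)
{
	unsigned int mask = 0;
	for (int i = 0; i < 4; i++)
		mask |= 1u << (2 * i);  /* L0..L3 at bits 0, 2, 4, 6 */
	printf("DR7 local-enable mask = %#x\n", mask);  /* 0x55 */
	return 0;
}
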
@@ -5565,6 +5583,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        gpa_t gpa;
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+       if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+               skip_emulated_instruction(vcpu);
+               return 1;
+       }
 
        ret = handle_mmio_page_fault_common(vcpu, gpa, true);
        if (likely(ret == RET_MMIO_PF_EMULATE))
@@ -5669,12 +5691,24 @@ static int handle_pause(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_invalid_op(struct kvm_vcpu *vcpu)
+static int handle_nop(struct kvm_vcpu *vcpu)
 {
-       kvm_queue_exception(vcpu, UD_VECTOR);
+       skip_emulated_instruction(vcpu);
        return 1;
 }
 
+static int handle_mwait(struct kvm_vcpu *vcpu)
+{
+       printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
+       return handle_nop(vcpu);
+}
+
+static int handle_monitor(struct kvm_vcpu *vcpu)
+{
+       printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
+       return handle_nop(vcpu);
+}
+
 /*
  * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
  * We could reuse a single VMCS for all the L2 guests, but we also want the
@@ -5811,6 +5845,154 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
+/*
+ * Decode the memory-address operand of a vmx instruction, as recorded on an
+ * exit caused by such an instruction (run by a guest hypervisor).
+ * On success, returns 0. When the operand is invalid, returns 1 and throws
+ * #UD or #GP.
+ */
+static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
+                                unsigned long exit_qualification,
+                                u32 vmx_instruction_info, gva_t *ret)
+{
+       /*
+        * According to Vol. 3B, "Information for VM Exits Due to Instruction
+        * Execution", on an exit, vmx_instruction_info holds most of the
+        * addressing components of the operand. Only the displacement part
+        * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
+        * For how an actual address is calculated from all these components,
+        * refer to Vol. 1, "Operand Addressing".
+        */
+       int  scaling = vmx_instruction_info & 3;
+       int  addr_size = (vmx_instruction_info >> 7) & 7;
+       bool is_reg = vmx_instruction_info & (1u << 10);
+       int  seg_reg = (vmx_instruction_info >> 15) & 7;
+       int  index_reg = (vmx_instruction_info >> 18) & 0xf;
+       bool index_is_valid = !(vmx_instruction_info & (1u << 22));
+       int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
+       bool base_is_valid  = !(vmx_instruction_info & (1u << 27));
+
+       if (is_reg) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       /* Addr = segment_base + offset */
+       /* offset = base + [index * scale] + displacement */
+       *ret = vmx_get_segment_base(vcpu, seg_reg);
+       if (base_is_valid)
+               *ret += kvm_register_read(vcpu, base_reg);
+       if (index_is_valid)
+               *ret += kvm_register_read(vcpu, index_reg)<<scaling;
+       *ret += exit_qualification; /* holds the displacement */
+
+       if (addr_size == 1) /* 32 bit */
+               *ret &= 0xffffffff;
+
+       /*
+        * TODO: throw #GP (and return 1) in various cases that the VM*
+        * instructions require it - e.g., offset beyond segment limit,
+        * unusable or unreadable/unwritable segment, non-canonical 64-bit
+        * address, and so on. Currently these are not checked.
+        */
+       return 0;
+}
+
+/*
+ * Checks the vmpointer operand of VMXON/VMCLEAR/VMPTRLD:
+ * - it must be 4KB aligned
+ * - no bits beyond the physical address width may be set
+ * Returns 0 on success, or 1 after signalling the failure to the guest.
+ * (Intel SDM Section 30.3)
+ */
+static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
+                                 gpa_t *vmpointer)
+{
+       gva_t gva;
+       gpa_t vmptr;
+       struct x86_exception e;
+       struct page *page;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+       if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+                       vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+               return 1;
+
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
+                               sizeof(vmptr), &e)) {
+               kvm_inject_page_fault(vcpu, &e);
+               return 1;
+       }
+
+       switch (exit_reason) {
+       case EXIT_REASON_VMON:
+               /*
+                * SDM 3: 24.11.5
+                * The first 4 bytes of VMXON region contain the supported
+                * VMCS revision identifier
+                *
+                * Note - IA32_VMX_BASIC[48] will never be 1
+                * for the nested case;
+                * which replaces physical address width with 32
+                *
+                */
+               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
+                       nested_vmx_failInvalid(vcpu);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+
+               page = nested_get_page(vcpu, vmptr);
+               if (page == NULL) {
+                       nested_vmx_failInvalid(vcpu);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+                       nested_vmx_failInvalid(vcpu);
+                       kunmap(page);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               kunmap(page);
+               vmx->nested.vmxon_ptr = vmptr;
+               break;
+       case EXIT_REASON_VMCLEAR:
+               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
+                       nested_vmx_failValid(vcpu,
+                                            VMXERR_VMCLEAR_INVALID_ADDRESS);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+
+               if (vmptr == vmx->nested.vmxon_ptr) {
+                       nested_vmx_failValid(vcpu,
+                                            VMXERR_VMCLEAR_VMXON_POINTER);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               break;
+       case EXIT_REASON_VMPTRLD:
+               if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
+                       nested_vmx_failValid(vcpu,
+                                            VMXERR_VMPTRLD_INVALID_ADDRESS);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+
+               if (vmptr == vmx->nested.vmxon_ptr) {
+                       nested_vmx_failValid(vcpu,
+                                            VMXERR_VMPTRLD_VMXON_POINTER);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               break;
+       default:
+               return 1; /* shouldn't happen */
+       }
+
+       if (vmpointer)
+               *vmpointer = vmptr;
+       return 0;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
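
get_vmx_mem_address() unpacks the operand encoding that the CPU stores in the VMX-instruction-information field on a VM exit; the layout used above is scaling in bits 1:0, address size in bits 9:7, the register-operand flag in bit 10, the segment register in bits 17:15, and the index/base register numbers with their invalid bits in 21:18/22 and 26:23/27. The same shifts can be exercised in a standalone decoder (userspace sketch, the sample value is invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vmx_mem_op {
	int scaling, addr_size, seg_reg, index_reg, base_reg;
	bool is_reg, index_valid, base_valid;
};

static struct vmx_mem_op decode_vmx_instruction_info(uint32_t info)
{
	struct vmx_mem_op op = {
		.scaling     = info & 3,
		.addr_size   = (info >> 7) & 7,
		.is_reg      = info & (1u << 10),
		.seg_reg     = (info >> 15) & 7,
		.index_reg   = (info >> 18) & 0xf,
		.index_valid = !(info & (1u << 22)),
		.base_reg    = (info >> 23) & 0xf,
		.base_valid  = !(info & (1u << 27)),
	};
	return op;
}

int main(void)
{
	/* made-up encoding: 64-bit addressing, DS segment, base reg 0, no index */
	struct vmx_mem_op op = decode_vmx_instruction_info(0x00418100);

	printf("seg=%d base_valid=%d index_valid=%d addr_size=%d\n",
	       op.seg_reg, op.base_valid, op.index_valid, op.addr_size);
	return 0;
}
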
@@ -5849,6 +6031,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
+
+       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+               return 1;
+
        if (vmx->nested.vmxon) {
                nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
                skip_emulated_instruction(vcpu);
@@ -5971,88 +6157,20 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-/*
- * Decode the memory-address operand of a vmx instruction, as recorded on an
- * exit caused by such an instruction (run by a guest hypervisor).
- * On success, returns 0. When the operand is invalid, returns 1 and throws
- * #UD or #GP.
- */
-static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
-                                unsigned long exit_qualification,
-                                u32 vmx_instruction_info, gva_t *ret)
-{
-       /*
-        * According to Vol. 3B, "Information for VM Exits Due to Instruction
-        * Execution", on an exit, vmx_instruction_info holds most of the
-        * addressing components of the operand. Only the displacement part
-        * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
-        * For how an actual address is calculated from all these components,
-        * refer to Vol. 1, "Operand Addressing".
-        */
-       int  scaling = vmx_instruction_info & 3;
-       int  addr_size = (vmx_instruction_info >> 7) & 7;
-       bool is_reg = vmx_instruction_info & (1u << 10);
-       int  seg_reg = (vmx_instruction_info >> 15) & 7;
-       int  index_reg = (vmx_instruction_info >> 18) & 0xf;
-       bool index_is_valid = !(vmx_instruction_info & (1u << 22));
-       int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
-       bool base_is_valid  = !(vmx_instruction_info & (1u << 27));
-
-       if (is_reg) {
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
-       }
-
-       /* Addr = segment_base + offset */
-       /* offset = base + [index * scale] + displacement */
-       *ret = vmx_get_segment_base(vcpu, seg_reg);
-       if (base_is_valid)
-               *ret += kvm_register_read(vcpu, base_reg);
-       if (index_is_valid)
-               *ret += kvm_register_read(vcpu, index_reg)<<scaling;
-       *ret += exit_qualification; /* holds the displacement */
-
-       if (addr_size == 1) /* 32 bit */
-               *ret &= 0xffffffff;
-
-       /*
-        * TODO: throw #GP (and return 1) in various cases that the VM*
-        * instructions require it - e.g., offset beyond segment limit,
-        * unusable or unreadable/unwritable segment, non-canonical 64-bit
-        * address, and so on. Currently these are not checked.
-        */
-       return 0;
-}
-
 /* Emulate the VMCLEAR instruction */
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       gva_t gva;
        gpa_t vmptr;
        struct vmcs12 *vmcs12;
        struct page *page;
-       struct x86_exception e;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-                       vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
                return 1;
 
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-                               sizeof(vmptr), &e)) {
-               kvm_inject_page_fault(vcpu, &e);
-               return 1;
-       }
-
-       if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
-               nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
-               skip_emulated_instruction(vcpu);
-               return 1;
-       }
-
        if (vmptr == vmx->nested.current_vmptr) {
                nested_release_vmcs12(vmx);
                vmx->nested.current_vmptr = -1ull;
@@ -6372,29 +6490,14 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       gva_t gva;
        gpa_t vmptr;
-       struct x86_exception e;
        u32 exec_control;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
-                       vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
-               return 1;
-
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-                               sizeof(vmptr), &e)) {
-               kvm_inject_page_fault(vcpu, &e);
-               return 1;
-       }
-
-       if (!IS_ALIGNED(vmptr, PAGE_SIZE)) {
-               nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
-               skip_emulated_instruction(vcpu);
+       if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
                return 1;
-       }
 
        if (vmx->nested.current_vmptr != vmptr) {
                struct vmcs12 *new_vmcs12;
@@ -6471,7 +6574,6 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        struct {
                u64 eptp, gpa;
        } operand;
-       u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
 
        if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
            !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
@@ -6511,16 +6613,13 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        }
 
        switch (type) {
-       case VMX_EPT_EXTENT_CONTEXT:
-               if ((operand.eptp & eptp_mask) !=
-                               (nested_ept_get_cr3(vcpu) & eptp_mask))
-                       break;
        case VMX_EPT_EXTENT_GLOBAL:
                kvm_mmu_sync_roots(vcpu);
                kvm_mmu_flush_tlb(vcpu);
                nested_vmx_succeed(vcpu);
                break;
        default:
+               /* Trap single context invalidation invept calls */
                BUG_ON(1);
                break;
        }
@@ -6571,8 +6670,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
        [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
-       [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
-       [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
+       [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
        [EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
@@ -7413,7 +7512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
        vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
                                  | (1 << VCPU_EXREG_RFLAGS)
-                                 | (1 << VCPU_EXREG_CPL)
                                  | (1 << VCPU_EXREG_PDPTR)
                                  | (1 << VCPU_EXREG_SEGMENTS)
                                  | (1 << VCPU_EXREG_CR3));
@@ -7757,7 +7855,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
        vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
 
-       vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+       if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+               kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
+               vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+       } else {
+               kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
+               vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+       }
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                vmcs12->vm_entry_intr_info_field);
        vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -7767,7 +7871,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
                vmcs12->guest_interruptibility_info);
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
-       kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
        vmx_set_rflags(vcpu, vmcs12->guest_rflags);
        vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
                vmcs12->guest_pending_dbg_exceptions);
@@ -7778,7 +7881,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        exec_control = vmcs12->pin_based_vm_exec_control;
        exec_control |= vmcs_config.pin_based_exec_ctrl;
-       exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+       exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER |
+                          PIN_BASED_POSTED_INTR);
        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
 
        vmx->nested.preemption_timer_expired = false;
@@ -7815,7 +7919,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                if (!vmx->rdtscp_enabled)
                        exec_control &= ~SECONDARY_EXEC_RDTSCP;
                /* Take the following fields only from vmcs12 */
-               exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
@@ -8031,14 +8137,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        }
 
        if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
-                       !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
+                       !PAGE_ALIGNED(vmcs12->msr_bitmap)) {
                /*TODO: Also verify bits beyond physical address width are 0*/
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
 
        if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
-                       !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) {
+                       !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
                /*TODO: Also verify bits beyond physical address width are 0*/
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
@@ -8054,15 +8160,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        }
 
        if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-             nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) ||
+                               nested_vmx_true_procbased_ctls_low,
+                               nested_vmx_procbased_ctls_high) ||
            !vmx_control_verify(vmcs12->secondary_vm_exec_control,
              nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
            !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
              nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_exit_controls,
-             nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
+                               nested_vmx_true_exit_ctls_low,
+                               nested_vmx_exit_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_entry_controls,
-             nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
+                               nested_vmx_true_entry_ctls_low,
+                               nested_vmx_entry_ctls_high))
        {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
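
vmx_control_verify() is what gives those capability pairs teeth: every must-be-1 (low) bit has to be present in the value L1 wrote, and nothing outside the allowed set may appear; checking the procbased/exit/entry controls against the nested_vmx_true_*_low masks is exactly what lets L1 clear the default1 bits the TRUE MSRs advertise. A sketch assumed to mirror the in-file helper:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* all "low" bits must be set; no bits outside high|low may be set */
static bool vmx_control_verify(uint32_t control, uint32_t low, uint32_t high)
{
	return ((control & high) | low) == control;
}

int main(void)
{
	/* low = 0x16: bits 1, 2, 4 required; high = 0xff: bits 0-7 allowed */
	printf("%d\n", vmx_control_verify(0x016, 0x16, 0xff)); /* 1: ok */
	printf("%d\n", vmx_control_verify(0x014, 0x16, 0xff)); /* 0: bit 1 missing */
	printf("%d\n", vmx_control_verify(0x116, 0x16, 0xff)); /* 0: bit 8 not allowed */
	return 0;
}
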
@@ -8139,6 +8248,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
        vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
+       if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+               vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
        cpu = get_cpu();
        vmx->loaded_vmcs = vmcs02;
        vmx_vcpu_put(vcpu);
@@ -8316,7 +8428,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
        vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
 
-       kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
        vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
        vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
        vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
@@ -8395,9 +8506,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
                (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
 
+       if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
+               kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
+               vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+       }
+
        /* TODO: These cannot have changed unless we have MSR bitmaps and
         * the relevant bit asks not to trap the change */
-       vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
@@ -8598,6 +8713,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
                       exit_qualification);
 
+       if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+           && nested_exit_intr_ack_set(vcpu)) {
+               int irq = kvm_cpu_get_interrupt(vcpu);
+               WARN_ON(irq < 0);
+               vmcs12->vm_exit_intr_info = irq |
+                       INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
+       }
+
        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
                                       vmcs12->exit_qualification,
                                       vmcs12->idt_vectoring_info_field,
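
When L1 asked for acknowledge-interrupt-on-exit, L0 consumes the pending interrupt with kvm_cpu_get_interrupt() and reports it to L1 through the VM-exit interruption-information field: vector in bits 7:0, event type (external interrupt) in bits 10:8, and the valid flag in bit 31, which is the OR composed above. A tiny sketch of that encoding (constants mirror the VMX field layout; the vector is an example):

#include <stdint.h>
#include <stdio.h>

#define INTR_TYPE_EXT_INTR    (0u << 8)     /* external interrupt */
#define INTR_INFO_VALID_MASK  (1u << 31)

int main(void)
{
	int irq = 0x20;                         /* example vector */
	uint32_t intr_info = (uint32_t)irq | INTR_TYPE_EXT_INTR |
			     INTR_INFO_VALID_MASK;

	printf("vm_exit_intr_info = %#010x\n", intr_info);  /* 0x80000020 */
	return 0;
}
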