x86/KVM/VMX: Expose SPEC_CTRL Bit(2) to the guest
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9361ff616114624642569ad6b34174c22f1f56e1..f8163169fad7c431ae9411b6b8ba841c64a68878 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,8 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
+#include <asm/microcode.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -202,6 +204,10 @@ struct loaded_vmcs {
        bool nmi_known_unmasked;
        unsigned long vmcs_host_cr3;    /* May not match real cr3 */
        unsigned long vmcs_host_cr4;    /* May not match real cr4 */
+       /* Support for vnmi-less CPUs */
+       int soft_vnmi_blocked;
+       ktime_t entry_time;
+       s64 vnmi_blocked_time;
        struct list_head loaded_vmcss_on_cpu_link;
 };
 
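Editor's note: the three fields added above carry NMI-blocking state in software for CPUs without virtual-NMI support. A minimal sketch of the accounting pattern they enable, condensed from the vmx_vcpu_run() and vmx_recover_nmi_blocking() hunks later in this diff (illustration, not a drop-in):

	/* Before VM entry: timestamp the start of the guest's run. */
	if (unlikely(!cpu_has_virtual_nmis() && loaded_vmcs->soft_vnmi_blocked))
		loaded_vmcs->entry_time = ktime_get();

	/* After VM exit: accumulate how long NMIs stayed soft-blocked. */
	if (unlikely(!cpu_has_virtual_nmis() && loaded_vmcs->soft_vnmi_blocked))
		loaded_vmcs->vnmi_blocked_time +=
			ktime_to_ns(ktime_sub(ktime_get(),
					      loaded_vmcs->entry_time));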
@@ -1076,6 +1082,13 @@ static inline bool is_machine_check(u32 intr_info)
                (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
 }
 
+/* Undocumented: icebp/int1 */
+static inline bool is_icebp(u32 intr_info)
+{
+       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+               == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
 static inline bool cpu_has_vmx_msr_bitmap(void)
 {
        return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
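Editor's note: is_icebp() matches exits whose VM-exit interruption type is "privileged software exception" (type 5), which the SDM uses only for INT1/icebp. A hypothetical example value, assuming the usual constants from this file:

	/* Hypothetical intr_info for an icebp-triggered #DB exit:
	 * valid bit + type 5 (priv. software exception) + vector 1.
	 */
	u32 intr_info = INTR_INFO_VALID_MASK |
			INTR_TYPE_PRIV_SW_EXCEPTION |
			DB_VECTOR;

	WARN_ON(!is_icebp(intr_info));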
@@ -1288,6 +1301,11 @@ static inline bool cpu_has_vmx_invpcid(void)
                SECONDARY_EXEC_ENABLE_INVPCID;
 }
 
+static inline bool cpu_has_virtual_nmis(void)
+{
+       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static inline bool cpu_has_vmx_wbinvd_exit(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1339,11 +1357,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
                (vmcs12->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
-       return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
 static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
 {
        return vmcs12->pin_based_vm_exec_control &
@@ -1483,6 +1496,7 @@ static void vmcs_load(struct vmcs *vmcs)
        if (error)
                printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
                       vmcs, phys_addr);
+
 }
 
 #ifdef CONFIG_KEXEC_CORE
@@ -2263,6 +2277,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
                per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
                vmcs_load(vmx->loaded_vmcs->vmcs);
+               if (ibpb_inuse)
+                       native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
        }
 
        if (!already_loaded) {
@@ -2276,7 +2292,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * processors.  See 22.2.4.
                 */
                vmcs_writel(HOST_TR_BASE,
-                           (unsigned long)this_cpu_ptr(&cpu_tss));
+                           (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
@@ -3243,6 +3259,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_TSC:
                msr_info->data = guest_read_tsc(vcpu);
                break;
+       case MSR_IA32_SPEC_CTRL:
+               msr_info->data = vcpu->arch.spec_ctrl;
+               break;
        case MSR_IA32_SYSENTER_CS:
                msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
                break;
@@ -3347,6 +3366,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_TSC:
                kvm_write_tsc(vcpu, msr_info);
                break;
+       case MSR_IA32_SPEC_CTRL:
+               if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_RDS))
+                       return 1;
+
+               vcpu->arch.spec_ctrl = msr_info->data;
+               break;
        case MSR_IA32_CR_PAT:
                if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
                        if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
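Editor's note: together, the vmx_get_msr()/vmx_set_msr() cases above give the guest a SPEC_CTRL MSR that accepts IBRS (bit 0) and RDS/SSBD (bit 2, the bit named in the subject); a write with any other bit set returns 1, which KVM reflects back to the guest as #GP. A hedged guest-side sketch, assuming the standard <asm/msr.h> accessors and that the same SPEC_CTRL_* names are visible to the guest:

	static void guest_enable_ssbd(void)
	{
		u64 val;

		rdmsrl(MSR_IA32_SPEC_CTRL, val);
		val |= SPEC_CTRL_RDS;	/* bit 2; other bits would #GP */
		wrmsrl(MSR_IA32_SPEC_CTRL, val);
	}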
@@ -3676,9 +3701,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                &_vmexit_control) < 0)
                return -EIO;
 
-       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-               PIN_BASED_VIRTUAL_NMIS;
-       opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+       opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+                PIN_BASED_VMX_PREEMPTION_TIMER;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
                                &_pin_based_exec_control) < 0)
                return -EIO;
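Editor's note: demoting PIN_BASED_VIRTUAL_NMIS from min to opt is what allows KVM to load on vNMI-less hardware, because only missing min bits abort setup. A sketch of adjust_vmx_controls() semantics (assumption: condensed from the helper defined earlier in this file):

	static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
					      u32 msr, u32 *result)
	{
		u32 vmx_msr_low, vmx_msr_high;
		u32 ctl = ctl_min | ctl_opt;

		rdmsr(msr, vmx_msr_low, vmx_msr_high);

		ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
		ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */

		/* Only the required (min) bits can fail hardware setup. */
		if (ctl_min & ~ctl)
			return -EIO;

		*result = ctl;
		return 0;
	}

Optional (opt) bits such as PIN_BASED_VIRTUAL_NMIS are silently dropped when unsupported; the code then falls back to the soft-vNMI paths added elsewhere in this diff.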
@@ -5170,7 +5195,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
        vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
        vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-       native_store_idt(&dt);
+       store_idt(&dt);
        vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
        vmx->host_idt_base = dt.address;
 
@@ -5538,7 +5563,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
-       if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+       if (!cpu_has_virtual_nmis() ||
+           vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
                enable_irq_window(vcpu);
                return;
        }
@@ -5578,6 +5604,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+       if (!cpu_has_virtual_nmis()) {
+               /*
+                * Tracking the NMI-blocked state in software is built upon
+                * finding the next open IRQ window. This, in turn, depends on
+                * well-behaved guests: they have to keep IRQs disabled at
+                * least as long as the NMI handler runs. Otherwise we may
+                * cause NMI nesting, maybe breaking the guest. But as this is
+                * highly unlikely, we can live with the residual risk.
+                */
+               vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+               vmx->loaded_vmcs->vnmi_blocked_time = 0;
+       }
+
        ++vcpu->stat.nmi_injections;
        vmx->loaded_vmcs->nmi_known_unmasked = false;
 
@@ -5596,6 +5635,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        bool masked;
 
+       if (!cpu_has_virtual_nmis())
+               return vmx->loaded_vmcs->soft_vnmi_blocked;
        if (vmx->loaded_vmcs->nmi_known_unmasked)
                return false;
        masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5607,13 +5648,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       vmx->loaded_vmcs->nmi_known_unmasked = !masked;
-       if (masked)
-               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-                             GUEST_INTR_STATE_NMI);
-       else
-               vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-                               GUEST_INTR_STATE_NMI);
+       if (!cpu_has_virtual_nmis()) {
+               if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+                       vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+                       vmx->loaded_vmcs->vnmi_blocked_time = 0;
+               }
+       } else {
+               vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+               if (masked)
+                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                     GUEST_INTR_STATE_NMI);
+               else
+                       vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                       GUEST_INTR_STATE_NMI);
+       }
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5621,6 +5669,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
        if (to_vmx(vcpu)->nested.nested_run_pending)
                return 0;
 
+       if (!cpu_has_virtual_nmis() &&
+           to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+               return 0;
+
        return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
                  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
                   | GUEST_INTR_STATE_NMI));
@@ -5802,7 +5854,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
                        vcpu->arch.dr6 &= ~15;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
-                       if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+                       if (is_icebp(intr_info))
                                skip_emulated_instruction(vcpu);
 
                        kvm_queue_exception(vcpu, DB_VECTOR);
@@ -6115,6 +6167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
 
        msr_info.index = ecx;
        msr_info.host_initiated = false;
+
        if (vmx_get_msr(vcpu, &msr_info)) {
                trace_kvm_msr_read_ex(ecx);
                kvm_inject_gp(vcpu, 0);
@@ -6348,6 +6401,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
         * AAK134, BY25.
         */
        if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+                       cpu_has_virtual_nmis() &&
                        (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
 
@@ -6588,12 +6642,7 @@ static __init int hardware_setup(void)
        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 
-       /*
-        * Allow direct access to the PC debug port (it is often used for I/O
-        * delays, but the vmexits simply slow things down).
-        */
        memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-       clear_bit(0x80, vmx_io_bitmap_a);
 
        memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
@@ -6667,6 +6716,8 @@ static __init int hardware_setup(void)
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
        vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
 
        memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
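Editor's note: disabling the intercept clears the MSR's read and write bits in the VMX MSR bitmap, so guest RDMSR/WRMSR of SPEC_CTRL and PRED_CMD no longer cause VM exits. A conceptual sketch of the bitmap layout (assumption: simplified from __vmx_disable_intercept_for_msr() in this file, low MSR range only):

	/* 4 KiB MSR bitmap: read bits at 0x000 (MSRs 0-0x1fff) and
	 * 0x400 (0xc0000000-0xc0001fff); write bits at 0x800/0xc00.
	 * A clear bit means the access does not trigger a VM exit.
	 */
	static void pass_through_msr(unsigned long *msr_bitmap, u32 msr)
	{
		if (msr <= 0x1fff) {
			__clear_bit(msr, msr_bitmap + 0x000 / sizeof(long));
			__clear_bit(msr, msr_bitmap + 0x800 / sizeof(long));
		}
	}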
@@ -6820,7 +6871,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
        }
 
        /* Create a new VMCS */
-       item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+       item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
        if (!item)
                return NULL;
        item->vmcs02.vmcs = alloc_vmcs();
@@ -7837,6 +7888,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
         * "blocked by NMI" bit has to be set before next VM entry.
         */
        if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+                       cpu_has_virtual_nmis() &&
                        (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
                                GUEST_INTR_STATE_NMI);
@@ -8554,6 +8606,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                return 0;
        }
 
+       if (unlikely(!cpu_has_virtual_nmis() &&
+                    vmx->loaded_vmcs->soft_vnmi_blocked)) {
+               if (vmx_interrupt_allowed(vcpu)) {
+                       vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+               } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+                          vcpu->arch.nmi_pending) {
+                       /*
+                        * This CPU doesn't support us in finding the end of an
+                        * NMI-blocked window if the guest runs with IRQs
+                        * disabled. So we pull the trigger after 1 s of
+                        * futile waiting, but inform the user about this.
+                        */
+                       printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+                              "state on VCPU %d after 1 s timeout\n",
+                              __func__, vcpu->vcpu_id);
+                       vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+               }
+       }
+
        if (exit_reason < kvm_vmx_max_exit_handlers
            && kvm_vmx_exit_handlers[exit_reason])
                return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -8772,7 +8843,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
        u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       register void *__sp asm(_ASM_SP);
 
        if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
                        == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -8796,14 +8866,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #endif
                        "pushf\n\t"
                        __ASM_SIZE(push) " $%c[cs]\n\t"
-                       "call *%[entry]\n\t"
+                       CALL_NOSPEC
                        :
 #ifdef CONFIG_X86_64
                        [sp]"=&r"(tmp),
 #endif
-                       "+r"(__sp)
+                       ASM_CALL_CONSTRAINT
                        :
-                       [entry]"r"(entry),
+                       THUNK_TARGET(entry),
                        [ss]"i"(__KERNEL_DS),
                        [cs]"i"(__KERNEL_CS)
                        );
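Editor's note: CALL_NOSPEC replaces the removed "call *%[entry]" with a retpoline-safe call when CONFIG_RETPOLINE is enabled, and THUNK_TARGET supplies the matching operand constraint. The thunk it bounces through looks roughly like this (paraphrased from arch/x86/lib/retpoline.S; shown for %rax):

	/*
	 *   __x86_indirect_thunk_rax:
	 *           call  2f            # push a safe return address
	 *   1:      pause               # speculation is trapped here
	 *           lfence
	 *           jmp   1b
	 *   2:      mov   %rax, (%rsp)  # overwrite return addr with target
	 *           ret                 # architecturally jumps to *%rax
	 */

The indirect branch predictor is never consulted for the target, so a poisoned BTB entry cannot redirect speculation at this call site.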
@@ -8837,33 +8907,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 
        idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-       if (vmx->loaded_vmcs->nmi_known_unmasked)
-               return;
-       /*
-        * Can't use vmx->exit_intr_info since we're not sure what
-        * the exit reason is.
-        */
-       exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
-       vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
-       /*
-        * SDM 3: 27.7.1.2 (September 2008)
-        * Re-set bit "block by NMI" before VM entry if vmexit caused by
-        * a guest IRET fault.
-        * SDM 3: 23.2.2 (September 2008)
-        * Bit 12 is undefined in any of the following cases:
-        *  If the VM exit sets the valid bit in the IDT-vectoring
-        *   information field.
-        *  If the VM exit is due to a double fault.
-        */
-       if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
-           vector != DF_VECTOR && !idtv_info_valid)
-               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-                             GUEST_INTR_STATE_NMI);
-       else
-               vmx->loaded_vmcs->nmi_known_unmasked =
-                       !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
-                         & GUEST_INTR_STATE_NMI);
+       if (cpu_has_virtual_nmis()) {
+               if (vmx->loaded_vmcs->nmi_known_unmasked)
+                       return;
+               /*
+                * Can't use vmx->exit_intr_info since we're not sure what
+                * the exit reason is.
+                */
+               exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+               unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+               vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+               /*
+                * SDM 3: 27.7.1.2 (September 2008)
+                * Re-set bit "block by NMI" before VM entry if vmexit caused by
+                * a guest IRET fault.
+                * SDM 3: 23.2.2 (September 2008)
+                * Bit 12 is undefined in any of the following cases:
+                *  If the VM exit sets the valid bit in the IDT-vectoring
+                *   information field.
+                *  If the VM exit is due to a double fault.
+                */
+               if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+                   vector != DF_VECTOR && !idtv_info_valid)
+                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                     GUEST_INTR_STATE_NMI);
+               else
+                       vmx->loaded_vmcs->nmi_known_unmasked =
+                               !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+                                 & GUEST_INTR_STATE_NMI);
+       } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+               vmx->loaded_vmcs->vnmi_blocked_time +=
+                       ktime_to_ns(ktime_sub(ktime_get(),
+                                             vmx->loaded_vmcs->entry_time));
 }
 
 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8980,6 +9055,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long debugctlmsr, cr3, cr4;
 
+       /* Record the guest's net vcpu time for enforced NMI injections. */
+       if (unlikely(!cpu_has_virtual_nmis() &&
+                    vmx->loaded_vmcs->soft_vnmi_blocked))
+               vmx->loaded_vmcs->entry_time = ktime_get();
+
        /* Don't enter VMX if guest state is invalid, let the exit handler
           start emulation until we arrive back to a valid state */
        if (vmx->emulation_required)
@@ -9026,6 +9106,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                __write_pkru(vcpu->arch.pkru);
 
        atomic_switch_perf_msrs(vmx);
+
+       /* SMB: Ignore ibrs_inuse but rely on vcpu value */
+       x86_spec_ctrl_set_guest(vcpu->arch.spec_ctrl);
+
        debugctlmsr = get_debugctlmsr();
 
        vmx_arm_hv_timer(vcpu);
@@ -9078,6 +9162,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                /* Save guest registers, load host registers, keep flags */
                "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
                "pop %0 \n\t"
+               "setbe %c[fail](%0)\n\t"
                "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
                "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
                __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9094,12 +9179,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                "mov %%r13, %c[r13](%0) \n\t"
                "mov %%r14, %c[r14](%0) \n\t"
                "mov %%r15, %c[r15](%0) \n\t"
+               "xor %%r8d,  %%r8d \n\t"
+               "xor %%r9d,  %%r9d \n\t"
+               "xor %%r10d, %%r10d \n\t"
+               "xor %%r11d, %%r11d \n\t"
+               "xor %%r12d, %%r12d \n\t"
+               "xor %%r13d, %%r13d \n\t"
+               "xor %%r14d, %%r14d \n\t"
+               "xor %%r15d, %%r15d \n\t"
 #endif
                "mov %%cr2, %%" _ASM_AX "   \n\t"
                "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
+               "xor %%eax, %%eax \n\t"
+               "xor %%ebx, %%ebx \n\t"
+               "xor %%esi, %%esi \n\t"
+               "xor %%edi, %%edi \n\t"
                "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-               "setbe %c[fail](%0) \n\t"
                ".pushsection .rodata \n\t"
                ".global vmx_return \n\t"
                "vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9136,6 +9232,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
              );
 
+       x86_spec_ctrl_restore_host(vcpu->arch.spec_ctrl);
+
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (debugctlmsr)
                update_debugctlmsr(debugctlmsr);
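Editor's note: the two added lines above pair with the x86_spec_ctrl_set_guest() call before VM entry: the host SPEC_CTRL value is written back first, then the return stack buffer is stuffed so that no guest-planted return targets survive into host execution. A hedged sketch of the switching idea (the helper name spec_ctrl_switch and the host_spec_ctrl variable are illustrative, not from this tree):

	/* Illustration only; the real helpers live behind asm/spec-ctrl.h. */
	static inline void spec_ctrl_switch(u64 from, u64 to)
	{
		/* Skip the (expensive) WRMSR when nothing changes. */
		if (from != to)
			wrmsrl(MSR_IA32_SPEC_CTRL, to);
	}

	/* Entry: spec_ctrl_switch(host_spec_ctrl, vcpu->arch.spec_ctrl); */
	/* Exit:  spec_ctrl_switch(vcpu->arch.spec_ctrl, host_spec_ctrl); */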