};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+static bool __read_mostly nosmt;
+module_param(nosmt, bool, S_IRUGO);
+
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
extern const ulong vmx_return;
+static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
+
+/* These MUST be in sync with vmentry_l1d_param order. */
+enum vmx_l1d_flush_state {
+ VMENTER_L1D_FLUSH_NEVER,
+ VMENTER_L1D_FLUSH_COND,
+ VMENTER_L1D_FLUSH_ALWAYS,
+};
+
+static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush = VMENTER_L1D_FLUSH_COND;
+
+static const struct {
+ const char *option;
+ enum vmx_l1d_flush_state cmd;
+} vmentry_l1d_param[] = {
+ {"never", VMENTER_L1D_FLUSH_NEVER},
+ {"cond", VMENTER_L1D_FLUSH_COND},
+ {"always", VMENTER_L1D_FLUSH_ALWAYS},
+};
+
+static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
+{
+ unsigned int i;
+
+ if (!s)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
+ if (!strcmp(s, vmentry_l1d_param[i].option)) {
+ vmentry_l1d_flush = vmentry_l1d_param[i].cmd;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
+{
+ return sprintf(s, "%s\n", vmentry_l1d_param[vmentry_l1d_flush].option);
+}
+
+static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+ .set = vmentry_l1d_flush_set,
+ .get = vmentry_l1d_flush_get,
+};
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, &vmentry_l1d_flush, S_IRUGO);
+
#define NR_AUTOLOAD_MSRS 8
struct vmcs {
}
}
+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
+ * flush it is required to read in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32KiB L1D cache size
+ * there is no point in doing so.
+ */
+#define L1D_CACHE_ORDER 4
+static void *vmx_l1d_flush_pages;
+
+static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
+{
+ int size = PAGE_SIZE << L1D_CACHE_ORDER;
+ bool always;
+
+ /*
+ * If the mitigation mode is 'flush always', keep the flush bit
+ * set, otherwise clear it. It gets set again either from
+ * vcpu_run() or from one of the unsafe VMEXIT handlers.
+ */
+ always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+ vcpu->arch.l1tf_flush_l1d = always;
+
+ vcpu->stat.l1d_flush++;
+
+ if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ return;
+ }
+
+ asm volatile(
+ /* First ensure the pages are in the TLB */
+ "xorl %%eax, %%eax\n"
+ ".Lpopulate_tlb:\n\t"
+ "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $4096, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lpopulate_tlb\n\t"
+ "xorl %%eax, %%eax\n\t"
+ "cpuid\n\t"
+ /* Now fill the cache */
+ "xorl %%eax, %%eax\n"
+ ".Lfill_cache:\n"
+ "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $64, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lfill_cache\n\t"
+ "lfence\n"
+ :: [empty_zp] "r" (vmx_l1d_flush_pages),
+ [size] "r" (size)
+ : "eax", "ebx", "ecx", "edx");
+}
+
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
+ vcpu->arch.l1tf_flush_l1d = true;
}
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
vmx->__launched = vmx->loaded_vmcs->launched;
+
+ if (static_branch_unlikely(&vmx_l1d_should_flush)) {
+ if (vcpu->arch.l1tf_flush_l1d)
+ vmx_l1d_flush(vcpu);
+ }
+
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
return ERR_PTR(err);
}
+#define L1TF_MSG "SMT enabled with L1TF CPU bug present. Refer to CVE-2018-3620 for details.\n"
+
+static int vmx_vm_init(struct kvm *kvm)
+{
+ if (boot_cpu_has(X86_BUG_L1TF) && cpu_smt_control == CPU_SMT_ENABLED) {
+ if (nosmt) {
+ pr_err(L1TF_MSG);
+ return -EOPNOTSUPP;
+ }
+ pr_warn(L1TF_MSG);
+ }
+ return 0;
+}
+
static void __init vmx_check_processor_compat(void *rtn)
{
struct vmcs_config vmcs_conf;
if (ret)
return ret;
+ /* Hide L1D cache contents from the nested guest. */
+ vmx->vcpu.arch.l1tf_flush_l1d = true;
+
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
return kvm_vcpu_halt(vcpu);
.cpu_has_accelerated_tpr = report_flexpriority,
.has_emulated_msr = vmx_has_emulated_msr,
+ .vm_init = vmx_vm_init,
+
.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
.vcpu_reset = vmx_vcpu_reset,
.enable_smi_window = enable_smi_window,
};
+static int __init vmx_setup_l1d_flush(void)
+{
+ struct page *page;
+
+ if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
+ !boot_cpu_has_bug(X86_BUG_L1TF))
+ return 0;
+
+ if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+ if (!page)
+ return -ENOMEM;
+ vmx_l1d_flush_pages = page_address(page);
+ }
+
+ static_branch_enable(&vmx_l1d_should_flush);
+ return 0;
+}
+
+static void vmx_free_l1d_flush_pages(void)
+{
+ if (vmx_l1d_flush_pages) {
+ free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+ vmx_l1d_flush_pages = NULL;
+ }
+}
+
static int __init vmx_init(void)
{
- int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
+ int r;
+
+ r = vmx_setup_l1d_flush();
if (r)
return r;
+ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r) {
+ vmx_free_l1d_flush_pages();
+ return r;
+ }
+
#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
kvm_exit();
+
+ vmx_free_l1d_flush_pages();
}
module_init(vmx_init)