]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
KVM: SEV: add cache flush to solve SEV cache incoherency issues
authorMingwei Zhang <mizhang@google.com>
Thu, 21 Apr 2022 03:14:07 +0000 (03:14 +0000)
committerStefan Bader <stefan.bader@canonical.com>
Wed, 23 Nov 2022 14:11:32 +0000 (15:11 +0100)
BugLink: https://bugs.launchpad.net/bugs/1995415
commit 683412ccf61294d727ead4a73d97397396e69a6b upstream.

Flush the CPU caches when memory is reclaimed from an SEV guest (where
reclaim also includes it being unmapped from KVM's memslots).  Due to lack
of coherency for SEV encrypted memory, failure to flush results in silent
data corruption if userspace is malicious/broken and doesn't ensure SEV
guest memory is properly pinned and unpinned.

Cache coherency is not enforced across the VM boundary in SEV (AMD APM
vol.2 Section 15.34.7). Confidential cachelines, generated by confidential
VM guests have to be explicitly flushed on the host side. If a memory page
containing dirty confidential cachelines was released by VM and reallocated
to another user, the cachelines may corrupt the new user at a later time.

KVM takes a shortcut by assuming all confidential memory remain pinned
until the end of VM lifetime. Therefore, KVM does not flush cache at
mmu_notifier invalidation events. Because of this incorrect assumption and
the lack of cache flushing, malicous userspace can crash the host kernel:
creating a malicious VM and continuously allocates/releases unpinned
confidential memory pages when the VM is running.

Add cache flush operations to mmu_notifier operations to ensure that any
physical memory leaving the guest VM get flushed. In particular, hook
mmu_notifier_invalidate_range_start and mmu_notifier_release events and
flush cache accordingly. The hook after releasing the mmu lock to avoid
contention with other vCPUs.

Cc: stable@vger.kernel.org
Suggested-by: Sean Christpherson <seanjc@google.com>
Reported-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-4-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[OP: adjusted KVM_X86_OP_OPTIONAL() -> KVM_X86_OP_NULL, applied
kvm_arch_guest_memory_reclaimed() call in kvm_set_memslot()]
Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/kvm_main.c

index 9e50da3ed01a3a2598f153884e1f0d5bc71a0abe..23ea8a25cbbeb6352decea2c7d33e523f0f7c48d 100644 (file)
@@ -115,6 +115,7 @@ KVM_X86_OP(enable_smi_window)
 KVM_X86_OP_NULL(mem_enc_op)
 KVM_X86_OP_NULL(mem_enc_reg_region)
 KVM_X86_OP_NULL(mem_enc_unreg_region)
+KVM_X86_OP_NULL(guest_memory_reclaimed)
 KVM_X86_OP(get_msr_feature)
 KVM_X86_OP(can_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
index 4a25a459412e3efbbe3268ea5862987669767699..1172a201d851b49461559700ce9a615151ae78ee 100644 (file)
@@ -1475,6 +1475,7 @@ struct kvm_x86_ops {
        int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+       void (*guest_memory_reclaimed)(struct kvm *kvm);
 
        int (*get_msr_feature)(struct kvm_msr_entry *entry);
 
index 33b8f350601ef6ab974ef069b5db370d3776c5d4..1f6e5eac3faccc4756cc299e04785dcc290e812b 100644 (file)
@@ -2037,6 +2037,14 @@ static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
        wbinvd_on_all_cpus();
 }
 
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+       if (!sev_guest(kvm))
+               return;
+
+       wbinvd_on_all_cpus();
+}
+
 void sev_free_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
index d1ba4da93985a8a527fd61845be4f9f052582fc1..81b8eb0fa912ad210aa5947ab95723670040c33c 100644 (file)
@@ -4679,6 +4679,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .mem_enc_op = svm_mem_enc_op,
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
+       .guest_memory_reclaimed = sev_guest_memory_reclaimed,
 
        .vm_copy_enc_context_from = svm_vm_copy_asid_from,
 
index cf2d8365aeb4bc6d881470beaa417de51ac53e6d..7004f356edf9487d79094f55a741c08846ac4e58 100644 (file)
@@ -555,6 +555,8 @@ int svm_register_enc_region(struct kvm *kvm,
 int svm_unregister_enc_region(struct kvm *kvm,
                              struct kvm_enc_region *range);
 int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
index 3ad71e68a4a2cc32c9171b40b60c51aa211619b2..9c0f07f931fc1c8bbea8cb09a1ac02b76f053bd0 100644 (file)
@@ -9430,6 +9430,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
                kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
 }
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+       static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+}
+
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
        if (!lapic_in_kernel(vcpu))
index 38b7e9ab48b84d8d713bd736ef101d9244cc83a6..725f8f13adb55cfa900951d72cf73761e84ac5ae 100644 (file)
@@ -1912,6 +1912,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
                                            unsigned long start, unsigned long end);
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
+
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
 #else
index 86fc429a0e438ed61b54661355269d633a12cfec..3ae5f6a3eae497d9dd53c1004eabf29fdc05a0dd 100644 (file)
@@ -162,6 +162,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 {
 }
 
+__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+}
+
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
 {
        /*
@@ -353,6 +357,12 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
        kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
+static void kvm_flush_shadow_all(struct kvm *kvm)
+{
+       kvm_arch_flush_shadow_all(kvm);
+       kvm_arch_guest_memory_reclaimed(kvm);
+}
+
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
                                               gfp_t gfp_flags)
@@ -469,12 +479,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
 typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
                             unsigned long end);
 
+typedef void (*on_unlock_fn_t)(struct kvm *kvm);
+
 struct kvm_hva_range {
        unsigned long start;
        unsigned long end;
        pte_t pte;
        hva_handler_t handler;
        on_lock_fn_t on_lock;
+       on_unlock_fn_t on_unlock;
        bool flush_on_ret;
        bool may_block;
 };
@@ -551,8 +564,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
        if (range->flush_on_ret && ret)
                kvm_flush_remote_tlbs(kvm);
 
-       if (locked)
+       if (locked) {
                KVM_MMU_UNLOCK(kvm);
+               if (!IS_KVM_NULL_FN(range->on_unlock))
+                       range->on_unlock(kvm);
+       }
 
        srcu_read_unlock(&kvm->srcu, idx);
 
@@ -573,6 +589,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
                .pte            = pte,
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = true,
                .may_block      = false,
        };
@@ -592,6 +609,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
                .pte            = __pte(0),
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = false,
                .may_block      = false,
        };
@@ -660,6 +678,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                .pte            = __pte(0),
                .handler        = kvm_unmap_gfn_range,
                .on_lock        = kvm_inc_notifier_count,
+               .on_unlock      = kvm_arch_guest_memory_reclaimed,
                .flush_on_ret   = true,
                .may_block      = mmu_notifier_range_blockable(range),
        };
@@ -711,6 +730,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
                .pte            = __pte(0),
                .handler        = (void *)kvm_null_fn,
                .on_lock        = kvm_dec_notifier_count,
+               .on_unlock      = (void *)kvm_null_fn,
                .flush_on_ret   = false,
                .may_block      = mmu_notifier_range_blockable(range),
        };
@@ -783,7 +803,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
        int idx;
 
        idx = srcu_read_lock(&kvm->srcu);
-       kvm_arch_flush_shadow_all(kvm);
+       kvm_flush_shadow_all(kvm);
        srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -1188,7 +1208,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
        kvm->mn_active_invalidate_count = 0;
 #else
-       kvm_arch_flush_shadow_all(kvm);
+       kvm_flush_shadow_all(kvm);
 #endif
        kvm_arch_destroy_vm(kvm);
        kvm_destroy_devices(kvm);
@@ -1588,6 +1608,7 @@ static int kvm_set_memslot(struct kvm *kvm,
                 *      - kvm_is_visible_gfn (mmu_check_root)
                 */
                kvm_arch_flush_shadow_memslot(kvm, slot);
+               kvm_arch_guest_memory_reclaimed(kvm);
 
                /* Released in install_new_memslots. */
                mutex_lock(&kvm->slots_arch_lock);