X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=patches%2Fkernel%2F0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch;fp=patches%2Fkernel%2F0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch;h=506d50d82328ac230c0cca4048f12e1f8e8fcc8d;hb=6ad155377dfd50c63669037dbf565a3932ecc8bb;hp=0000000000000000000000000000000000000000;hpb=41e87bf3f0a4ed288e8a2da8570faa8cae3b65be;p=pve-kernel.git

diff --git a/patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch b/patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
new file mode 100644
index 0000000..506d50d
--- /dev/null
+++ b/patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
@@ -0,0 +1,125 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky
+Date: Fri, 31 Jan 2020 08:06:43 -0300
+Subject: [PATCH] x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed
+
+CVE-2019-3016
+CVE-2020-3016
+
+There is a potential race in record_steal_time() between setting
+host-local vcpu->arch.st.steal.preempted to zero (i.e. clearing
+KVM_VCPU_PREEMPTED) and propagating this value to the guest with
+kvm_write_guest_cached(). Between those two events the guest may
+still see KVM_VCPU_PREEMPTED in its copy of kvm_steal_time, set
+KVM_VCPU_FLUSH_TLB and assume that the hypervisor will do the right
+thing. Which it won't.
+
+Instead of copying, we should map kvm_steal_time and that will
+guarantee atomicity of accesses to @preempted.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Boris Ostrovsky
+Reviewed-by: Joao Martins
+Signed-off-by: Thadeu Lima de Souza Cascardo
+Signed-off-by: Thomas Lamprecht
+---
+ arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index cb18560b07bc..f63fa5846f08 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2488,43 +2488,45 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+ 
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
++	struct kvm_host_map map;
++	struct kvm_steal_time *st;
++
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+-	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
++	/* -EAGAIN is returned in atomic context so we can just return. */
++	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
++			&map, &vcpu->arch.st.cache, false))
+ 		return;
+ 
++	st = map.hva +
++		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
+ 	/*
+ 	 * Doing a TLB flush here, on the guest's behalf, can avoid
+ 	 * expensive IPIs.
+ 	 */
+-	if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
++	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ 		kvm_vcpu_flush_tlb(vcpu, false);
+ 
+-	if (vcpu->arch.st.steal.version & 1)
+-		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
++	vcpu->arch.st.steal.preempted = 0;
+ 
+-	vcpu->arch.st.steal.version += 1;
++	if (st->version & 1)
++		st->version += 1;  /* first time write, random junk */
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++	st->version += 1;
+ 
+ 	smp_wmb();
+ 
+-	vcpu->arch.st.steal.steal += current->sched_info.run_delay -
++	st->steal += current->sched_info.run_delay -
+ 		vcpu->arch.st.last_steal;
+ 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+-
+ 	smp_wmb();
+ 
+-	vcpu->arch.st.steal.version += 1;
++	st->version += 1;
+ 
+-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ }
+ 
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -3396,18 +3398,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ 
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
++	struct kvm_host_map map;
++	struct kvm_steal_time *st;
++
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+ 	if (vcpu->arch.st.steal.preempted)
+ 		return;
+ 
+-	vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
++	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
++			&vcpu->arch.st.cache, true))
++		return;
++
++	st = map.hva +
++		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
++	st->preempted = vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+ 
+-	kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-			&vcpu->arch.st.steal.preempted,
+-			offsetof(struct kvm_steal_time, preempted),
+-			sizeof(vcpu->arch.st.steal.preempted));
++	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ }
+ 
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
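
To see the failure mode the commit message describes in a concrete form, here is a small self-contained C11 sketch. It is an illustration only, not kernel code: the shared byte, the flag values, and the single-threaded replay of the problematic interleaving are assumptions made for demonstration. The "copy/write-back" variant mirrors the old pattern around kvm_write_guest_cached() and silently drops a KVM_VCPU_FLUSH_TLB bit set by the guest in the race window; the "in-place xchg" variant mirrors the patched code, which works on the mapped, shared structure directly and therefore cannot miss the flag.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_VCPU_PREEMPTED  (1 << 0)
#define KVM_VCPU_FLUSH_TLB  (1 << 1)

/* Stand-in for the guest-visible kvm_steal_time.preempted byte (illustrative). */
static _Atomic uint8_t preempted = KVM_VCPU_PREEMPTED;

/* Guest side: sees PREEMPTED and asks the hypervisor to flush the TLB for it. */
static void guest_requests_flush(void)
{
	atomic_fetch_or(&preempted, KVM_VCPU_FLUSH_TLB);
}

/*
 * Old pattern: read a snapshot, clear the flag in the local copy, write the
 * copy back. A flush request arriving between read and write-back is lost.
 */
static void host_copy_writeback(void)
{
	uint8_t snap = atomic_load(&preempted);	/* snapshot: PREEMPTED set */
	guest_requests_flush();			/* race window: guest sets FLUSH_TLB */
	snap = 0;				/* host clears only its local copy */
	atomic_store(&preempted, snap);		/* write-back overwrites FLUSH_TLB */
}

/*
 * Patched pattern: operate on the shared byte in place. xchg() clears it and
 * returns the previous value, so a concurrent flush request is always seen.
 */
static void host_inplace_xchg(void)
{
	guest_requests_flush();			/* guest sets FLUSH_TLB */
	uint8_t old = atomic_exchange(&preempted, 0);
	if (old & KVM_VCPU_FLUSH_TLB)
		puts("in-place xchg: flush request observed");
}

int main(void)
{
	host_copy_writeback();
	if (!(atomic_load(&preempted) & KVM_VCPU_FLUSH_TLB))
		puts("copy/write-back: flush request lost");

	atomic_store(&preempted, KVM_VCPU_PREEMPTED);
	host_inplace_xchg();
	return 0;
}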