git.proxmox.com Git - pve-kernel.git/blobdiff - patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
rebase patches on top of Ubuntu-5.3.0-40.32
diff --git a/patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch b/patches/kernel/0010-x86-KVM-Make-sure-KVM_VCPU_FLUSH_TLB-flag-is-not-mis.patch
new file mode 100644 (file)
index 0000000..506d50d
--- /dev/null
@@ -0,0 +1,125 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Date: Fri, 31 Jan 2020 08:06:43 -0300
+Subject: [PATCH] x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed
+
+CVE-2019-3016
+CVE-2020-3016
+
+There is a potential race in record_steal_time() between setting
+host-local vcpu->arch.st.steal.preempted to zero (i.e. clearing
+KVM_VCPU_PREEMPTED) and propagating this value to the guest with
+kvm_write_guest_cached(). Between those two events the guest may
+still see KVM_VCPU_PREEMPTED in its copy of kvm_steal_time, set
+KVM_VCPU_FLUSH_TLB and assume that the hypervisor will do the right
+thing. Which it won't.
+
+Instead of copying, we should map kvm_steal_time and that will
+guarantee atomicity of accesses to @preempted.
+
+This is part of CVE-2019-3016.
+
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++-------------------
+ 1 file changed, 29 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index cb18560b07bc..f63fa5846f08 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2488,43 +2488,45 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
++      struct kvm_host_map map;
++      struct kvm_steal_time *st;
++
+       if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+               return;
+-      if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-              &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
++      /* -EAGAIN is returned in atomic context so we can just return. */
++      if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
++                      &map, &vcpu->arch.st.cache, false))
+               return;
++      st = map.hva +
++              offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
+       /*
+        * Doing a TLB flush here, on the guest's behalf, can avoid
+        * expensive IPIs.
+        */
+-      if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
++      if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+               kvm_vcpu_flush_tlb(vcpu, false);
+-      if (vcpu->arch.st.steal.version & 1)
+-              vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
++      vcpu->arch.st.steal.preempted = 0;
+-      vcpu->arch.st.steal.version += 1;
++      if (st->version & 1)
++              st->version += 1;  /* first time write, random junk */
+-      kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-              &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++      st->version += 1;
+       smp_wmb();
+-      vcpu->arch.st.steal.steal += current->sched_info.run_delay -
++      st->steal += current->sched_info.run_delay -
+               vcpu->arch.st.last_steal;
+       vcpu->arch.st.last_steal = current->sched_info.run_delay;
+-      kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-              &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+-
+       smp_wmb();
+-      vcpu->arch.st.steal.version += 1;
++      st->version += 1;
+-      kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-              &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
++      kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ }
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -3396,18 +3398,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
++      struct kvm_host_map map;
++      struct kvm_steal_time *st;
++
+       if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+               return;
+       if (vcpu->arch.st.steal.preempted)
+               return;
+-      vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
++      if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
++                      &vcpu->arch.st.cache, true))
++              return;
++
++      st = map.hva +
++              offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++
++      st->preempted = vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+-      kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
+-                      &vcpu->arch.st.steal.preempted,
+-                      offsetof(struct kvm_steal_time, preempted),
+-                      sizeof(vcpu->arch.st.steal.preempted));
++      kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ }
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
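The race window the commit message describes can be hard to picture from the diff alone. The stand-alone sketch below is an illustration only, not part of the patch and not kernel code: it models the old record_steal_time()/kvm_flush_tlb_others() interaction in user space with two threads. The "host" thread clears KVM_VCPU_PREEMPTED in a private copy and then blindly writes the whole value back (the kvm_write_guest_cached() behaviour), while the "guest" thread atomically sets KVM_VCPU_FLUSH_TLB in the shared copy. The flag values mirror the kernel's kvm_para.h definitions; the thread names, iteration count, and file name race_model.c are made up for the demo. Build with: cc -pthread -O2 race_model.c

/*
 * User-space model of the lost-TLB-flush race, for illustration only.
 * NOT kernel code and NOT part of the patch above.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_VCPU_PREEMPTED  (1 << 0)
#define KVM_VCPU_FLUSH_TLB  (1 << 1)

static _Atomic uint8_t preempted;       /* guest-visible kvm_steal_time.preempted */
static _Atomic int guest_requested;     /* did the guest manage to set FLUSH_TLB? */

/*
 * Old host path: PREEMPTED is cleared in a host-local copy, then the whole
 * value is copied back to the guest.  The write-back is not atomic with
 * respect to concurrent guest updates.
 */
static void *host_thread(void *arg)
{
	(void)arg;
	uint8_t local = 0;               /* local copy with PREEMPTED cleared */
	atomic_store(&preempted, local); /* blind write-back clobbers guest bits */
	return NULL;
}

/*
 * Guest path, modelled on kvm_flush_tlb_others(): if the target vCPU looks
 * preempted, ask the hypervisor to flush its TLB instead of sending an IPI.
 */
static void *guest_thread(void *arg)
{
	(void)arg;
	uint8_t state = atomic_load(&preempted);
	if ((state & KVM_VCPU_PREEMPTED) &&
	    atomic_compare_exchange_strong(&preempted, &state,
					   state | KVM_VCPU_FLUSH_TLB))
		atomic_store(&guest_requested, 1);
	return NULL;
}

int main(void)
{
	long requested = 0, lost = 0;

	for (int i = 0; i < 10000; i++) {
		pthread_t h, g;

		atomic_store(&preempted, KVM_VCPU_PREEMPTED);
		atomic_store(&guest_requested, 0);

		pthread_create(&g, NULL, guest_thread, NULL);
		pthread_create(&h, NULL, host_thread, NULL);
		pthread_join(g, NULL);
		pthread_join(h, NULL);

		if (atomic_load(&guest_requested)) {
			requested++;
			/* host write-back erased the guest's flush request */
			if (!(atomic_load(&preempted) & KVM_VCPU_FLUSH_TLB))
				lost++;
		}
	}
	printf("guest requested a flush %ld times, request lost %ld times\n",
	       requested, lost);
	return 0;
}

On a typical multi-core machine some runs end with a non-zero "lost" count: the guest asked for a flush and the host silently discarded the request. That is the failure mode the patch closes by mapping the steal-time page and doing the xchg() directly on the guest-visible copy.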