From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Fri, 31 Jan 2020 08:06:43 -0300
Subject: [PATCH] x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed

CVE-2019-3016
CVE-2020-3016

There is a potential race in record_steal_time() between setting
host-local vcpu->arch.st.steal.preempted to zero (i.e. clearing
KVM_VCPU_PREEMPTED) and propagating this value to the guest with
kvm_write_guest_cached(). Between those two events the guest may
still see KVM_VCPU_PREEMPTED in its copy of kvm_steal_time, set
KVM_VCPU_FLUSH_TLB and assume that the hypervisor will do the right
thing. Which it won't.
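
Schematically, the losing interleaving looks like this (a sketch of
the pre-patch flow; column layout is illustrative):

    host: record_steal_time()          guest
    -------------------------          -----
    st.steal.preempted = 0
      (host-local copy only)
                                       sees KVM_VCPU_PREEMPTED in its
                                         kvm_steal_time
                                       sets KVM_VCPU_FLUSH_TLB and
                                         skips its own TLB flush
    kvm_write_guest_cached()
      overwrites the guest copy
      with 0; KVM_VCPU_FLUSH_TLB
      is lost, no flush happens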

Instead of copying, we should map kvm_steal_time and that will
guarantee atomicity of accesses to @preempted.
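
A minimal sketch of that idea, using the names from the hunks below
(st points into the mapped guest page, so the xchg() is an atomic
read-modify-write on memory the guest also sees; there is no window
in which a concurrent guest write of KVM_VCPU_FLUSH_TLB can be
overwritten):

	st = map.hva +
		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);

	/* Atomic RMW directly on guest-visible memory. */
	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
		kvm_vcpu_flush_tlb(vcpu, false);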

This is part of CVE-2019-3016.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
 arch/x86/kvm/x86.c | 49 +++++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cb18560b07bc..f63fa5846f08 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2488,43 +2488,45 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
+	struct kvm_host_map map;
+	struct kvm_steal_time *st;
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
-	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
+	/* -EAGAIN is returned in atomic context so we can just return. */
+	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+			&map, &vcpu->arch.st.cache, false))
 		return;
 
+	st = map.hva +
+		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+
 	/*
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
-	if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
 		kvm_vcpu_flush_tlb(vcpu, false);
 
-	if (vcpu->arch.st.steal.version & 1)
-		vcpu->arch.st.steal.version += 1; /* first time write, random junk */
+	vcpu->arch.st.steal.preempted = 0;
 
-	vcpu->arch.st.steal.version += 1;
+	if (st->version & 1)
+		st->version += 1; /* first time write, random junk */
 
-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+	st->version += 1;
 
 	smp_wmb();
 
-	vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+	st->steal += current->sched_info.run_delay -
 		vcpu->arch.st.last_steal;
 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
 
-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
-
 	smp_wmb();
 
-	vcpu->arch.st.steal.version += 1;
+	st->version += 1;
 
-	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3396,18 +3398,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
+	struct kvm_host_map map;
+	struct kvm_steal_time *st;
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
 	if (vcpu->arch.st.steal.preempted)
 		return;
 
-	vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
+	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+			&vcpu->arch.st.cache, true))
+		return;
+
+	st = map.hva +
+		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+
+	st->preempted = vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
 
-	kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
-			&vcpu->arch.st.steal.preempted,
-			offsetof(struct kvm_steal_time, preempted),
-			sizeof(vcpu->arch.st.steal.preempted));
+	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)