KVM: X86: Synchronize the shadow pagetable before linking it
author    Lai Jiangshan <laijs@linux.alibaba.com>
          Sat, 18 Sep 2021 00:56:28 +0000 (08:56 +0800)
committer Paolo Bonzini <pbonzini@redhat.com>
          Thu, 23 Sep 2021 15:01:00 +0000 (11:01 -0400)
If a gpte is changed from non-present to present, the guest doesn't need
to flush the TLB per the SDM, so the host must synchronize the sp before
linking it.  Otherwise the guest might use a wrong mapping.

For example: the guest first changes a level-1 pagetable, and then links
its parent into a new location where the original gpte is non-present.
Finally the guest can access the remapped area without flushing the TLB.
The guest's behavior should be allowed per the SDM, but the host KVM MMU
gets it wrong.
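
As a minimal user-space sketch of the ordering requirement (illustrative
only, not KVM code; guest_write_pte(), sync_shadow() and host_link_shadow()
are invented names for this example): the shadow of the guest leaf table
must be brought back in sync before it is linked under a parent entry that
just went from non-present to present, because the guest will not flush
the TLB afterwards.

/* Not KVM code: a toy model of a guest leaf pagetable and its shadow. */
#include <stdbool.h>
#include <stdio.h>

#define ENTRIES 4

static unsigned long guest_l1[ENTRIES];   /* guest leaf pagetable         */
static unsigned long shadow_l1[ENTRIES];  /* host's shadow of that table  */
static bool shadow_unsync;                /* shadow may be stale (unsync) */

/* The guest writes its own leaf table; the host only marks the shadow unsync. */
static void guest_write_pte(int idx, unsigned long pfn)
{
        guest_l1[idx] = pfn;
        shadow_unsync = true;
}

/* Bring the shadow back in sync with the guest table. */
static void sync_shadow(void)
{
        for (int i = 0; i < ENTRIES; i++)
                shadow_l1[i] = guest_l1[i];
        shadow_unsync = false;
}

/*
 * Link the shadow of the leaf table under a new parent entry.  The guest
 * made that parent entry present for the first time, so per the SDM it
 * will not flush the TLB afterwards: whatever the shadow contains is what
 * the guest observes.
 */
static void host_link_shadow(bool sync_before_link)
{
        if (sync_before_link && shadow_unsync)
                sync_shadow();
        /* ...the parent shadow entry now points at shadow_l1... */
}

int main(void)
{
        guest_write_pte(0, 0x1111);     /* initial mapping               */
        sync_shadow();

        guest_write_pte(0, 0x2222);     /* guest remaps entry 0          */

        host_link_shadow(false);        /* buggy: link the stale shadow  */
        printf("without sync: guest reads pfn %#lx (wanted 0x2222)\n",
               shadow_l1[0]);

        host_link_shadow(true);         /* fixed: synchronize, then link */
        printf("with sync:    guest reads pfn %#lx\n", shadow_l1[0]);
        return 0;
}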

Fixes: 4731d4c7a077 ("KVM: MMU: out of sync shadow core")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210918005636.3675-3-jiangshanlai@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/paging_tmpl.h

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2d7e61122af81566d857cde0e8540aa852fb5795..1a64ba5b9437b14f59d07ff75bda3ce3a323bdde 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
        } while (!sp->unsync_children);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
-                             struct kvm_mmu_page *parent)
+static int mmu_sync_children(struct kvm_vcpu *vcpu,
+                            struct kvm_mmu_page *parent, bool can_yield)
 {
        int i;
        struct kvm_mmu_page *sp;
@@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
                }
                if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
                        kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+                       if (!can_yield) {
+                               kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+                               return -EINTR;
+                       }
+
                        cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
                        flush = false;
                }
        }
 
        kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+       return 0;
 }
 
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
 
-               if (sp->unsync_children)
-                       kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
                __clear_sp_write_flooding_count(sp);
 
 trace_get_page:
@@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                write_lock(&vcpu->kvm->mmu_lock);
                kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
-               mmu_sync_children(vcpu, sp);
+               mmu_sync_children(vcpu, sp, true);
 
                kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
                write_unlock(&vcpu->kvm->mmu_lock);
@@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                if (IS_VALID_PAE_ROOT(root)) {
                        root &= PT64_BASE_ADDR_MASK;
                        sp = to_shadow_page(root);
-                       mmu_sync_children(vcpu, sp);
+                       mmu_sync_children(vcpu, sp, true);
                }
        }
 
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index efce0a935e2312bd51cf9c365f2d664d96e0c6a0..913d52a7923e654576b1af8ad0400a55cd36e86a 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
                if (!is_shadow_present_pte(*it.sptep)) {
                        table_gfn = gw->table_gfn[it.level - 2];
                        access = gw->pt_access[it.level - 2];
-                       sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-                                             false, access);
+                       sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
+                                             it.level-1, false, access);
+                       /*
+                        * We must synchronize the pagetable before linking it
+                        * because the guest doesn't need to flush tlb when
+                        * the gpte is changed from non-present to present.
+                        * Otherwise, the guest may use the wrong mapping.
+                        *
+                        * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+                        * synchronized it transiently via kvm_sync_page().
+                        *
+                        * For higher level pagetable, we synchronize it via
+                        * the slower mmu_sync_children().  If it needs to
+                        * break, some progress has been made; return
+                        * RET_PF_RETRY and retry on the next #PF.
+                        * KVM_REQ_MMU_SYNC is not necessary but it
+                        * expedites the process.
+                        */
+                       if (sp->unsync_children &&
+                           mmu_sync_children(vcpu, sp, false))
+                               return RET_PF_RETRY;
                }
 
                /*
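
The comment added to FNAME(fetch) above describes a retry protocol: when
mmu_sync_children() is not allowed to yield it bails out early, the fault is
replayed via RET_PF_RETRY, and the children that were already synchronized
stay synchronized.  A minimal stand-alone model of that protocol (illustrative
only; sync_children_once() and BUDGET are invented for this sketch and are
not KVM symbols):

/* Not KVM code: each call makes bounded progress; the caller retries. */
#include <stdbool.h>
#include <stdio.h>

#define CHILDREN        8
#define BUDGET          3       /* children handled before bailing out */

static bool synced[CHILDREN];

/* Returns 0 once every child is in sync, -1 if it had to stop early. */
static int sync_children_once(void)
{
        int budget = BUDGET;

        for (int i = 0; i < CHILDREN; i++) {
                if (synced[i])
                        continue;
                if (!budget--)
                        return -1;      /* like -EINTR: caller must retry */
                synced[i] = true;       /* progress survives the retry    */
        }
        return 0;
}

int main(void)
{
        int faults = 0;

        /* Each simulated #PF retries until the parent can finally be linked. */
        while (sync_children_once())
                faults++;

        printf("all children synced after %d extra faults\n", faults);
        return 0;
}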