]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - arch/x86/kvm/paging_tmpl.h
KVM: MMU: don't mark pte notrap if it's just sync transient
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / kvm / paging_tmpl.h
index 89d66ca4d87cb2982ae48c5877580e733ab0cce8..863920f649fbeb4ed2f15b5c8ddcf707e04a6c63 100644 (file)
@@ -7,6 +7,7 @@
  * MMU support
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
  *
  * Authors:
  *   Yaniv Kamay  <yaniv@qumranet.com>
@@ -177,10 +178,10 @@ walk:
                if (!(pte & PT_ACCESSED_MASK)) {
                        trace_kvm_mmu_set_accessed_bit(table_gfn, index,
                                                       sizeof(pte));
-                       mark_page_dirty(vcpu->kvm, table_gfn);
                        if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
                            index, pte, pte|PT_ACCESSED_MASK))
                                goto walk;
+                       mark_page_dirty(vcpu->kvm, table_gfn);
                        pte |= PT_ACCESSED_MASK;
                }
 
@@ -217,11 +218,11 @@ walk:
                bool ret;
 
                trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-               mark_page_dirty(vcpu->kvm, table_gfn);
                ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
                            pte|PT_DIRTY_MASK);
                if (ret)
                        goto walk;
+               mark_page_dirty(vcpu->kvm, table_gfn);
                pte |= PT_DIRTY_MASK;
                walker->ptes[walker->level - 1] = pte;
        }
@@ -229,7 +230,7 @@ walk:
        walker->pt_access = pt_access;
        walker->pte_access = pte_access;
        pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-                __func__, (u64)pte, pt_access, pte_access);
+                __func__, (u64)pte, pte_access, pt_access);
        return 1;
 
 not_present:
@@ -252,7 +253,7 @@ err:
        return 0;
 }
 
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                              u64 *spte, const void *pte)
 {
        pt_element_t gpte;
@@ -263,7 +264,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
        gpte = *(const pt_element_t *)pte;
        if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
                if (!is_present_gpte(gpte)) {
-                       if (page->unsync)
+                       if (sp->unsync)
                                new_spte = shadow_trap_nonpresent_pte;
                        else
                                new_spte = shadow_notrap_nonpresent_pte;
@@ -272,7 +273,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
                return;
        }
        pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-       pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
+       pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
        if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
                return;
        pfn = vcpu->arch.update_pte.pfn;
@@ -285,8 +286,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
         * we call mmu_set_spte() with reset_host_protection = true beacuse that
         * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
         */
-       mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-                    gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
+       mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+                    is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
                     gpte_to_gfn(gpte), pfn, true, true);
 }
 
@@ -299,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         int *ptwrite, pfn_t pfn)
 {
        unsigned access = gw->pt_access;
-       struct kvm_mmu_page *shadow_page;
+       struct kvm_mmu_page *sp;
        u64 spte, *sptep = NULL;
        int direct;
        gfn_t table_gfn;
@@ -318,7 +319,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        mmu_set_spte(vcpu, sptep, access,
                                     gw->pte_access & access,
                                     user_fault, write_fault,
-                                    gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+                                    is_dirty_gpte(gw->ptes[gw->level-1]),
                                     ptwrite, level,
                                     gw->gfn, pfn, false, true);
                        break;
@@ -338,29 +339,33 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        direct = 1;
                        if (!is_dirty_gpte(gw->ptes[level - delta]))
                                access &= ~ACC_WRITE_MASK;
-                       table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
-                       /* advance table_gfn when emulating 1gb pages with 4k */
-                       if (delta == 0)
-                               table_gfn += PT_INDEX(addr, level);
+                       /*
+                        * It is a large guest pages backed by small host pages,
+                        * So we set @direct(@sp->role.direct)=1, and set
+                        * @table_gfn(@sp->gfn)=the base page frame for linear
+                        * translations.
+                        */
+                       table_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
+                       access &= gw->pte_access;
                } else {
                        direct = 0;
                        table_gfn = gw->table_gfn[level - 2];
                }
-               shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+               sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
                                               direct, access, sptep);
                if (!direct) {
                        r = kvm_read_guest_atomic(vcpu->kvm,
                                                  gw->pte_gpa[level - 2],
                                                  &curr_pte, sizeof(curr_pte));
                        if (r || curr_pte != gw->ptes[level - 2]) {
-                               kvm_mmu_put_page(shadow_page, sptep);
+                               kvm_mmu_put_page(sp, sptep);
                                kvm_release_pfn_clean(pfn);
                                sptep = NULL;
                                break;
                        }
                }
 
-               spte = __pa(shadow_page->spt)
+               spte = __pa(sp->spt)
                        | PT_PRESENT_MASK | PT_ACCESSED_MASK
                        | PT_WRITABLE_MASK | PT_USER_MASK;
                *sptep = spte;
@@ -430,11 +435,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
        pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 
        /* mmio */
-       if (is_error_pfn(pfn)) {
-               pgprintk("gfn %lx is mmio\n", walker.gfn);
-               kvm_release_pfn_clean(pfn);
-               return 1;
-       }
+       if (is_error_pfn(pfn))
+               return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu, mmu_seq))
@@ -442,6 +444,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
        kvm_mmu_free_some_pages(vcpu);
        sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
                             level, &write_pt, pfn);
+       (void)sptep;
        pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
                 sptep, *sptep, write_pt);
 
@@ -463,6 +466,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
        struct kvm_shadow_walk_iterator iterator;
+       struct kvm_mmu_page *sp;
        gpa_t pte_gpa = -1;
        int level;
        u64 *sptep;
@@ -474,10 +478,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
                level = iterator.level;
                sptep = iterator.sptep;
 
+               sp = page_header(__pa(sptep));
                if (is_last_spte(*sptep, level)) {
-                       struct kvm_mmu_page *sp = page_header(__pa(sptep));
                        int offset, shift;
 
+                       if (!sp->unsync)
+                               break;
+
                        shift = PAGE_SHIFT -
                                  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
                        offset = sp->role.quadrant << shift;
@@ -495,7 +502,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
                        break;
                }
 
-               if (!is_shadow_present_pte(*sptep))
+               if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
                        break;
        }
 
@@ -571,7 +578,8 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
  *   can't change unless all sptes pointing to it are nuked first.
  * - Alias changes zap the entire shadow cache.
  */
-static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+                           bool clear_unsync)
 {
        int i, offset, nr_present;
        bool reset_host_protection;
@@ -579,6 +587,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
        offset = nr_present = 0;
 
+       /* direct kvm_mmu_page can not be unsync. */
+       BUG_ON(sp->role.direct);
+
        if (PTTYPE == 32)
                offset = sp->role.quadrant << PT64_LEVEL_BITS;
 
@@ -588,7 +599,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                unsigned pte_access;
                pt_element_t gpte;
                gpa_t pte_gpa;
-               gfn_t gfn = sp->gfns[i];
+               gfn_t gfn;
 
                if (!is_shadow_present_pte(sp->spt[i]))
                        continue;
@@ -599,12 +610,13 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                                          sizeof(pt_element_t)))
                        return -EINVAL;
 
-               if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
-                   !(gpte & PT_ACCESSED_MASK)) {
+               gfn = gpte_to_gfn(gpte);
+               if (unalias_gfn(vcpu->kvm, gfn) != sp->gfns[i] ||
+                     !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
                        u64 nonpresent;
 
                        rmap_remove(vcpu->kvm, &sp->spt[i]);
-                       if (is_present_gpte(gpte))
+                       if (is_present_gpte(gpte) || !clear_unsync)
                                nonpresent = shadow_trap_nonpresent_pte;
                        else
                                nonpresent = shadow_notrap_nonpresent_pte;