X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fpaging_tmpl.h;h=863920f649fbeb4ed2f15b5c8ddcf707e04a6c63;hb=be71e061d15c0aad4f8c2606f76c57b8a19792fd;hp=89d66ca4d87cb2982ae48c5877580e733ab0cce8;hpb=88e88374ee4958786397a57f684de6f1fc5e0242;p=mirror_ubuntu-bionic-kernel.git diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 89d66ca4d87c..863920f649fb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -7,6 +7,7 @@ * MMU support * * Copyright (C) 2006 Qumranet, Inc. + * Copyright 2010 Red Hat, Inc. and/or its affilates. * * Authors: * Yaniv Kamay @@ -177,10 +178,10 @@ walk: if (!(pte & PT_ACCESSED_MASK)) { trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); - mark_page_dirty(vcpu->kvm, table_gfn); if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, pte|PT_ACCESSED_MASK)) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_ACCESSED_MASK; } @@ -217,11 +218,11 @@ walk: bool ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - mark_page_dirty(vcpu->kvm, table_gfn); ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, pte|PT_DIRTY_MASK); if (ret) goto walk; + mark_page_dirty(vcpu->kvm, table_gfn); pte |= PT_DIRTY_MASK; walker->ptes[walker->level - 1] = pte; } @@ -229,7 +230,7 @@ walk: walker->pt_access = pt_access; walker->pte_access = pte_access; pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, pt_access, pte_access); + __func__, (u64)pte, pte_access, pt_access); return 1; not_present: @@ -252,7 +253,7 @@ err: return 0; } -static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, +static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) { pt_element_t gpte; @@ -263,7 +264,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, gpte = *(const pt_element_t *)pte; if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { if (!is_present_gpte(gpte)) { - if (page->unsync) + if (sp->unsync) new_spte = shadow_trap_nonpresent_pte; else new_spte = shadow_notrap_nonpresent_pte; @@ -272,7 +273,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; } pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) return; pfn = vcpu->arch.update_pte.pfn; @@ -285,8 +286,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, * we call mmu_set_spte() with reset_host_protection = true beacuse that * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). */ - mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, + is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, gpte_to_gfn(gpte), pfn, true, true); } @@ -299,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, int *ptwrite, pfn_t pfn) { unsigned access = gw->pt_access; - struct kvm_mmu_page *shadow_page; + struct kvm_mmu_page *sp; u64 spte, *sptep = NULL; int direct; gfn_t table_gfn; @@ -318,7 +319,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, user_fault, write_fault, - gw->ptes[gw->level-1] & PT_DIRTY_MASK, + is_dirty_gpte(gw->ptes[gw->level-1]), ptwrite, level, gw->gfn, pfn, false, true); break; @@ -338,29 +339,33 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct = 1; if (!is_dirty_gpte(gw->ptes[level - delta])) access &= ~ACC_WRITE_MASK; - table_gfn = gpte_to_gfn(gw->ptes[level - delta]); - /* advance table_gfn when emulating 1gb pages with 4k */ - if (delta == 0) - table_gfn += PT_INDEX(addr, level); + /* + * It is a large guest pages backed by small host pages, + * So we set @direct(@sp->role.direct)=1, and set + * @table_gfn(@sp->gfn)=the base page frame for linear + * translations. + */ + table_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); + access &= gw->pte_access; } else { direct = 0; table_gfn = gw->table_gfn[level - 2]; } - shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, + sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, direct, access, sptep); if (!direct) { r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { - kvm_mmu_put_page(shadow_page, sptep); + kvm_mmu_put_page(sp, sptep); kvm_release_pfn_clean(pfn); sptep = NULL; break; } } - spte = __pa(shadow_page->spt) + spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; *sptep = spte; @@ -430,11 +435,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); /* mmio */ - if (is_error_pfn(pfn)) { - pgprintk("gfn %lx is mmio\n", walker.gfn); - kvm_release_pfn_clean(pfn); - return 1; - } + if (is_error_pfn(pfn)) + return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) @@ -442,6 +444,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, kvm_mmu_free_some_pages(vcpu); sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, level, &write_pt, pfn); + (void)sptep; pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, sptep, *sptep, write_pt); @@ -463,6 +466,7 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; + struct kvm_mmu_page *sp; gpa_t pte_gpa = -1; int level; u64 *sptep; @@ -474,10 +478,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) level = iterator.level; sptep = iterator.sptep; + sp = page_header(__pa(sptep)); if (is_last_spte(*sptep, level)) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); int offset, shift; + if (!sp->unsync) + break; + shift = PAGE_SHIFT - (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; offset = sp->role.quadrant << shift; @@ -495,7 +502,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) break; } - if (!is_shadow_present_pte(*sptep)) + if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) break; } @@ -571,7 +578,8 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, * can't change unless all sptes pointing to it are nuked first. * - Alias changes zap the entire shadow cache. */ -static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + bool clear_unsync) { int i, offset, nr_present; bool reset_host_protection; @@ -579,6 +587,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) offset = nr_present = 0; + /* direct kvm_mmu_page can not be unsync. */ + BUG_ON(sp->role.direct); + if (PTTYPE == 32) offset = sp->role.quadrant << PT64_LEVEL_BITS; @@ -588,7 +599,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) unsigned pte_access; pt_element_t gpte; gpa_t pte_gpa; - gfn_t gfn = sp->gfns[i]; + gfn_t gfn; if (!is_shadow_present_pte(sp->spt[i])) continue; @@ -599,12 +610,13 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) sizeof(pt_element_t))) return -EINVAL; - if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || - !(gpte & PT_ACCESSED_MASK)) { + gfn = gpte_to_gfn(gpte); + if (unalias_gfn(vcpu->kvm, gfn) != sp->gfns[i] || + !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) { u64 nonpresent; rmap_remove(vcpu->kvm, &sp->spt[i]); - if (is_present_gpte(gpte)) + if (is_present_gpte(gpte) || !clear_unsync) nonpresent = shadow_trap_nonpresent_pte; else nonpresent = shadow_notrap_nonpresent_pte;