* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
if (!(pte & PT_ACCESSED_MASK)) {
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
sizeof(pte));
- mark_page_dirty(vcpu->kvm, table_gfn);
if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
index, pte, pte|PT_ACCESSED_MASK))
goto walk;
+ mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_ACCESSED_MASK;
}
bool ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
- mark_page_dirty(vcpu->kvm, table_gfn);
ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
pte|PT_DIRTY_MASK);
if (ret)
goto walk;
+ mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_DIRTY_MASK;
walker->ptes[walker->level - 1] = pte;
}
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __func__, (u64)pte, pt_access, pte_access);
+ __func__, (u64)pte, pte_access, pt_access);
return 1;
not_present:
return 0;
}
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte)
{
pt_element_t gpte;
gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
if (!is_present_gpte(gpte)) {
- if (page->unsync)
+ if (sp->unsync)
new_spte = shadow_trap_nonpresent_pte;
else
new_spte = shadow_notrap_nonpresent_pte;
return;
}
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
- pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
return;
pfn = vcpu->arch.update_pte.pfn;
* we call mmu_set_spte() with reset_host_protection = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
- mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
+ mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+ is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true, true);
}
int *ptwrite, pfn_t pfn)
{
unsigned access = gw->pt_access;
- struct kvm_mmu_page *shadow_page;
+ struct kvm_mmu_page *sp;
u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
mmu_set_spte(vcpu, sptep, access,
gw->pte_access & access,
user_fault, write_fault,
- gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+ is_dirty_gpte(gw->ptes[gw->level-1]),
ptwrite, level,
gw->gfn, pfn, false, true);
break;
direct = 1;
if (!is_dirty_gpte(gw->ptes[level - delta]))
access &= ~ACC_WRITE_MASK;
- table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
- /* advance table_gfn when emulating 1gb pages with 4k */
- if (delta == 0)
- table_gfn += PT_INDEX(addr, level);
+ /*
+ * It is a large guest pages backed by small host pages,
+ * So we set @direct(@sp->role.direct)=1, and set
+ * @table_gfn(@sp->gfn)=the base page frame for linear
+ * translations.
+ */
+ table_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
+ access &= gw->pte_access;
} else {
direct = 0;
table_gfn = gw->table_gfn[level - 2];
}
- shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
direct, access, sptep);
if (!direct) {
r = kvm_read_guest_atomic(vcpu->kvm,
gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(shadow_page, sptep);
+ kvm_mmu_put_page(sp, sptep);
kvm_release_pfn_clean(pfn);
sptep = NULL;
break;
}
}
- spte = __pa(shadow_page->spt)
+ spte = __pa(sp->spt)
| PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*sptep = spte;
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
/* mmio */
- if (is_error_pfn(pfn)) {
- pgprintk("gfn %lx is mmio\n", walker.gfn);
- kvm_release_pfn_clean(pfn);
- return 1;
- }
+ if (is_error_pfn(pfn))
+ return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
+ (void)sptep;
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt);
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_shadow_walk_iterator iterator;
+ struct kvm_mmu_page *sp;
gpa_t pte_gpa = -1;
int level;
u64 *sptep;
level = iterator.level;
sptep = iterator.sptep;
+ sp = page_header(__pa(sptep));
if (is_last_spte(*sptep, level)) {
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
int offset, shift;
+ if (!sp->unsync)
+ break;
+
shift = PAGE_SHIFT -
(PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
offset = sp->role.quadrant << shift;
break;
}
- if (!is_shadow_present_pte(*sptep))
+ if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
break;
}
* can't change unless all sptes pointing to it are nuked first.
* - Alias changes zap the entire shadow cache.
*/
-static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ bool clear_unsync)
{
int i, offset, nr_present;
bool reset_host_protection;
offset = nr_present = 0;
+ /* direct kvm_mmu_page can not be unsync. */
+ BUG_ON(sp->role.direct);
+
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
unsigned pte_access;
pt_element_t gpte;
gpa_t pte_gpa;
- gfn_t gfn = sp->gfns[i];
+ gfn_t gfn;
if (!is_shadow_present_pte(sp->spt[i]))
continue;
sizeof(pt_element_t)))
return -EINVAL;
- if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
- !(gpte & PT_ACCESSED_MASK)) {
+ gfn = gpte_to_gfn(gpte);
+ if (unalias_gfn(vcpu->kvm, gfn) != sp->gfns[i] ||
+ !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
rmap_remove(vcpu->kvm, &sp->spt[i]);
- if (is_present_gpte(gpte))
+ if (is_present_gpte(gpte) || !clear_unsync)
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;