kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-/**
- * kvm_arch_write_log_dirty - emulate dirty page logging
- * @vcpu: Guest mode vcpu
- *
- * Emulate arch specific page modification logging for the
- * nested hypervisor
- */
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
-{
- if (kvm_x86_ops.write_log_dirty)
- return kvm_x86_ops.write_log_dirty(vcpu);
-
- return 0;
-}
-
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn)
{
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
-
-#define for_each_valid_sp(_kvm, _sp, _gfn) \
- hlist_for_each_entry(_sp, \
- &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
+#define for_each_valid_sp(_kvm, _sp, _list) \
+ hlist_for_each_entry(_sp, _list, hash_link) \
if (is_obsolete_sp((_kvm), (_sp))) { \
} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
- for_each_valid_sp(_kvm, _sp, _gfn) \
+ for_each_valid_sp(_kvm, _sp, \
+ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
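With the bucket passed in explicitly, callers hash the gfn once and reuse the
list head for both the lookup and the insertion. A minimal sketch of the
resulting pattern (the kvm_mmu_get_page() hunk below does exactly this; kvm,
gfn, sp and new_sp stand in for the caller's locals):

	struct hlist_head *sp_list;

	sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
	for_each_valid_sp(kvm, sp, sp_list) {
		/* sp is a valid (non-obsolete) shadow page in this bucket. */
	}
	hlist_add_head(&new_sp->hash_link, sp_list);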
static inline bool is_ept_sp(struct kvm_mmu_page *sp)
int direct,
unsigned int access)
{
+ bool direct_mmu = vcpu->arch.mmu->direct_map;
union kvm_mmu_page_role role;
+ struct hlist_head *sp_list;
unsigned quadrant;
struct kvm_mmu_page *sp;
bool need_sync = false;
if (role.direct)
role.gpte_is_8_bytes = true;
role.access = access;
- if (!vcpu->arch.mmu->direct_map
- && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
+ if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- for_each_valid_sp(vcpu->kvm, sp, gfn) {
+
+ sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
+ for_each_valid_sp(vcpu->kvm, sp, sp_list) {
if (sp->gfn != gfn) {
collisions++;
continue;
if (sp->role.word != role.word)
continue;
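+ /*
+  * Fully direct MMUs never have unsync'd or unsync'able pages, so an
+  * existing direct sp can be reused as-is; skip the unsync checks and
+  * the write-flooding clearing and jump straight to the tracepoint.
+  */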
+ if (direct_mmu)
+ goto trace_get_page;
+
if (sp->unsync) {
/* The page is good, but __kvm_sync_page might still end
* up zapping it. If so, break in order to rebuild it.
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
__clear_sp_write_flooding_count(sp);
+
+trace_get_page:
trace_kvm_mmu_get_page(sp, false);
goto out;
}
sp->gfn = gfn;
sp->role = role;
- hlist_add_head(&sp->hash_link,
- &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
+ hlist_add_head(&sp->hash_link, sp_list);
if (!direct) {
/*
* we should do write protection before syncing pages
if (!sp->root_count) {
/* Count self */
(*nr_zapped)++;
- list_move(&sp->link, invalid_list);
+
+ /*
+ * Already invalid pages (previously active roots) are not on
+ * the active page list. See list_del() in the "else" case of
+ * !sp->root_count.
+ */
+ if (sp->role.invalid)
+ list_add(&sp->link, invalid_list);
+ else
+ list_move(&sp->link, invalid_list);
kvm_mod_used_mmu_pages(kvm, -1);
} else {
- list_move(&sp->link, &kvm->arch.active_mmu_pages);
+ /*
+ * Remove the active root from the active page list, the root
+ * will be explicitly freed when the root_count hits zero.
+ */
+ list_del(&sp->link);
/*
* Obsolete pages cannot be used on any vCPUs, see the comment
}
}
-static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
- struct list_head *invalid_list)
+static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm,
+ unsigned long nr_to_zap)
{
- struct kvm_mmu_page *sp;
+ unsigned long total_zapped = 0;
+ struct kvm_mmu_page *sp, *tmp;
+ LIST_HEAD(invalid_list);
+ bool unstable;
+ int nr_zapped;
if (list_empty(&kvm->arch.active_mmu_pages))
- return false;
+ return 0;
- sp = list_last_entry(&kvm->arch.active_mmu_pages,
- struct kvm_mmu_page, link);
- return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+restart:
+ list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) {
+ /*
+ * Don't zap active root pages; the page itself can't be freed
+ * and zapping it will just force vCPUs to realloc and reload.
+ */
+ if (sp->root_count)
+ continue;
+
+ unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list,
+ &nr_zapped);
+ total_zapped += nr_zapped;
+ if (total_zapped >= nr_to_zap)
+ break;
+
+ if (unstable)
+ goto restart;
+ }
+
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+
+ kvm->stat.mmu_recycled += total_zapped;
+ return total_zapped;
}
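A minimal usage sketch, mirroring call sites updated later in the patch
(mmu_lock is held by the callers; avail and goal_nr_mmu_pages are those
callers' locals):

	/* Refill back up to the watermark when a vCPU runs low on pages. */
	kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail);

	/* Shrink the cache down to an explicit target. */
	kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
				     goal_nr_mmu_pages);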
static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
{
- LIST_HEAD(invalid_list);
+ unsigned long avail = kvm_mmu_available_pages(vcpu->kvm);
- if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
+ if (likely(avail >= KVM_MIN_FREE_MMU_PAGES))
return 0;
- while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
- if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
- break;
-
- ++vcpu->kvm->stat.mmu_recycled;
- }
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail);
if (!kvm_mmu_available_pages(vcpu->kvm))
return -ENOSPC;
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
- LIST_HEAD(invalid_list);
-
spin_lock(&kvm->mmu_lock);
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
- /* Need to free some mmu pages to achieve the goal. */
- while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
- if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
- break;
+ kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
+ goal_nr_mmu_pages);
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
}
walk_shadow_page_lockless_end(vcpu);
}
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- gfn_t gfn)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ gfn_t gfn)
{
struct kvm_arch_async_pf arch;
u64 fault_address, char *insn, int insn_len)
{
int r = 1;
+ u32 flags = vcpu->arch.apf.host_apf_flags;
#ifndef CONFIG_X86_64
/* A 64-bit CR2 should be impossible on 32-bit KVM. */
#endif
vcpu->arch.l1tf_flush_l1d = true;
- switch (vcpu->arch.apf.host_apf_flags) {
- default:
+ if (!flags) {
trace_kvm_page_fault(fault_address, error_code);
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
insn_len);
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ } else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
vcpu->arch.apf.host_apf_flags = 0;
local_irq_disable();
kvm_async_pf_task_wait_schedule(fault_address);
local_irq_enable();
- break;
- case KVM_PV_REASON_PAGE_READY:
- vcpu->arch.apf.host_apf_flags = 0;
- local_irq_disable();
- kvm_async_pf_task_wake(fault_address);
- local_irq_enable();
- break;
+ } else {
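+ /*
+  * 'Page ready' events are no longer delivered via #PF (they arrive
+  * over the new interrupt-based mechanism), so PAGE_NOT_PRESENT is
+  * the only flag expected here.
+  */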
+ WARN_ONCE(1, "Unexpected host async PF flags: %x\n", flags);
}
+
return r;
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
break;
/*
- * Skip invalid pages with a non-zero root count, zapping pages
- * with a non-zero root count will never succeed, i.e. the page
- * will get thrown back on active_mmu_pages and we'll get stuck
- * in an infinite loop.
+ * Invalid pages should never land back on the list of active
+ * pages. Skip the bogus page; otherwise we'll get stuck in an
+ * infinite loop if the page gets put back on the list (again).
*/
- if (sp->role.invalid && sp->root_count)
+ if (WARN_ON(sp->role.invalid))
continue;
/*
spin_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
- if (sp->role.invalid && sp->root_count)
+ if (WARN_ON(sp->role.invalid))
continue;
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
goto restart;
goto unlock;
}
- if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
- freed++;
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
unlock:
spin_unlock(&kvm->mmu_lock);