@@ -1653,8 +1653,18 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
--kvm->stat.mmu_unsync;
}
+/*
+ * Used to hold a pointer to the next mmu page's node when traversing through
+ * one of the linked lists. This must be updated correctly when deleting any
+ * entries from the list.
+ */
+struct sp_next_pos {
+ struct hlist_node *hn; /* next hash_link node */
+};
+
static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
- struct list_head *invalid_list);
+ struct list_head *invalid_list,
+ struct sp_next_pos *npos);
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
@@ -1672,7 +1682,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list, bool clear_unsync)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list, NULL);
return 1;
}
@@ -1680,7 +1690,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list, NULL);
return 1;
}
@@ -1730,7 +1740,8 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
kvm_unlink_unsync_page(vcpu->kvm, s);
if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
(vcpu->arch.mmu.sync_page(vcpu, s))) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, s,
+ &invalid_list, NULL);
continue;
}
flush = true;
@@ -2062,7 +2073,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
struct kvm_mmu_page *sp;
for_each_sp(pages, sp, parents, i) {
- kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+ kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, NULL);
mmu_pages_clear_parents(&parents);
zapped++;
}
@@ -2073,7 +2084,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
}
static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
- struct list_head *invalid_list)
+ struct list_head *invalid_list,
+ struct sp_next_pos *npos)
{
int ret;
@@ -2149,7 +2161,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_prepare_zap_page(kvm, page, &invalid_list);
+ kvm_mmu_prepare_zap_page(kvm, page, &invalid_list, NULL);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
@@ -2174,7 +2186,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
- kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, NULL);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
@@ -2894,7 +2906,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
sp = page_header(root);
--sp->root_count;
if (!sp->root_count && sp->role.invalid) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ &invalid_list, NULL);
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
@@ -2910,7 +2923,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
--sp->root_count;
if (!sp->root_count && sp->role.invalid)
kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
+ &invalid_list, NULL);
}
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
}
@@ -3987,7 +4000,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
+ &invalid_list, NULL);
++vcpu->kvm->stat.mmu_flooded;
continue;
}
@@ -4041,7 +4054,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list, NULL);
++vcpu->kvm->stat.mmu_recycled;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -4203,7 +4216,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
- if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+ if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, NULL))
goto restart;
kvm_mmu_commit_zap_page(kvm, &invalid_list);
@@ -4220,7 +4233,7 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
+ kvm_mmu_prepare_zap_page(kvm, page, invalid_list, NULL);
}
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
Currently we cannot isolate mmu pages, i.e. delete the current hash_link
node by hlist_del(), inside kvm_mmu_prepare_zap_page(), because we may
call it while traversing the linked list; hlist_for_each_entry_safe
cannot solve this because zapping can happen recursively.  Since the
isolation must be done before releasing mmu_lock, we are now forced to
call kvm_mmu_isolate_page() for each mmu page found in the invalid_list
in kvm_mmu_commit_zap_page().

This patch adds a new parameter to kvm_mmu_prepare_zap_page() as a
preparation for solving this issue: all callers just pass NULL for now.

Note: the abstraction, i.e. the introduction of sp_next_pos, makes it
possible to support the other list later.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
---
 arch/x86/kvm/mmu.c |   41 +++++++++++++++++++++++++++--------------
 1 files changed, 27 insertions(+), 14 deletions(-)
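
For illustration only, here is a minimal sketch of how a later caller might
thread sp_next_pos through a hash-bucket walk so the walk stays valid while
pages are zapped.  This is not part of the patch: the function name, the
loop shape, and the way npos.hn is seeded and consumed are assumptions about
the intended follow-up, not code from this series.

/*
 * Illustrative sketch, not part of this patch.
 */
static void zap_bucket_sketch(struct kvm *kvm, struct hlist_head *bucket)
{
	struct kvm_mmu_page *sp;
	struct hlist_node *node = bucket->first;
	struct sp_next_pos npos;
	LIST_HEAD(invalid_list);

	while (node) {
		sp = hlist_entry(node, struct kvm_mmu_page, hash_link);
		/*
		 * Seed npos with the node we intend to visit next; if
		 * zapping (possibly recursively) unlinks that node,
		 * kvm_mmu_prepare_zap_page() would be expected to re-point
		 * npos.hn at a node that is still on the list.
		 */
		npos.hn = node->next;
		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &npos);
		node = npos.hn;
	}
	kvm_mmu_commit_zap_page(kvm, &invalid_list);
}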