@@ -5441,6 +5441,18 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
* will drop their references and allow the root count to
* go to 0.
*
+ * Also take a reference on all roots so that this thread
+ * can do the bulk of the work required to free the roots
+ * once they are invalidated. Without this reference, a
+ * vCPU thread might drop the last reference to a root and
+ * get stuck with tearing down the entire paging structure.
+ *
+ * Roots which have a zero refcount should be skipped as
+ * they're already being torn down.
+ * Already invalid roots should be referenced again so that
+ * they aren't freed before kvm_tdp_mmu_zap_all_fast is
+ * done with them.
+ *
* This has essentially the same effect for the TDP MMU
* as updating mmu_valid_gen above does for the shadow
* MMU.
@@ -5452,7 +5464,8 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
* could drop the MMU lock and yield.
*/
list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link)
- root->role.invalid = true;
+ if (refcount_inc_not_zero(&root->tdp_mmu_root_count))
+ root->role.invalid = true;
}
/*
@@ -5468,6 +5481,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
kvm_zap_obsolete_pages(kvm);
write_unlock(&kvm->mmu_lock);
+
+ if (is_tdp_mmu_enabled(kvm)) {
+ read_lock(&kvm->mmu_lock);
+ kvm_tdp_mmu_zap_invalidated_roots(kvm);
+ read_unlock(&kvm->mmu_lock);
+ }
}
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
@@ -798,6 +798,74 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
kvm_flush_remote_tlbs(kvm);
}
+static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
+ struct kvm_mmu_page *prev_root)
+{
+ struct kvm_mmu_page *next_root;
+
+ if (prev_root)
+ next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ &prev_root->link,
+ typeof(*prev_root), link);
+ else
+ next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ typeof(*next_root), link);
+
+ while (next_root && !(next_root->role.invalid &&
+ refcount_read(&next_root->tdp_mmu_root_count)))
+ next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ &next_root->link,
+ typeof(*next_root), link);
+
+ return next_root;
+}
+
+/*
+ * Since kvm_mmu_zap_all_fast has acquired a reference to each
+ * invalidated root, they will not be freed until this function drops the
+ * reference. Before dropping that reference, tear down the paging
+ * structure so that whichever thread does drop the last reference
+ * only has to do a trivial ammount of work. Since the roots are invalid,
+ * no new SPTEs should be created under them.
+ */
+void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
+{
+ gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+ struct kvm_mmu_page *next_root;
+ struct kvm_mmu_page *root;
+ bool flush = false;
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+ rcu_read_lock();
+
+ root = next_invalidated_root(kvm, NULL);
+
+ while (root) {
+ next_root = next_invalidated_root(kvm, root);
+
+ rcu_read_unlock();
+
+ flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
+ true);
+
+ /*
+ * Put the reference acquired in
+ * kvm_tdp_mmu_invalidate_roots
+ */
+ kvm_tdp_mmu_put_root(kvm, root, true);
+
+ root = next_root;
+
+ rcu_read_lock();
+ }
+
+ rcu_read_unlock();
+
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+}
+
/*
* Installs a last-level SPTE to handle a TDP page fault.
* (NPT/EPT violation/misconfiguration)
@@ -47,6 +47,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
sp->gfn, end, false, false, false);
}
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
+void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm);
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
int map_writable, int max_level, kvm_pfn_t pfn,
To avoid saddling a vCPU thread with the work of tearing down an entire paging structure, take a reference on each root before they become obsolete, so that the thread initiating the fast invalidation can tear down the paging structure and (most likely) release the last reference. As a bonus, this teardown can happen under the MMU lock in read mode so as not to block the progress of vCPU threads. Signed-off-by: Ben Gardon <bgardon@google.com> --- Changelog v2: -- rename kvm_tdp_mmu_zap_all_fast to kvm_tdp_mmu_zap_invalidated_roots arch/x86/kvm/mmu/mmu.c | 21 +++++++++++- arch/x86/kvm/mmu/tdp_mmu.c | 68 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.h | 1 + 3 files changed, 89 insertions(+), 1 deletion(-)