@@ -795,18 +795,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
}
spin_unlock(&kvm->mn_invalidate_lock);
- /*
- * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
- * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
- * each cache's lock. There are relatively few caches in existence at
- * any given time, and the caches themselves can check for hva overlap,
- * i.e. don't need to rely on memslot overlap checks for performance.
- * Because this runs without holding mmu_lock, the pfn caches must use
- * mn_active_invalidate_count (see above) instead of
- * mmu_invalidate_in_progress.
- */
- gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
-
/*
* If one or more memslots were found and thus zapped, notify arch code
* that guest memory has been reclaimed. This needs to be done *after*
@@ -860,6 +848,14 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
__kvm_handle_hva_range(kvm, &hva_range);
+ /*
+ * It's safe to invalidate the gfn_to_pfn_caches from this 'end'
+ * hook, because hva_to_pfn_retry() will wait until no active
+ * invalidations could be affecting the corresponding uHVA
+	 * before allowing a newly mapped GPC to become valid.
+ */
+ gfn_to_pfn_cache_invalidate(kvm, range->start, range->end);
+
/* Pairs with the increment in range_start(). */
spin_lock(&kvm->mn_invalidate_lock);
if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
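To make the start/end pairing concrete, here is a minimal userspace sketch of the handshake these two hunks rely on, using pthreads and hypothetical names (inv_lock, inv_count, inv_cond); it illustrates the pattern, not the kernel code. The 'start' side publishes the range and bumps an active count; the 'end' side invalidates overlapping caches, drops the count and wakes any refresh that is waiting it out.

/*
 * Minimal userspace analogue of the invalidate_range_start()/_end()
 * handshake; every name here is hypothetical, not the KVM API.
 */
#include <pthread.h>

static pthread_mutex_t inv_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  inv_cond = PTHREAD_COND_INITIALIZER;
static int inv_count;				/* like mn_active_invalidate_count */
static unsigned long inv_start, inv_end;	/* union of in-flight ranges */

void range_start(unsigned long start, unsigned long end)
{
	pthread_mutex_lock(&inv_lock);
	if (!inv_count++) {
		inv_start = start;
		inv_end = end;
	} else {
		/* widen the recorded range to cover all active invalidations */
		if (start < inv_start)
			inv_start = start;
		if (end > inv_end)
			inv_end = end;
	}
	pthread_mutex_unlock(&inv_lock);
	/* the real hook would now go on to zap the secondary MMU mappings */
}

void range_end(void)
{
	/* the real hook invalidates overlapping pfn caches at this point */
	pthread_mutex_lock(&inv_lock);
	if (!--inv_count)
		pthread_cond_broadcast(&inv_cond);	/* wake waiting refreshes */
	pthread_mutex_unlock(&inv_lock);
}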
@@ -24,13 +24,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
bool *async, bool write_fault, bool *writable);
#ifdef CONFIG_HAVE_KVM_PFNCACHE
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
- unsigned long start,
- unsigned long end);
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end);
#else
-static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
- unsigned long start,
- unsigned long end)
+static inline void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end)
{
}
#endif /* HAVE_KVM_PFNCACHE */
@@ -20,10 +20,15 @@
#include "kvm_mm.h"
/*
- * MMU notifier 'invalidate_range_start' hook.
+ * MMU notifier 'invalidate_range_end' hook. The hva_to_pfn_retry() function
+ * below may look up a PFN just before it is zapped, and may be mapping it
+ * concurrently with the actual invalidation (with the GPC lock dropped). By
+ * using a separate 'needs_invalidation' flag, the concurrent invalidation
+ * can handle this case, causing hva_to_pfn_retry() to drop its result and
+ * retry correctly.
*/
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
struct gfn_to_pfn_cache *gpc;
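The rewritten comment describes the role of the per-cache 'needs_invalidation' flag: a concurrent invalidation clears it, so a refresh that raced with the zap throws its freshly looked-up PFN away and retries. As a rough, self-contained illustration of that idea (the loop body is not shown in this hunk, so this is a sketch, not the actual implementation):

/*
 * Sketch of an invalidation pass over a list of pfn caches; the types,
 * list handling and page size are illustrative only.
 */
#include <stdbool.h>
#include <pthread.h>

struct pfn_cache {
	struct pfn_cache *next;
	unsigned long uhva;		/* userspace HVA backing the mapping */
	bool valid;			/* mapping may be consumed by readers */
	bool needs_invalidation;	/* cleared here to force a refresh retry */
	pthread_rwlock_t lock;
};

static void invalidate_caches(struct pfn_cache *head,
			      unsigned long start, unsigned long end)
{
	for (struct pfn_cache *c = head; c; c = c->next) {
		pthread_rwlock_wrlock(&c->lock);
		/* does the invalidated range cover the page backing this cache?
		 * (assumes 4KiB pages and a page-aligned uhva for the sketch) */
		if (c->uhva < end && c->uhva + 4096UL > start) {
			c->valid = false;
			c->needs_invalidation = false;
		}
		pthread_rwlock_unlock(&c->lock);
	}
}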
@@ -140,15 +145,12 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
(gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
}
-static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
{
- bool waited = false;
-
spin_lock(&gpc->kvm->mn_invalidate_lock);
if (gpc_invalidations_pending(gpc)) {
DEFINE_WAIT(wait);
- waited = true;
for (;;) {
prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
TASK_UNINTERRUPTIBLE);
@@ -163,10 +165,8 @@ static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
}
spin_unlock(&gpc->kvm->mn_invalidate_lock);
- return waited;
}
-
static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
@@ -199,28 +199,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
write_unlock_irq(&gpc->lock);
- /*
- * Invalidation occurs from the invalidate_range_start() hook,
- * which could already have happened before __kvm_gpc_refresh()
- * (or the previous turn around this loop) took gpc->lock().
- * If so, and if the corresponding invalidate_range_end() hook
- * hasn't happened yet, hva_to_pfn() could return a mapping
- * which is about to be stale and which should not be used. So
- * check if there are any currently-running invalidations which
- * affect the uHVA of this GPC, and retry if there are. Any
- * invalidation which starts after gpc->needs_invalidation is
- * set is fine, because it will clear that flag and trigger a
- * retry. And any invalidation which *completes* by having its
- * invalidate_range_end() hook called immediately prior to this
- * check is also fine, because the page tables are guaranteed
- * to have been changted already, so hva_to_pfn() won't return
- * a stale mapping in that case anyway.
- */
- if (gpc_wait_for_invalidations(gpc)) {
- write_lock_irq(&gpc->lock);
- continue;
- }
-
/*
* If the previous iteration "failed" due to an mmu_notifier
* event, release the pfn and unmap the kernel virtual address
@@ -261,6 +239,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
goto out_error;
}
+ /*
+ * Avoid populating a GPC with a user address which is already
+ * being invalidated, if the invalidate_range_start() notifier
+ * has already been called. The actual invalidation happens in
+ * the invalidate_range_end() callback, so wait until there are
+ * no active invalidations (for the relevant HVA).
+ */
+ gpc_wait_for_invalidations(gpc);
write_lock_irq(&gpc->lock);
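The comment above is the crux of the reordering: hva_to_pfn() may have returned a PFN while an invalidation of the same range was between its 'start' and 'end' hooks, so the refresh must wait for those to drain before the mapping can be trusted. A userspace sketch of such a wait, paired with the counter and condvar from the first sketch (names hypothetical):

/*
 * Sketch of waiting out in-flight invalidations that cover one address.
 * The globals mirror the earlier handshake sketch; names are hypothetical.
 */
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t inv_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  inv_cond = PTHREAD_COND_INITIALIZER;
static int inv_count;				/* active invalidations */
static unsigned long inv_start, inv_end;	/* range they cover */

static bool invalidation_covers(unsigned long uhva)
{
	/* only sleep when an active invalidation overlaps this page */
	return inv_count &&
	       inv_start < uhva + 4096UL &&	/* assumed 4KiB pages */
	       inv_end > uhva;
}

void wait_for_overlapping_invalidations(unsigned long uhva)
{
	pthread_mutex_lock(&inv_lock);
	while (invalidation_covers(uhva))
		pthread_cond_wait(&inv_cond, &inv_lock);	/* woken by the 'end' side */
	pthread_mutex_unlock(&inv_lock);
}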
@@ -269,6 +255,13 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
+
+ /*
+ * Since gfn_to_pfn_cache_invalidate() is called from the
+ * kvm_mmu_notifier_invalidate_range_end() callback, it can
+ * invalidate the GPC the moment after hva_to_pfn() returned
+ * a valid PFN. If that happens, retry.
+ */
} while (!gpc->needs_invalidation);
gpc->valid = true;
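Taken together, the refresh path after this change has the shape below: set needs_invalidation, look the PFN up with the lock dropped, wait for overlapping invalidations, then re-check the flag under the lock and retry if it was cleared. Again a userspace sketch with hypothetical names and trivial stubs, not the kernel code.

/*
 * Userspace sketch of the refresh/retry shape; names, types and stubs
 * are illustrative only.
 */
#include <stdbool.h>
#include <pthread.h>

struct cache {
	pthread_mutex_t lock;
	unsigned long uhva;
	unsigned long pfn;
	bool valid;
	bool needs_invalidation;	/* cleared by a racing invalidation */
};

/* trivial stubs so the sketch builds standalone; see the earlier sketches */
static unsigned long lookup_pfn(unsigned long uhva) { return uhva >> 12; }
static void wait_for_overlapping_invalidations(unsigned long uhva) { (void)uhva; }

static int cache_refresh(struct cache *c)
{
	unsigned long pfn;

	pthread_mutex_lock(&c->lock);
	do {
		c->needs_invalidation = true;	/* a racing invalidation clears this */
		pthread_mutex_unlock(&c->lock);

		pfn = lookup_pfn(c->uhva);	/* stands in for hva_to_pfn() */
		if (!pfn)
			return -1;

		/* don't publish a mapping that an in-flight invalidation will zap */
		wait_for_overlapping_invalidations(c->uhva);

		pthread_mutex_lock(&c->lock);
		/* flag cleared => the lookup raced with an invalidation: go again */
	} while (!c->needs_invalidation);

	c->pfn = pfn;
	c->valid = true;
	pthread_mutex_unlock(&c->lock);
	return 0;
}

Checking the flag only after re-taking the lock is what closes the race: an invalidation clears it under the same lock, so either it got there first and forces a retry, or it runs afterwards and finds a fully published mapping that it can invalidate normally.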