diff mbox series

[v2,13/28] KVM: x86/mmu: Ensure forward progress when yielding in TDP MMU iter

Message ID 20210202185734.1680553-14-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series Allow parallel MMU operations with TDP MMU | expand

Commit Message

Ben Gardon Feb. 2, 2021, 6:57 p.m. UTC
In some functions the TDP iter risks not making forward progress if two
threads livelock yielding to one another. This is possible if two threads
are trying to execute wrprot_gfn_range. Each could write protect an entry
and then yield. This would reset the tdp_iter's walk over the paging
structure and the loop would end up repeating the same entry over and
over, preventing either thread from making forward progress.

Fix this issue by only yielding if the loop has made forward progress
since the last yield.

Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>

---

v1 -> v2
- Moved forward progress check into tdp_mmu_iter_cond_resched
- Folded tdp_iter_refresh_walk into tdp_mmu_iter_cond_resched
- Split patch into three and renamed all

 arch/x86/kvm/mmu/tdp_iter.c | 18 +-----------------
 arch/x86/kvm/mmu/tdp_iter.h |  7 ++++++-
 arch/x86/kvm/mmu/tdp_mmu.c  | 21 ++++++++++++++++-----
 3 files changed, 23 insertions(+), 23 deletions(-)

Comments

Sean Christopherson Feb. 5, 2021, 11:42 p.m. UTC | #1
On Tue, Feb 02, 2021, Ben Gardon wrote:
> @@ -505,8 +516,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
>  
>  		tdp_mmu_set_spte(kvm, &iter, 0);
>  
> -		flush_needed = !can_yield ||
> -			       !tdp_mmu_iter_cond_resched(kvm, &iter, true);
> +		flush_needed = !(can_yield &&
> +				 tdp_mmu_iter_cond_resched(kvm, &iter, true));

Unnecessary change to convert perfectly readable code into an abomination :-D

No need to "fix", it goes aways in the next patch anyways, I just wanted to
complain.

>  	}
>  	return flush_needed;
>  }
> -- 
> 2.30.0.365.g02bc693789-goog
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index 9917c55b7d24..1a09d212186b 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -31,6 +31,7 @@  void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
 	WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
 
 	iter->next_last_level_gfn = next_last_level_gfn;
+	iter->yielded_gfn = iter->next_last_level_gfn;
 	iter->root_level = root_level;
 	iter->min_level = min_level;
 	iter->level = root_level;
@@ -158,23 +159,6 @@  void tdp_iter_next(struct tdp_iter *iter)
 	iter->valid = false;
 }
 
-/*
- * Restart the walk over the paging structure from the root, starting from the
- * highest gfn the iterator had previously reached. Assumes that the entire
- * paging structure, except the root page, may have been completely torn down
- * and rebuilt.
- */
-void tdp_iter_refresh_walk(struct tdp_iter *iter)
-{
-	gfn_t next_last_level_gfn = iter->next_last_level_gfn;
-
-	if (iter->gfn > next_last_level_gfn)
-		next_last_level_gfn = iter->gfn;
-
-	tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
-		       iter->root_level, iter->min_level, next_last_level_gfn);
-}
-
 u64 *tdp_iter_root_pt(struct tdp_iter *iter)
 {
 	return iter->pt_path[iter->root_level - 1];
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index b2dd269c631f..d480c540ee27 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -16,6 +16,12 @@  struct tdp_iter {
 	 * for this GFN.
 	 */
 	gfn_t next_last_level_gfn;
+	/*
+	 * The next_last_level_gfn at the time when the thread last
+	 * yielded. Only yielding when the next_last_level_gfn !=
+	 * yielded_gfn helps ensure forward progress.
+	 */
+	gfn_t yielded_gfn;
 	/* Pointers to the page tables traversed to reach the current SPTE */
 	u64 *pt_path[PT64_ROOT_MAX_LEVEL];
 	/* A pointer to the current SPTE */
@@ -54,7 +60,6 @@  u64 *spte_to_child_pt(u64 pte, int level);
 void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
 		    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
-void tdp_iter_refresh_walk(struct tdp_iter *iter);
 u64 *tdp_iter_root_pt(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 8f7b120597f3..7cfc0639b1ef 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -451,8 +451,9 @@  static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
  * TLB flush before yielding.
  *
  * If this function yields, it will also reset the tdp_iter's walk over the
- * paging structure and the calling function should allow the iterator to
- * continue its traversal from the paging structure root.
+ * paging structure and the calling function should skip to the next
+ * iteration to allow the iterator to continue its traversal from the
+ * paging structure root.
  *
  * Return true if this function yielded and the iterator's traversal was reset.
  * Return false if a yield was not needed.
@@ -460,12 +461,22 @@  static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
 static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
 					     struct tdp_iter *iter, bool flush)
 {
+	/* Ensure forward progress has been made before yielding. */
+	if (iter->next_last_level_gfn == iter->yielded_gfn)
+		return false;
+
 	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
 		if (flush)
 			kvm_flush_remote_tlbs(kvm);
 
 		cond_resched_lock(&kvm->mmu_lock);
-		tdp_iter_refresh_walk(iter);
+
+		WARN_ON(iter->gfn > iter->next_last_level_gfn);
+
+		tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
+			       iter->root_level, iter->min_level,
+			       iter->next_last_level_gfn);
+
 		return true;
 	}
 
@@ -505,8 +516,8 @@  static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		tdp_mmu_set_spte(kvm, &iter, 0);
 
-		flush_needed = !can_yield ||
-			       !tdp_mmu_iter_cond_resched(kvm, &iter, true);
+		flush_needed = !(can_yield &&
+				 tdp_mmu_iter_cond_resched(kvm, &iter, true));
 	}
 	return flush_needed;
 }