diff mbox series

[v2,22/28] KVM: x86/mmu: Mark SPTEs in disconnected pages as removed

Message ID 20210202185734.1680553-23-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series Allow parallel MMU operations with TDP MMU | expand

Commit Message

Ben Gardon Feb. 2, 2021, 6:57 p.m. UTC
When clearing TDP MMU pages that have been disconnected from the paging
structure root, set the SPTEs to a special non-present value which will
not be overwritten by other threads. This is needed to prevent races in
which a thread is clearing a disconnected page table, but another thread
has already acquired a pointer to that memory and installs a mapping in
an already cleared entry. This can lead to memory leaks and accounting
errors.

Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini Feb. 3, 2021, 11:17 a.m. UTC | #1
On 02/02/21 19:57, Ben Gardon wrote:
> 
> +			 * Marking the SPTE as a removed SPTE is not
> +			 * strictly necessary here as the MMU lock should

"should" is a bit too weak---the point of !shared is that the MMU lock 
*will* stop other threads from concurrent modifications of the SPTEs.

Paolo

> +			 * stop other threads from concurrently modifying
> +			 * this SPTE. Using the removed SPTE value keeps
> +			 * the shared and non-atomic cases consistent and
> +			 * simplifies the function.
> +			 */
> +			WRITE_ONCE(*sptep, REMOVED_SPTE);
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7a2cdfeac4d2..0dd27e000dd0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -334,9 +334,10 @@  static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
 {
 	struct kvm_mmu_page *sp = sptep_to_sp(pt);
 	int level = sp->role.level;
-	gfn_t gfn = sp->gfn;
+	gfn_t base_gfn = sp->gfn;
 	u64 old_child_spte;
 	u64 *sptep;
+	gfn_t gfn;
 	int i;
 
 	trace_kvm_mmu_prepare_zap_page(sp);
@@ -345,16 +346,39 @@  static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
 		sptep = pt + i;
+		gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
 
 		if (shared) {
-			old_child_spte = xchg(sptep, 0);
+			/*
+			 * Set the SPTE to a nonpresent value that other
+			 * threads will not overwrite. If the SPTE was
+			 * already marked as removed then another thread
+			 * handling a page fault could overwrite it, so
+			 * set the SPTE until it is set from some other
+			 * value to the removed SPTE value.
+			 */
+			for (;;) {
+				old_child_spte = xchg(sptep, REMOVED_SPTE);
+				if (!is_removed_spte(old_child_spte))
+					break;
+				cpu_relax();
+			}
 		} else {
 			old_child_spte = READ_ONCE(*sptep);
-			WRITE_ONCE(*sptep, 0);
+
+			/*
+			 * Marking the SPTE as a removed SPTE is not
+			 * strictly necessary here as the MMU lock should
+			 * stop other threads from concurrently modifying
+			 * this SPTE. Using the removed SPTE value keeps
+			 * the shared and non-atomic cases consistent and
+			 * simplifies the function.
+			 */
+			WRITE_ONCE(*sptep, REMOVED_SPTE);
 		}
-		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
-			gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
-			old_child_spte, 0, level - 1, shared);
+		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
+				    old_child_spte, REMOVED_SPTE, level - 1,
+				    shared);
 	}
 
 	kvm_flush_remote_tlbs_with_address(kvm, gfn,