@@ -547,10 +547,8 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
- if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) {
+ if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte))
flush = true;
- kvm_set_pfn_dirty(spte_to_pfn(old_spte));
- }
return flush;
}
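For readability, this is roughly how the tail of mmu_spte_update() reads once the hunk above is applied; the accessed-bit block is reconstructed from the unchanged context and is shown only as an illustration, not as part of the patch:

	if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) {
		flush = true;
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
	}

	/*
	 * A dirty->clean transition still forces a TLB flush, but the
	 * page/folio is no longer touched here.
	 */
	if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte))
		flush = true;

	return flush;
}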
@@ -593,9 +591,6 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
if (is_accessed_spte(old_spte))
kvm_set_pfn_accessed(pfn);
- if (is_dirty_spte(old_spte))
- kvm_set_pfn_dirty(pfn);
-
return old_spte;
}
@@ -1250,16 +1245,6 @@ static bool spte_clear_dirty(u64 *sptep)
return mmu_spte_update(sptep, spte);
}
-static bool spte_wrprot_for_clear_dirty(u64 *sptep)
-{
- bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
- (unsigned long *)sptep);
- if (was_writable && !spte_ad_enabled(*sptep))
- kvm_set_pfn_dirty(spte_to_pfn(*sptep));
-
- return was_writable;
-}
-
/*
* Gets the GFN ready for another round of dirty logging by clearing the
* - D bit on ad-enabled SPTEs, and
@@ -1275,7 +1260,8 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
for_each_rmap_spte(rmap_head, &iter, sptep)
if (spte_ad_need_write_protect(*sptep))
- flush |= spte_wrprot_for_clear_dirty(sptep);
+ flush |= test_and_clear_bit(PT_WRITABLE_SHIFT,
+ (unsigned long *)sptep);
else
flush |= spte_clear_dirty(sptep);
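With spte_wrprot_for_clear_dirty() gone and the write-protect case open-coded, __rmap_clear_dirty() ends up roughly as below; the signature and local declarations are reconstructed from the surrounding kernel code and are an illustrative sketch, not part of the diff:

static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
			       const struct kvm_memory_slot *slot)
{
	u64 *sptep;
	struct rmap_iterator iter;
	bool flush = false;

	for_each_rmap_spte(rmap_head, &iter, sptep)
		if (spte_ad_need_write_protect(*sptep))
			/* Clear W directly; dirty state is no longer captured here. */
			flush |= test_and_clear_bit(PT_WRITABLE_SHIFT,
						    (unsigned long *)sptep);
		else
			flush |= spte_clear_dirty(sptep);

	return flush;
}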
@@ -1628,14 +1614,6 @@ static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
clear_bit((ffs(shadow_accessed_mask) - 1),
(unsigned long *)sptep);
} else {
- /*
- * Capture the dirty status of the page, so that
- * it doesn't get lost when the SPTE is marked
- * for access tracking.
- */
- if (is_writable_pte(spte))
- kvm_set_pfn_dirty(spte_to_pfn(spte));
-
spte = mark_spte_for_access_track(spte);
mmu_spte_update_no_track(sptep, spte);
}
@@ -3415,7 +3393,7 @@ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu,
* harm. This also avoids the TLB flush needed after setting dirty bit
* so non-PML cases won't be impacted.
*
- * Compare with set_spte where instead shadow_dirty_mask is set.
+ * Compare with make_spte() where instead shadow_dirty_mask is set.
*/
if (!try_cmpxchg64(sptep, &old_spte, new_spte))
return false;
@@ -892,9 +892,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
/*
* Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
- * safe because:
- * - The spte has a reference to the struct page, so the pfn for a given gfn
- * can't change unless all sptes pointing to it are nuked first.
+ * safe because SPTEs are protected by mmu_notifiers and memslot generations, so
+ * the pfn for a given gfn can't change unless all SPTEs pointing to the gfn are
+ * nuked first.
*
* Returns
* < 0: failed to sync spte
@@ -232,8 +232,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* unnecessary (and expensive).
*
* The same reasoning applies to dirty page/folio accounting;
- * KVM will mark the folio dirty using the old SPTE, thus
- * there's no need to immediately mark the new SPTE as dirty.
+ * KVM marked the folio dirty when the old SPTE was created,
+ * thus there's no need to mark the folio dirty again.
*
* Note, both cases rely on KVM not changing PFNs without first
* zapping the old SPTE, which is guaranteed by both the shadow
@@ -266,12 +266,28 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
"spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));
+ /*
+ * Mark the memslot dirty *after* modifying it for access tracking.
+ * Unlike folios, memslots can be safely marked dirty out of mmu_lock,
+ * i.e. in the fast page fault handler.
+ */
if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
/* Enforced by kvm_mmu_hugepage_adjust. */
WARN_ON_ONCE(level > PG_LEVEL_4K);
mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
}
+ /*
+ * If the page that KVM got from the primary MMU is writable, i.e. if
+ * it's host-writable, mark the page/folio dirty. As alluded to above,
+ * folios can't be safely marked dirty in the fast page fault handler,
+ * and so KVM must (somewhat) speculatively mark the folio dirty even
+ * though it isn't guaranteed to be written, as KVM won't mark the folio
+ * dirty if/when the SPTE is made writable.
+ */
+ if (host_writable)
+ kvm_set_pfn_dirty(pfn);
+
*new_spte = spte;
return wrprot;
}
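Putting the additions together, the tail of make_spte() now handles the two forms of dirty tracking separately: the memslot is marked dirty only for writable SPTEs in dirty-logged slots, while the page/folio is marked dirty for any host-writable mapping. A condensed sketch of the result, with the new comments abbreviated (reconstructed from the hunk above, for illustration only):

	/* Memslot dirty logging; safe even outside mmu_lock. */
	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
		/* Enforced by kvm_mmu_hugepage_adjust. */
		WARN_ON_ONCE(level > PG_LEVEL_4K);
		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
	}

	/* Folio dirtying must happen here, under mmu_lock, and is speculative. */
	if (host_writable)
		kvm_set_pfn_dirty(pfn);

	*new_spte = spte;
	return wrprot;
}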
@@ -511,10 +511,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
if (is_leaf != was_leaf)
kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
- if (was_leaf && is_dirty_spte(old_spte) &&
- (!is_present || !is_dirty_spte(new_spte) || pfn_changed))
- kvm_set_pfn_dirty(spte_to_pfn(old_spte));
-
/*
* Recursively handle child PTs if the change removed a subtree from
* the paging structure. Note the WARN on the PFN changing without the
@@ -1249,13 +1245,6 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
iter->level);
new_spte = iter->old_spte & ~shadow_accessed_mask;
} else {
- /*
- * Capture the dirty status of the page, so that it doesn't get
- * lost when the SPTE is marked for access tracking.
- */
- if (is_writable_pte(iter->old_spte))
- kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
-
new_spte = mark_spte_for_access_track(iter->old_spte);
iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
iter->old_spte, new_spte,
@@ -1596,7 +1585,6 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
trace_kvm_tdp_mmu_spte_changed(iter.as_id, iter.gfn, iter.level,
iter.old_spte,
iter.old_spte & ~dbit);
- kvm_set_pfn_dirty(spte_to_pfn(iter.old_spte));
}
rcu_read_unlock();