Message ID: 20240530210714.364118-11-rick.p.edgecombe@intel.com (mailing list archive)
State: New, archived
Series: TDX MMU prep series part 1
On Thu, May 30, 2024 at 11:07 PM Rick Edgecombe <rick.p.edgecombe@intel.com> wrote:
> +        /* Update mirrored mapping with page table link */
> +        int (*reflect_link_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> +                                void *mirrored_spt);
> +        /* Update the mirrored page table from spte getting set */
> +        int (*reflect_set_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> +                                kvm_pfn_t pfn);

Possibly link_external_spt and set_external_spte, since you'll have to
s/mirrored/external/ in the comment. But not a hard request.

> +static void *get_mirrored_spt(gfn_t gfn, u64 new_spte, int level)
> +{
> +        if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
> +                struct kvm_mmu_page *sp = to_shadow_page(pfn_to_hpa(spte_to_pfn(new_spte)));

I think this is spte_to_child_sp(new_spte)?

> +                void *mirrored_spt = kvm_mmu_mirrored_spt(sp);
> +
> +                WARN_ON_ONCE(sp->role.level + 1 != level);
> +                WARN_ON_ONCE(sp->gfn != gfn);
> +                return mirrored_spt;

Based on previous reviews this can be just "return sp->external_spt",
removing the not-particularly-interesting kvm_mmu_mirrored_spt() helper.

> +static int __must_check reflect_set_spte_present(struct kvm *kvm, tdp_ptep_t sptep,

tdp_mmu_set_mirror_spte_atomic?

> +        /*
> +         * For mirrored page table, callbacks are needed to propagate SPTE
> +         * change into the mirrored page table. In order to atomically update
> +         * both the SPTE and the mirrored page tables with callbacks, utilize
> +         * freezing SPTE.
> +         * - Freeze the SPTE. Set entry to REMOVED_SPTE.
> +         * - Trigger callbacks for mirrored page tables.
> +         * - Unfreeze the SPTE. Set the entry to new_spte.
> +         */

        /*
         * We need to lock out other updates to the SPTE until the external
         * page table has been modified. Use REMOVED_SPTE similar to
         * the zapping case.
         */

Easy peasy. :) We may want to rename REMOVED_SPTE to FROZEN_SPTE; feel
free to do it at the head of this series, then it can be picked for 6.11.

> -static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
> +static inline int __tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
> {
>         u64 *sptep = rcu_dereference(iter->sptep);
>
> @@ -571,15 +629,36 @@ static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
>          */
>         WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
>
> -        /*
> -         * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
> -         * does not hold the mmu_lock. On failure, i.e. if a different logical
> -         * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
> -         * the current value, so the caller operates on fresh data, e.g. if it
> -         * retries tdp_mmu_set_spte_atomic()
> -         */
> -        if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
> -                return -EBUSY;
> +        if (is_mirror_sptep(iter->sptep) && !is_removed_spte(new_spte)) {
> +                int ret;
> +
> +                /* Don't support atomic zapping for mirrored roots */

The why is hidden in the commit message to patch 11. I wonder if it
isn't clearer to simply squash together patches 10 and 11 (your call),
and instead split out the addition of the new struct kvm parameters.

Anyway, this comment needs a bit more info:

        /*
         * Users of atomic zapping don't operate on mirror roots,
         * so only need to handle present new_spte.
         */

> +                if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
> +                        return -EBUSY;
> +                /*
> +                 * Populating case.
> +                 * - reflect_set_spte_present() implements
> +                 *   1) Freeze SPTE
> +                 *   2) call hooks to update mirrored page table,
> +                 *   3) update SPTE to new_spte
> +                 * - handle_changed_spte() only updates stats.
> +                 */

Comment not needed (weird I know).

> +                ret = reflect_set_spte_present(kvm, iter->sptep, iter->gfn,
> +                                               iter->old_spte, new_spte, iter->level);
> +                if (ret)
> +                        return ret;
> +        } else {
> +                /*
> +                 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs
> +                 * and does not hold the mmu_lock. On failure, i.e. if a
> +                 * different logical CPU modified the SPTE, try_cmpxchg64()
> +                 * updates iter->old_spte with the current value, so the caller
> +                 * operates on fresh data, e.g. if it retries
> +                 * tdp_mmu_set_spte_atomic()
> +                 */
> +                if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
> +                        return -EBUSY;
> +        }
>
>         return 0;
> }
> @@ -610,7 +689,7 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
>
>         lockdep_assert_held_read(&kvm->mmu_lock);
>
> -        ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
> +        ret = __tdp_mmu_set_spte_atomic(kvm, iter, new_spte);
>         if (ret)
>                 return ret;
>
> @@ -636,7 +715,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
>          * Delay processing of the zapped SPTE until after TLBs are flushed and
>          * the REMOVED_SPTE is replaced (see below).
>          */
> -        ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
> +        ret = __tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
>         if (ret)
>                 return ret;
>
> @@ -698,6 +777,11 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
>         role = sptep_to_sp(sptep)->role;
>         role.level = level;
>         handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, role, false);
> +
> +        /* Don't support setting for the non-atomic case */
> +        if (is_mirror_sptep(sptep))
> +                KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
> +
>         return old_spte;
> }
>
> --
> 2.34.1
>
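[Editor's note: the freeze protocol discussed above can be modelled outside the kernel with a plain
64-bit atomic slot: a sentinel value plays the role of REMOVED_SPTE (or FROZEN_SPTE after the
suggested rename), and a compare-exchange both detects racing updaters and locks the slot while the
external structure is updated. The sketch below only illustrates that ordering, it is not the TDP MMU
code; the names FROZEN_VAL, slot and propagate are invented for the example.]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define FROZEN_VAL ((uint64_t)0x5a5a5a5a5a5a5a5aULL)  /* stand-in for REMOVED/FROZEN_SPTE */

/*
 * Atomically replace *slot (expected to hold old_val) with new_val, but only
 * after propagate() has updated the external structure.  Concurrent updaters
 * that observe FROZEN_VAL, or any value other than old_val, fail the cmpxchg
 * and must retry, mirroring the -EBUSY behaviour in the patch.
 */
static bool set_slot_frozen(_Atomic uint64_t *slot, uint64_t old_val,
                            uint64_t new_val, int (*propagate)(uint64_t))
{
        /* Freeze: lock out other updates to the slot. */
        if (!atomic_compare_exchange_strong(slot, &old_val, FROZEN_VAL))
                return false;               /* lost the race, caller retries */

        if (propagate(new_val)) {
                /* Propagation failed: unfreeze back to the old value. */
                atomic_store(slot, old_val);
                return false;
        }

        /* Unfreeze: publish the final value. */
        atomic_store(slot, new_val);
        return true;
}

[The kernel version additionally distinguishes leaf from non-leaf SPTEs (reflect_set_spte() vs.
reflect_link_spt()) and reports -EBUSY to its caller, but the freeze, propagate, unfreeze ordering is
the same.]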
On Fri, 2024-06-07 at 12:10 +0200, Paolo Bonzini wrote:
> On Thu, May 30, 2024 at 11:07 PM Rick Edgecombe
> <rick.p.edgecombe@intel.com> wrote:
> > +        /* Update mirrored mapping with page table link */
> > +        int (*reflect_link_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> > +                                void *mirrored_spt);
> > +        /* Update the mirrored page table from spte getting set */
> > +        int (*reflect_set_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> > +                                kvm_pfn_t pfn);
>
> Possibly link_external_spt and set_external_spte, since you'll have to
> s/mirrored/external/ in the comment. But not a hard request.

Definitely seems better now that we have the "external" nomenclature.

> > +static void *get_mirrored_spt(gfn_t gfn, u64 new_spte, int level)
> > +{
> > +        if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
> > +                struct kvm_mmu_page *sp = to_shadow_page(pfn_to_hpa(spte_to_pfn(new_spte)));
>
> I think this is spte_to_child_sp(new_spte)?

Yes, that seems much easier than all this.

[snip]

> > +static int __must_check reflect_set_spte_present(struct kvm *kvm, tdp_ptep_t sptep,
>
> tdp_mmu_set_mirror_spte_atomic?
>
> > +        /*
> > +         * For mirrored page table, callbacks are needed to propagate SPTE
> > +         * change into the mirrored page table. In order to atomically update
> > +         * both the SPTE and the mirrored page tables with callbacks, utilize
> > +         * freezing SPTE.
> > +         * - Freeze the SPTE. Set entry to REMOVED_SPTE.
> > +         * - Trigger callbacks for mirrored page tables.
> > +         * - Unfreeze the SPTE. Set the entry to new_spte.
> > +         */
>
>         /*
>          * We need to lock out other updates to the SPTE until the external
>          * page table has been modified. Use REMOVED_SPTE similar to
>          * the zapping case.
>          */
>
> Easy peasy. :) We may want to rename REMOVED_SPTE to FROZEN_SPTE; feel
> free to do it at the head of this series, then it can be picked for
> 6.11.

Ok.

> > -static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
> > +static inline int __tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
> > {
> >         u64 *sptep = rcu_dereference(iter->sptep);
> >
> > @@ -571,15 +629,36 @@ static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
> >          */
> >         WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
> >
> > -        /*
> > -         * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
> > -         * does not hold the mmu_lock. On failure, i.e. if a different logical
> > -         * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
> > -         * the current value, so the caller operates on fresh data, e.g. if it
> > -         * retries tdp_mmu_set_spte_atomic()
> > -         */
> > -        if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
> > -                return -EBUSY;
> > +        if (is_mirror_sptep(iter->sptep) && !is_removed_spte(new_spte)) {
> > +                int ret;
> > +
> > +                /* Don't support atomic zapping for mirrored roots */
>
> The why is hidden in the commit message to patch 11. I wonder if it
> isn't clearer to simply squash together patches 10 and 11 (your call),
> and instead split out the addition of the new struct kvm parameters.

I actually split them in two for this v2. I thought the combined patch was
too big. Maybe I could just move this whole hunk to the next patch. I'll
give it a try.

> Anyway, this comment needs a bit more info:
>
>         /*
>          * Users of atomic zapping don't operate on mirror roots,
>          * so only need to handle present new_spte.
>          */

Ok.

> > +                if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
> > +                        return -EBUSY;
> > +                /*
> > +                 * Populating case.
> > +                 * - reflect_set_spte_present() implements
> > +                 *   1) Freeze SPTE
> > +                 *   2) call hooks to update mirrored page table,
> > +                 *   3) update SPTE to new_spte
> > +                 * - handle_changed_spte() only updates stats.
> > +                 */
>
> Comment not needed (weird I know).

Sure.
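[Editor's note: folding in the two simplifications agreed above (spte_to_child_sp() instead of the
open-coded pfn-to-shadow-page conversion, and returning sp->external_spt directly), the helper could
end up looking roughly like the sketch below. The name get_external_spt() is only a guess at how the
s/mirrored/external/ rename might land; the final patch may differ.]

static void *get_external_spt(gfn_t gfn, u64 new_spte, int level)
{
        if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
                /* The non-leaf SPTE points at the child shadow page... */
                struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);

                WARN_ON_ONCE(sp->role.level + 1 != level);
                WARN_ON_ONCE(sp->gfn != gfn);

                /* ...whose external page table is handed to the link hook. */
                return sp->external_spt;
        }

        return NULL;
}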
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 566d19b02483..1877d6a77525 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -95,6 +95,8 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
 KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
 KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP_OPTIONAL(reflect_link_spt)
+KVM_X86_OP_OPTIONAL(reflect_set_spte)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d4446dde0ace..20bb10f22ca6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1748,6 +1748,13 @@ struct kvm_x86_ops {
 	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 			     int root_level);
 
+	/* Update mirrored mapping with page table link */
+	int (*reflect_link_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				void *mirrored_spt);
+	/* Update the mirrored page table from spte getting set */
+	int (*reflect_set_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+				kvm_pfn_t pfn);
+
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0fc168a3fff1..41b1d3f26597 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -446,6 +446,64 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
 	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
 
+static void *get_mirrored_spt(gfn_t gfn, u64 new_spte, int level)
+{
+	if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
+		struct kvm_mmu_page *sp = to_shadow_page(pfn_to_hpa(spte_to_pfn(new_spte)));
+		void *mirrored_spt = kvm_mmu_mirrored_spt(sp);
+
+		WARN_ON_ONCE(sp->role.level + 1 != level);
+		WARN_ON_ONCE(sp->gfn != gfn);
+		return mirrored_spt;
+	}
+
+	return NULL;
+}
+
+static int __must_check reflect_set_spte_present(struct kvm *kvm, tdp_ptep_t sptep,
+						 gfn_t gfn, u64 old_spte,
+						 u64 new_spte, int level)
+{
+	bool was_present = is_shadow_present_pte(old_spte);
+	bool is_present = is_shadow_present_pte(new_spte);
+	bool is_leaf = is_present && is_last_spte(new_spte, level);
+	kvm_pfn_t new_pfn = spte_to_pfn(new_spte);
+	int ret = 0;
+
+	KVM_BUG_ON(was_present, kvm);
+
+	/*
+	 * For mirrored page table, callbacks are needed to propagate SPTE
+	 * change into the mirrored page table. In order to atomically update
+	 * both the SPTE and the mirrored page tables with callbacks, utilize
+	 * freezing SPTE.
+	 * - Freeze the SPTE. Set entry to REMOVED_SPTE.
+	 * - Trigger callbacks for mirrored page tables.
+	 * - Unfreeze the SPTE. Set the entry to new_spte.
+	 */
+	lockdep_assert_held(&kvm->mmu_lock);
+	if (!try_cmpxchg64(sptep, &old_spte, REMOVED_SPTE))
+		return -EBUSY;
+
+	/*
+	 * Use different call to either set up middle level
+	 * mirrored page table, or leaf.
+	 */
+	if (is_leaf) {
+		ret = static_call(kvm_x86_reflect_set_spte)(kvm, gfn, level, new_pfn);
+	} else {
+		void *mirrored_spt = get_mirrored_spt(gfn, new_spte, level);
+
+		KVM_BUG_ON(!mirrored_spt, kvm);
+		ret = static_call(kvm_x86_reflect_link_spt)(kvm, gfn, level, mirrored_spt);
+	}
+	if (ret)
+		__kvm_tdp_mmu_write_spte(sptep, old_spte);
+	else
+		__kvm_tdp_mmu_write_spte(sptep, new_spte);
+	return ret;
+}
+
 /**
  * handle_changed_spte - handle bookkeeping associated with an SPTE change
  * @kvm: kvm instance
@@ -559,7 +617,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 }
 
-static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
+static inline int __tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte)
 {
 	u64 *sptep = rcu_dereference(iter->sptep);
 
@@ -571,15 +629,36 @@ static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
 	 */
 	WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
 
-	/*
-	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
-	 * does not hold the mmu_lock. On failure, i.e. if a different logical
-	 * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
-	 * the current value, so the caller operates on fresh data, e.g. if it
-	 * retries tdp_mmu_set_spte_atomic()
-	 */
-	if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
-		return -EBUSY;
+	if (is_mirror_sptep(iter->sptep) && !is_removed_spte(new_spte)) {
+		int ret;
+
+		/* Don't support atomic zapping for mirrored roots */
+		if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
+			return -EBUSY;
+		/*
+		 * Populating case.
+		 * - reflect_set_spte_present() implements
+		 *   1) Freeze SPTE
+		 *   2) call hooks to update mirrored page table,
+		 *   3) update SPTE to new_spte
+		 * - handle_changed_spte() only updates stats.
+		 */
+		ret = reflect_set_spte_present(kvm, iter->sptep, iter->gfn,
+					       iter->old_spte, new_spte, iter->level);
+		if (ret)
+			return ret;
+	} else {
+		/*
+		 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs
+		 * and does not hold the mmu_lock. On failure, i.e. if a
+		 * different logical CPU modified the SPTE, try_cmpxchg64()
+		 * updates iter->old_spte with the current value, so the caller
+		 * operates on fresh data, e.g. if it retries
+		 * tdp_mmu_set_spte_atomic()
+		 */
+		if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
+			return -EBUSY;
+	}
 
 	return 0;
 }
@@ -610,7 +689,7 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
+	ret = __tdp_mmu_set_spte_atomic(kvm, iter, new_spte);
 	if (ret)
 		return ret;
 
@@ -636,7 +715,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * Delay processing of the zapped SPTE until after TLBs are flushed and
 	 * the REMOVED_SPTE is replaced (see below).
 	 */
-	ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
+	ret = __tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
 	if (ret)
 		return ret;
 
@@ -698,6 +777,11 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
 	role = sptep_to_sp(sptep)->role;
 	role.level = level;
 	handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, role, false);
+
+	/* Don't support setting for the non-atomic case */
+	if (is_mirror_sptep(sptep))
+		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
+
 	return old_spte;
 }