Message ID | 20231027182217.3615211-12-seanjc@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | KVM: guest_memfd() and per-page attributes | expand |
On 10/27/23 20:21, Sean Christopherson wrote: > Drop the .on_unlock() mmu_notifier hook now that it's no longer used for > notifying arch code that memory has been reclaimed. Adding .on_unlock() > and invoking it *after* dropping mmu_lock was a terrible idea, as doing so > resulted in .on_lock() and .on_unlock() having divergent and asymmetric > behavior, and set future developers up for failure, i.e. all but asked for > bugs where KVM relied on using .on_unlock() to try to run a callback while > holding mmu_lock. > > Opportunistically add a lockdep assertion in kvm_mmu_invalidate_end() to > guard against future bugs of this nature. This is what David suggested to do in patch 3, FWIW. Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Paolo > Reported-by: Isaku Yamahata <isaku.yamahata@intel.com> > Link: https://lore.kernel.org/all/20230802203119.GB2021422@ls.amr.corp.intel.com > Signed-off-by: Sean Christopherson <seanjc@google.com> > ---
On Fri, Oct 27, 2023 at 7:22 PM Sean Christopherson <seanjc@google.com> wrote: > > Drop the .on_unlock() mmu_notifer hook now that it's no longer used for > notifying arch code that memory has been reclaimed. Adding .on_unlock() > and invoking it *after* dropping mmu_lock was a terrible idea, as doing so > resulted in .on_lock() and .on_unlock() having divergent and asymmetric > behavior, and set future developers up for failure, i.e. all but asked for > bugs where KVM relied on using .on_unlock() to try to run a callback while > holding mmu_lock. > > Opportunistically add a lockdep assertion in kvm_mmu_invalidate_end() to > guard against future bugs of this nature. > > Reported-by: Isaku Yamahata <isaku.yamahata@intel.com> > Link: https://lore.kernel.org/all/20230802203119.GB2021422@ls.amr.corp.intel.com > Signed-off-by: Sean Christopherson <seanjc@google.com> > --- Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Cheers, /fuad > virt/kvm/kvm_main.c | 13 +++---------- > 1 file changed, 3 insertions(+), 10 deletions(-) > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 2bc04c8ae1f4..cb9376833c18 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -544,7 +544,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) > typedef bool (*gfn_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range); > > typedef void (*on_lock_fn_t)(struct kvm *kvm); > -typedef void (*on_unlock_fn_t)(struct kvm *kvm); > > struct kvm_mmu_notifier_range { > /* > @@ -556,7 +555,6 @@ struct kvm_mmu_notifier_range { > union kvm_mmu_notifier_arg arg; > gfn_handler_t handler; > on_lock_fn_t on_lock; > - on_unlock_fn_t on_unlock; > bool flush_on_ret; > bool may_block; > }; > @@ -663,11 +661,8 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, > if (range->flush_on_ret && r.ret) > kvm_flush_remote_tlbs(kvm); > > - if (r.found_memslot) { > + if (r.found_memslot) > KVM_MMU_UNLOCK(kvm); > - 
if (!IS_KVM_NULL_FN(range->on_unlock)) > - range->on_unlock(kvm); > - } > > srcu_read_unlock(&kvm->srcu, idx); > > @@ -687,7 +682,6 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, > .arg = arg, > .handler = handler, > .on_lock = (void *)kvm_null_fn, > - .on_unlock = (void *)kvm_null_fn, > .flush_on_ret = true, > .may_block = false, > }; > @@ -706,7 +700,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn > .end = end, > .handler = handler, > .on_lock = (void *)kvm_null_fn, > - .on_unlock = (void *)kvm_null_fn, > .flush_on_ret = false, > .may_block = false, > }; > @@ -813,7 +806,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, > .end = range->end, > .handler = kvm_mmu_unmap_gfn_range, > .on_lock = kvm_mmu_invalidate_begin, > - .on_unlock = (void *)kvm_null_fn, > .flush_on_ret = true, > .may_block = mmu_notifier_range_blockable(range), > }; > @@ -858,6 +850,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, > > void kvm_mmu_invalidate_end(struct kvm *kvm) > { > + lockdep_assert_held_write(&kvm->mmu_lock); > + > /* > * This sequence increase will notify the kvm page fault that > * the page that is going to be mapped in the spte could have > @@ -889,7 +883,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, > .end = range->end, > .handler = (void *)kvm_null_fn, > .on_lock = kvm_mmu_invalidate_end, > - .on_unlock = (void *)kvm_null_fn, > .flush_on_ret = false, > .may_block = mmu_notifier_range_blockable(range), > }; > -- > 2.42.0.820.g83a721a137-goog >
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2bc04c8ae1f4..cb9376833c18 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -544,7 +544,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) typedef bool (*gfn_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range); typedef void (*on_lock_fn_t)(struct kvm *kvm); -typedef void (*on_unlock_fn_t)(struct kvm *kvm); struct kvm_mmu_notifier_range { /* @@ -556,7 +555,6 @@ struct kvm_mmu_notifier_range { union kvm_mmu_notifier_arg arg; gfn_handler_t handler; on_lock_fn_t on_lock; - on_unlock_fn_t on_unlock; bool flush_on_ret; bool may_block; }; @@ -663,11 +661,8 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm, if (range->flush_on_ret && r.ret) kvm_flush_remote_tlbs(kvm); - if (r.found_memslot) { + if (r.found_memslot) KVM_MMU_UNLOCK(kvm); - if (!IS_KVM_NULL_FN(range->on_unlock)) - range->on_unlock(kvm); - } srcu_read_unlock(&kvm->srcu, idx); @@ -687,7 +682,6 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, .arg = arg, .handler = handler, .on_lock = (void *)kvm_null_fn, - .on_unlock = (void *)kvm_null_fn, .flush_on_ret = true, .may_block = false, }; @@ -706,7 +700,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn .end = end, .handler = handler, .on_lock = (void *)kvm_null_fn, - .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = false, }; @@ -813,7 +806,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, .end = range->end, .handler = kvm_mmu_unmap_gfn_range, .on_lock = kvm_mmu_invalidate_begin, - .on_unlock = (void *)kvm_null_fn, .flush_on_ret = true, .may_block = mmu_notifier_range_blockable(range), }; @@ -858,6 +850,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, void kvm_mmu_invalidate_end(struct kvm *kvm) { + lockdep_assert_held_write(&kvm->mmu_lock); + /* * This sequence increase will notify the kvm 
page fault that * the page that is going to be mapped in the spte could have @@ -889,7 +883,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, .end = range->end, .handler = (void *)kvm_null_fn, .on_lock = kvm_mmu_invalidate_end, - .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = mmu_notifier_range_blockable(range), };
Drop the .on_unlock() mmu_notifier hook now that it's no longer used for notifying arch code that memory has been reclaimed. Adding .on_unlock() and invoking it *after* dropping mmu_lock was a terrible idea, as doing so resulted in .on_lock() and .on_unlock() having divergent and asymmetric behavior, and set future developers up for failure, i.e. all but asked for bugs where KVM relied on using .on_unlock() to try to run a callback while holding mmu_lock. Opportunistically add a lockdep assertion in kvm_mmu_invalidate_end() to guard against future bugs of this nature. Reported-by: Isaku Yamahata <isaku.yamahata@intel.com> Link: https://lore.kernel.org/all/20230802203119.GB2021422@ls.amr.corp.intel.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- virt/kvm/kvm_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-)