[6/6] KVM: pfncache: clean up rwlock abuse

Message ID	20240217114017.11551-7-dwmw2@infradead.org (mailing list archive)
State	New, archived
Headers	show Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 312A769DE6 for <kvm@vger.kernel.org>; Sat, 17 Feb 2024 11:40:24 +0000 (UTC) From: David Woodhouse <dwmw2@infradead.org> To: kvm@vger.kernel.org Cc: Sean Christopherson <seanjc@google.com>, Paul Durrant <paul@xen.org>, Paolo Bonzini <pbonzini@redhat.com>, Michal Luczaj <mhal@rbox.co>, David Woodhouse <dwmw@amazon.co.uk>, Paul Durrant <pdurrant@amazon.com> Subject: [PATCH 6/6] KVM: pfncache: clean up rwlock abuse Date: Sat, 17 Feb 2024 11:27:04 +0000 Message-ID: <20240217114017.11551-7-dwmw2@infradead.org> In-Reply-To: <20240217114017.11551-1-dwmw2@infradead.org> References: <20240217114017.11551-1-dwmw2@infradead.org> Precedence: bulk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: David Woodhouse <dwmw2@infradead.org>
Series	KVM: x86/xen updates | expand [0/6] KVM: x86/xen updates [1/6] KVM: x86/xen: improve accuracy of Xen timers [2/6] KVM: x86/xen: inject vCPU upcall vector when local APIC is enabled [3/6] KVM: x86/xen: remove WARN_ON_ONCE() with false positives in evtchn delivery [4/6] KVM: pfncache: simplify locking and make more self-contained [5/6] KVM: x86/xen: fix recursive deadlock in timer injection [6/6] KVM: pfncache: clean up rwlock abuse

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 79a3ef7c6d04..11b66f63af83 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -139,108 +139,65 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s return kvm->mmu_invalidate_seq != mmu_seq; } -static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) +/* + * Given a user virtual address, obtain a pinned host PFN and kernel mapping + * for it. The caller will release the PFN after installing it into the GPC + * so that the MMU notifier invalidation mechanism is active. + */ +static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva, + kvm_pfn_t *pfn, void **khva) { /* Note, the new page offset may be different than the old! */ - void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva); kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; void *new_khva = NULL; unsigned long mmu_seq; - lockdep_assert_held(&gpc->refresh_lock); - - lockdep_assert_held_write(&gpc->lock); - - /* - * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva - * assets have already been updated and so a concurrent check() from a - * different task may not fail the gpa/uhva/generation checks. - */ - gpc->valid = false; - - do { - mmu_seq = gpc->kvm->mmu_invalidate_seq; + for (;;) { + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); - write_unlock_irq(&gpc->lock); - - /* - * If the previous iteration "failed" due to an mmu_notifier - * event, release the pfn and unmap the kernel virtual address - * from the previous attempt. Unmapping might sleep, so this - * needs to be done after dropping the lock. Opportunistically - * check for resched while the lock isn't held. - */ - if (new_pfn != KVM_PFN_ERR_FAULT) { - /* - * Keep the mapping if the previous iteration reused - * the existing mapping and didn't create a new one. 
- */ - if (new_khva != old_khva) - gpc_unmap(new_pfn, new_khva); - - kvm_release_pfn_clean(new_pfn); - - cond_resched(); - } - /* We always request a writeable mapping */ - new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); + new_pfn = hva_to_pfn(uhva, false, false, NULL, true, NULL); if (is_error_noslot_pfn(new_pfn)) - goto out_error; + return -EFAULT; /* - * Obtain a new kernel mapping if KVM itself will access the - * pfn. Note, kmap() and memremap() can both sleep, so this - * too must be done outside of gpc->lock! + * Always obtain a new kernel mapping. Trying to reuse an + * existing one is more complex than it's worth. */ - if (new_pfn == gpc->pfn) - new_khva = old_khva; - else - new_khva = gpc_map(new_pfn); - + new_khva = gpc_map(new_pfn); if (!new_khva) { kvm_release_pfn_clean(new_pfn); - goto out_error; + return -EFAULT; } - write_lock_irq(&gpc->lock); + if (!mmu_notifier_retry_cache(kvm, mmu_seq)) + break; /* - * Other tasks must wait for _this_ refresh to complete before - * attempting to refresh. + * If this iteration "failed" due to an mmu_notifier event, + * release the pfn and unmap the kernel virtual address, and + * loop around again. */ - WARN_ON_ONCE(gpc->valid); - } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq)); - - gpc->valid = true; - gpc->pfn = new_pfn; - gpc->khva = new_khva + offset_in_page(gpc->uhva); + if (new_pfn != KVM_PFN_ERR_FAULT) { + gpc_unmap(new_pfn, new_khva); + kvm_release_pfn_clean(new_pfn); + } + } - /* - * Put the reference to the _new_ pfn. The pfn is now tracked by the - * cache and can be safely migrated, swapped, etc... as the cache will - * invalidate any mappings in response to relevant mmu_notifier events. 
- */ - kvm_release_pfn_clean(new_pfn); + *pfn = new_pfn; + *khva = new_khva; return 0; - -out_error: - write_lock_irq(&gpc->lock); - - return -EFAULT; } -static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva, - unsigned long len) +static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long uhva, unsigned long len) { unsigned long page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) : offset_in_page(gpa); - bool unmap_old = false; unsigned long old_uhva; - kvm_pfn_t old_pfn; - bool hva_change = false; + kvm_pfn_t old_pfn = KVM_PFN_ERR_FAULT; void *old_khva; int ret; @@ -274,7 +231,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l gpc->uhva = PAGE_ALIGN_DOWN(uhva); if (gpc->uhva != old_uhva) - hva_change = true; + gpc->valid = false; } else { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); @@ -289,7 +246,11 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l if (kvm_is_error_hva(gpc->uhva)) { ret = -EFAULT; - goto out; + + gpc->valid = false; + gpc->pfn = KVM_PFN_ERR_FAULT; + gpc->khva = NULL; + goto out_unlock; } /* @@ -297,7 +258,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l * HVA may still be the same. */ if (gpc->uhva != old_uhva) - hva_change = true; + gpc->valid = false; } else { gpc->uhva = old_uhva; } @@ -310,9 +271,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l * If the userspace HVA changed or the PFN was already invalid, * drop the lock and do the HVA to PFN lookup again. */ - if (!gpc->valid || hva_change) { - ret = hva_to_pfn_retry(gpc); - } else { + if (gpc->valid) { /* * If the HVA→PFN mapping was already valid, don't unmap it. 
* But do update gpc->khva because the offset within the page @@ -320,28 +279,60 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l */ gpc->khva = old_khva + page_offset; ret = 0; - goto out_unlock; - } - out: - /* - * Invalidate the cache and purge the pfn/khva if the refresh failed. - * Some/all of the uhva, gpa, and memslot generation info may still be - * valid, leave it as is. - */ - if (ret) { + /* old_pfn must not be unmapped because it was reused. */ + old_pfn = KVM_PFN_ERR_FAULT; + } else { + kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; + unsigned long new_uhva = gpc->uhva; + void *new_khva = NULL; + + /* + * Invalidate the cache prior to dropping gpc->lock; the + * gpa=>uhva assets have already been updated and so a + * concurrent check() from a different task may not fail + * the gpa/uhva/generation checks as it should. + */ gpc->valid = false; - gpc->pfn = KVM_PFN_ERR_FAULT; - gpc->khva = NULL; - } - /* Detect a pfn change before dropping the lock! */ - unmap_old = (old_pfn != gpc->pfn); + write_unlock_irq(&gpc->lock); + + ret = hva_to_pfn_retry(gpc->kvm, new_uhva, &new_pfn, &new_khva); + + write_lock_irq(&gpc->lock); + + WARN_ON_ONCE(gpc->valid); + + if (ret || !gpc->active || gpc->uhva != new_uhva) { + /* + * On failure or if another change occurred while the + * lock was dropped, just purge the new mapping. + */ + old_pfn = new_pfn; + old_khva = new_khva; + } else { + old_pfn = gpc->pfn; + old_khva = gpc->khva; + + gpc->pfn = new_pfn; + gpc->khva = new_khva + offset_in_page(gpc->uhva); + gpc->valid = true; + } + + /* + * Put the reference to the _new_ pfn. On success, the + * pfn is now tracked by the cache and can safely be + * migrated, swapped, etc. as the cache will invalidate + * any mappings in response to relevant mmu_notifier + * events. + */ + kvm_release_pfn_clean(new_pfn); + } out_unlock: write_unlock_irq(&gpc->lock); - if (unmap_old) + if (old_pfn != KVM_PFN_ERR_FAULT) gpc_unmap(old_pfn, old_khva); return ret;

[6/6] KVM: pfncache: clean up rwlock abuse

Commit Message

Patch