KVM: pfncache: rework __kvm_gpc_refresh() to fix locking issues

Message ID	9a82db197449bdb97ee889d2f3cdd7998abd9692.camel@amazon.co.uk (mailing list archive)
State	New, archived
Headers	show Received: from smtp-fw-80008.amazon.com (smtp-fw-80008.amazon.com [99.78.197.219]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 698F91641B for <kvm@vger.kernel.org>; Fri, 12 Jan 2024 20:38:26 +0000 (UTC) Content-Type: multipart/mixed; boundary="===============6231862153601125174==" Precedence: bulk MIME-Version: 1.0 From: "Woodhouse, David" <dwmw@amazon.co.uk> To: "kvm@vger.kernel.org" <kvm@vger.kernel.org> CC: "pbonzini@redhat.com" <pbonzini@redhat.com>, "seanjc@google.com" <seanjc@google.com>, "Durrant, Paul" <pdurrant@amazon.co.uk> Subject: [PATCH] KVM: pfncache: rework __kvm_gpc_refresh() to fix locking issues Thread-Topic: [PATCH] KVM: pfncache: rework __kvm_gpc_refresh() to fix locking issues Thread-Index: AQHaRZdH/5R5foK5Dkq2EmfI7zjsew== Date: Fri, 12 Jan 2024 20:38:20 +0000 Message-ID: <9a82db197449bdb97ee889d2f3cdd7998abd9692.camel@amazon.co.uk> Accept-Language: en-GB, en-US
Series	KVM: pfncache: rework __kvm_gpc_refresh() to fix locking issues

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 70394d7c9a38..adca709a5884 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -135,110 +135,67 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s return kvm->mmu_invalidate_seq != mmu_seq; } -static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) +/* + * Given a user virtual address, obtain a pinned host PFN and kernel mapping + * for it. The caller will release the PFN after installing it into the GPC + * so that the MMU notifier invalidation mechanism is active. + */ +static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva, + kvm_pfn_t *pfn, void **khva) { /* Note, the new page offset may be different than the old! */ - void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva); kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; void *new_khva = NULL; unsigned long mmu_seq; - lockdep_assert_held(&gpc->refresh_lock); - - lockdep_assert_held_write(&gpc->lock); - - /* - * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva - * assets have already been updated and so a concurrent check() from a - * different task may not fail the gpa/uhva/generation checks. - */ - gpc->valid = false; - - do { - mmu_seq = gpc->kvm->mmu_invalidate_seq; + for (;;) { + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); - write_unlock_irq(&gpc->lock); - - /* - * If the previous iteration "failed" due to an mmu_notifier - * event, release the pfn and unmap the kernel virtual address - * from the previous attempt. Unmapping might sleep, so this - * needs to be done after dropping the lock. Opportunistically - * check for resched while the lock isn't held. - */ - if (new_pfn != KVM_PFN_ERR_FAULT) { - /* - * Keep the mapping if the previous iteration reused - * the existing mapping and didn't create a new one. 
- */ - if (new_khva != old_khva) - gpc_unmap(new_pfn, new_khva); - - kvm_release_pfn_clean(new_pfn); - - cond_resched(); - } - /* We always request a writeable mapping */ - new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); + new_pfn = hva_to_pfn(uhva, false, false, NULL, true, NULL); if (is_error_noslot_pfn(new_pfn)) - goto out_error; + return -EFAULT; /* - * Obtain a new kernel mapping if KVM itself will access the - * pfn. Note, kmap() and memremap() can both sleep, so this - * too must be done outside of gpc->lock! + * Always obtain a new kernel mapping. Trying to reuse an + * existing one is more complex than it's worth. */ - if (new_pfn == gpc->pfn) - new_khva = old_khva; - else - new_khva = gpc_map(new_pfn); - + new_khva = gpc_map(new_pfn); if (!new_khva) { kvm_release_pfn_clean(new_pfn); - goto out_error; + return -EFAULT; } - write_lock_irq(&gpc->lock); + if (!mmu_notifier_retry_cache(kvm, mmu_seq)) + break; /* - * Other tasks must wait for _this_ refresh to complete before - * attempting to refresh. + * If this iteration "failed" due to an mmu_notifier event, + * release the pfn and unmap the kernel virtual address, and + * loop around again. */ - WARN_ON_ONCE(gpc->valid); - } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq)); - - gpc->valid = true; - gpc->pfn = new_pfn; - gpc->khva = new_khva + offset_in_page(gpc->uhva); + if (new_pfn != KVM_PFN_ERR_FAULT) { + gpc_unmap(new_pfn, new_khva); + kvm_release_pfn_clean(new_pfn); + } + } - /* - * Put the reference to the _new_ pfn. The pfn is now tracked by the - * cache and can be safely migrated, swapped, etc... as the cache will - * invalidate any mappings in response to relevant mmu_notifier events. 
- */ - kvm_release_pfn_clean(new_pfn); + *pfn = new_pfn; + *khva = new_khva; return 0; - -out_error: - write_lock_irq(&gpc->lock); - - return -EFAULT; } -static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva, - unsigned long len) +static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, + unsigned long uhva, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(gpc->kvm); unsigned long page_offset = (gpa != KVM_XEN_INVALID_GPA) ? offset_in_page(gpa) : offset_in_page(uhva); - bool unmap_old = false; unsigned long old_uhva; - kvm_pfn_t old_pfn; - bool hva_change = false; + kvm_pfn_t old_pfn = KVM_PFN_ERR_FAULT; void *old_khva; int ret; @@ -251,8 +208,9 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l /* * If another task is refreshing the cache, wait for it to complete. - * There is no guarantee that concurrent refreshes will see the same - * gpa, memslots generation, etc..., so they must be fully serialized. + * This is purely an optimisation, to avoid concurrent mappings from + * hva_to_pfn_retry(), all but one of which will be discarded after + * losing a race to install them in the GPC. */ mutex_lock(&gpc->refresh_lock); @@ -272,7 +230,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l gpc->uhva = PAGE_ALIGN_DOWN(uhva); if (gpc->uhva != old_uhva) - hva_change = true; + gpc->valid = false; } else if (gpc->gpa != gpa || gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva)) { @@ -285,7 +243,11 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l if (kvm_is_error_hva(gpc->uhva)) { ret = -EFAULT; - goto out; + + gpc->valid = false; + gpc->pfn = KVM_PFN_ERR_FAULT; + gpc->khva = NULL; + goto out_unlock; } /* @@ -293,7 +255,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l * HVA may still be the same. 
*/ if (gpc->uhva != old_uhva) - hva_change = true; + gpc->valid = false; } else { gpc->uhva = old_uhva; } @@ -305,9 +267,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l * If the userspace HVA changed or the PFN was already invalid, * drop the lock and do the HVA to PFN lookup again. */ - if (!gpc->valid || hva_change) { - ret = hva_to_pfn_retry(gpc); - } else { + if (gpc->valid) { /* * If the HVA→PFN mapping was already valid, don't unmap it. * But do update gpc->khva because the offset within the page @@ -315,30 +275,59 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l */ gpc->khva = old_khva + page_offset; ret = 0; - goto out_unlock; - } - out: - /* - * Invalidate the cache and purge the pfn/khva if the refresh failed. - * Some/all of the uhva, gpa, and memslot generation info may still be - * valid, leave it as is. - */ - if (ret) { + /* old_pfn must not be unmapped because it was reused. */ + old_pfn = KVM_PFN_ERR_FAULT; + } else { + kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; + unsigned long new_uhva = gpc->uhva; + void *new_khva = NULL; + + /* + * Invalidate the cache prior to dropping gpc->lock; the + * gpa=>uhva assets have already been updated and so a + * concurrent check() from a different task may not fail + * the gpa/uhva/generation checks as it should. + */ gpc->valid = false; - gpc->pfn = KVM_PFN_ERR_FAULT; - gpc->khva = NULL; - } - /* Detect a pfn change before dropping the lock! */ - unmap_old = (old_pfn != gpc->pfn); + write_unlock_irq(&gpc->lock); + + ret = hva_to_pfn_retry(gpc->kvm, new_uhva, &new_pfn, &new_khva); + + write_lock_irq(&gpc->lock); + + if (ret || gpc->uhva != new_uhva) { + /* + * On failure or if another update occurred while the + * lock was dropped, just purge the new mapping. 
*/ + old_pfn = new_pfn; + old_khva = new_khva; + } else { + old_pfn = gpc->pfn; + old_khva = gpc->khva; + + gpc->pfn = new_pfn; + gpc->khva = new_khva + offset_in_page(gpc->uhva); + gpc->valid = true; + } + + /* + * Put the reference to the _new_ pfn. On success, the + * pfn is now tracked by the cache and can safely be + * migrated, swapped, etc. as the cache will invalidate + * any mappings in response to relevant mmu_notifier + * events. + */ + kvm_release_pfn_clean(new_pfn); + } out_unlock: write_unlock_irq(&gpc->lock); mutex_unlock(&gpc->refresh_lock); - if (unmap_old) + if (old_pfn != KVM_PFN_ERR_FAULT) gpc_unmap(old_pfn, old_khva); return ret;

KVM: pfncache: rework __kvm_gpc_refresh() to fix locking issues

Commit Message

Comments

Patch