| Message ID | 20220415215901.1737897-17-oupton@google.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | KVM: arm64: Parallelize stage 2 fault handling |
On Fri, Apr 15, 2022 at 2:59 PM Oliver Upton <oupton@google.com> wrote:
>
> Voila! Since the map walkers are able to work in parallel there is no
> need to take the write lock on a stage 2 memory abort. Relax locking
> on map operations and cross fingers we got it right.

Might be worth a healthy sprinkle of lockdep on the functions taking
"shared" as an argument, just to make sure the wrong value isn't going
down a callstack you didn't expect.

>
> Signed-off-by: Oliver Upton <oupton@google.com>
> ---
>  arch/arm64/kvm/mmu.c | 21 +++------------------
>  1 file changed, 3 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 63cf18cdb978..2881051c3743 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1127,7 +1127,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>          gfn_t gfn;
>          kvm_pfn_t pfn;
>          bool logging_active = memslot_is_logging(memslot);
> -        bool use_read_lock = false;
>          unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
>          unsigned long vma_pagesize, fault_granule;
>          enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> @@ -1162,8 +1161,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>          if (logging_active) {
>                  force_pte = true;
>                  vma_shift = PAGE_SHIFT;
> -                use_read_lock = (fault_status == FSC_PERM && write_fault &&
> -                                 fault_granule == PAGE_SIZE);
>          } else {
>                  vma_shift = get_vma_page_shift(vma, hva);
>          }
> @@ -1267,15 +1264,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>          if (exec_fault && device)
>                  return -ENOEXEC;
>
> -        /*
> -         * To reduce MMU contentions and enhance concurrency during dirty
> -         * logging dirty logging, only acquire read lock for permission
> -         * relaxation.
> -         */
> -        if (use_read_lock)
> -                read_lock(&kvm->mmu_lock);
> -        else
> -                write_lock(&kvm->mmu_lock);
> +        read_lock(&kvm->mmu_lock);
> +

Ugh, I wish we could get rid of the analogous ugly block on x86.

>          pgt = vcpu->arch.hw_mmu->pgt;
>          if (mmu_notifier_retry(kvm, mmu_seq))
>                  goto out_unlock;
> @@ -1322,8 +1312,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>          if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
>                  ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
>          } else {
> -                WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
> -
>                  ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
>                                          __pfn_to_phys(pfn), prot,
>                                          mmu_caches, true);
> @@ -1336,10 +1324,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>          }
>
>  out_unlock:
> -        if (use_read_lock)
> -                read_unlock(&kvm->mmu_lock);
> -        else
> -                write_unlock(&kvm->mmu_lock);
> +        read_unlock(&kvm->mmu_lock);
>          kvm_set_pfn_accessed(pfn);
>          kvm_release_pfn_clean(pfn);
>          return ret != -EAGAIN ? ret : 0;
> --
> 2.36.0.rc0.470.gd361397f0d-goog
>
On Thu, Apr 21, 2022 at 09:35:27AM -0700, Ben Gardon wrote:
> On Fri, Apr 15, 2022 at 2:59 PM Oliver Upton <oupton@google.com> wrote:
> >
> > Voila! Since the map walkers are able to work in parallel there is no
> > need to take the write lock on a stage 2 memory abort. Relax locking
> > on map operations and cross fingers we got it right.
>
> Might be worth a healthy sprinkle of lockdep on the functions taking
> "shared" as an argument, just to make sure the wrong value isn't going
> down a callstack you didn't expect.

If we're going to go this route we might need to just punch a pointer
to the vCPU through to the stage 2 table walker. All of this plumbing is
built around the idea that there are multiple tables to manage and
needn't be in the context of a vCPU/VM, which is why I went the WARN()
route instead of better lockdep assertions.

> >
> > Signed-off-by: Oliver Upton <oupton@google.com>
> > ---
> >  arch/arm64/kvm/mmu.c | 21 +++------------------
> >  1 file changed, 3 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 63cf18cdb978..2881051c3743 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -1127,7 +1127,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> >          gfn_t gfn;
> >          kvm_pfn_t pfn;
> >          bool logging_active = memslot_is_logging(memslot);
> > -        bool use_read_lock = false;
> >          unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
> >          unsigned long vma_pagesize, fault_granule;
> >          enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> > @@ -1162,8 +1161,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> >          if (logging_active) {
> >                  force_pte = true;
> >                  vma_shift = PAGE_SHIFT;
> > -                use_read_lock = (fault_status == FSC_PERM && write_fault &&
> > -                                 fault_granule == PAGE_SIZE);
> >          } else {
> >                  vma_shift = get_vma_page_shift(vma, hva);
> >          }
> > @@ -1267,15 +1264,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> >          if (exec_fault && device)
> >                  return -ENOEXEC;
> >
> > -        /*
> > -         * To reduce MMU contentions and enhance concurrency during dirty
> > -         * logging dirty logging, only acquire read lock for permission
> > -         * relaxation.
> > -         */
> > -        if (use_read_lock)
> > -                read_lock(&kvm->mmu_lock);
> > -        else
> > -                write_lock(&kvm->mmu_lock);
> > +        read_lock(&kvm->mmu_lock);
> > +
>
> Ugh, I wish we could get rid of the analogous ugly block on x86.

Maybe we could fold it into a MMU macro in the arch-generic scope?
Conditional locking is smelly, I was very pleased to delete these lines :)

--
Thanks,
Oliver
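(For illustration: the "sprinkle of lockdep" Ben suggests would presumably look something like the sketch below, but only if the walkers had a struct kvm in hand. The helper name here is invented; as Oliver explains above, the stage 2 walker plumbing is deliberately VM-agnostic, which is why the series uses WARN() instead.)

```c
/*
 * Hypothetical sketch only -- not part of this series. It assumes the
 * stage 2 walker could be handed the struct kvm that owns the page
 * tables, which is exactly the plumbing Oliver is reluctant to add.
 */
static void stage2_assert_walker_locked(struct kvm *kvm, bool shared)
{
        if (shared)
                lockdep_assert_held_read(&kvm->mmu_lock);
        else
                lockdep_assert_held_write(&kvm->mmu_lock);
}
```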
On Thu, Apr 21, 2022 at 9:46 AM Oliver Upton <oupton@google.com> wrote:
>
> On Thu, Apr 21, 2022 at 09:35:27AM -0700, Ben Gardon wrote:
> > On Fri, Apr 15, 2022 at 2:59 PM Oliver Upton <oupton@google.com> wrote:
> > >
> > > Voila! Since the map walkers are able to work in parallel there is no
> > > need to take the write lock on a stage 2 memory abort. Relax locking
> > > on map operations and cross fingers we got it right.
> >
> > Might be worth a healthy sprinkle of lockdep on the functions taking
> > "shared" as an argument, just to make sure the wrong value isn't going
> > down a callstack you didn't expect.
>
> If we're going to go this route we might need to just punch a pointer
> to the vCPU through to the stage 2 table walker. All of this plumbing is
> built around the idea that there are multiple tables to manage and
> needn't be in the context of a vCPU/VM, which is why I went the WARN()
> route instead of better lockdep assertions.

Oh right, it didn't even occur to me that those functions wouldn't have
a vCPU / KVM pointer.

> > >
> > > Signed-off-by: Oliver Upton <oupton@google.com>
> > > ---
> > >  arch/arm64/kvm/mmu.c | 21 +++------------------
> > >  1 file changed, 3 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > > index 63cf18cdb978..2881051c3743 100644
> > > --- a/arch/arm64/kvm/mmu.c
> > > +++ b/arch/arm64/kvm/mmu.c
> > > @@ -1127,7 +1127,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > >          gfn_t gfn;
> > >          kvm_pfn_t pfn;
> > >          bool logging_active = memslot_is_logging(memslot);
> > > -        bool use_read_lock = false;
> > >          unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
> > >          unsigned long vma_pagesize, fault_granule;
> > >          enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> > > @@ -1162,8 +1161,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > >          if (logging_active) {
> > >                  force_pte = true;
> > >                  vma_shift = PAGE_SHIFT;
> > > -                use_read_lock = (fault_status == FSC_PERM && write_fault &&
> > > -                                 fault_granule == PAGE_SIZE);
> > >          } else {
> > >                  vma_shift = get_vma_page_shift(vma, hva);
> > >          }
> > > @@ -1267,15 +1264,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > >          if (exec_fault && device)
> > >                  return -ENOEXEC;
> > >
> > > -        /*
> > > -         * To reduce MMU contentions and enhance concurrency during dirty
> > > -         * logging dirty logging, only acquire read lock for permission
> > > -         * relaxation.
> > > -         */
> > > -        if (use_read_lock)
> > > -                read_lock(&kvm->mmu_lock);
> > > -        else
> > > -                write_lock(&kvm->mmu_lock);
> > > +        read_lock(&kvm->mmu_lock);
> > > +
> >
> > Ugh, I wish we could get rid of the analogous ugly block on x86.
>
> Maybe we could fold it into a MMU macro in the arch-generic scope?
> Conditional locking is smelly, I was very pleased to delete these lines :)

Smelly indeed. I don't think hiding it behind a macro would really
help. It's just something we'll have to live with in x86.

>
> --
> Thanks,
> Oliver
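(For concreteness, the arch-generic wrapper being floated could look something like the sketch below. The helper names are invented for illustration and nothing like them exists in the tree; as Ben points out, a wrapper only hides the conditional, the caller still has to thread the right value of "shared" through.)

```c
/* Hypothetical wrappers for the conditional-locking pattern. */
static inline void kvm_mmu_lock_shared(struct kvm *kvm, bool shared)
{
        if (shared)
                read_lock(&kvm->mmu_lock);
        else
                write_lock(&kvm->mmu_lock);
}

static inline void kvm_mmu_unlock_shared(struct kvm *kvm, bool shared)
{
        if (shared)
                read_unlock(&kvm->mmu_lock);
        else
                write_unlock(&kvm->mmu_lock);
}
```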
```diff
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 63cf18cdb978..2881051c3743 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1127,7 +1127,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
-	bool use_read_lock = false;
 	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
 	unsigned long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1162,8 +1161,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
-		use_read_lock = (fault_status == FSC_PERM && write_fault &&
-				 fault_granule == PAGE_SIZE);
 	} else {
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
@@ -1267,15 +1264,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault && device)
 		return -ENOEXEC;
 
-	/*
-	 * To reduce MMU contentions and enhance concurrency during dirty
-	 * logging dirty logging, only acquire read lock for permission
-	 * relaxation.
-	 */
-	if (use_read_lock)
-		read_lock(&kvm->mmu_lock);
-	else
-		write_lock(&kvm->mmu_lock);
+	read_lock(&kvm->mmu_lock);
+
 	pgt = vcpu->arch.hw_mmu->pgt;
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -1322,8 +1312,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
 	} else {
-		WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
-
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
 					     __pfn_to_phys(pfn), prot,
 					     mmu_caches, true);
@@ -1336,10 +1324,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 out_unlock:
-	if (use_read_lock)
-		read_unlock(&kvm->mmu_lock);
-	else
-		write_unlock(&kvm->mmu_lock);
+	read_unlock(&kvm->mmu_lock);
 	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret != -EAGAIN ? ret : 0;
```
Voila! Since the map walkers are able to work in parallel there is no
need to take the write lock on a stage 2 memory abort. Relax locking
on map operations and cross fingers we got it right.

Signed-off-by: Oliver Upton <oupton@google.com>
---
 arch/arm64/kvm/mmu.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)
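(Editorial illustration of why the write lock can go away: with parallel map walkers, every faulting vCPU takes mmu_lock for read and lets the page table entry itself arbitrate the race, with the loser backing off so the guest simply retries the fault, which is what the pre-existing `return ret != -EAGAIN ? ret : 0;` tail handles. The standalone userspace sketch below demonstrates that "install atomically, loser retries" pattern; it is an analogy with invented names, not kernel code from this series.)

```c
/*
 * Illustration only: two "vCPUs" race to install the same pretend PTE.
 * Build with: cc -pthread demo.c
 */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* One pretend stage 2 PTE; 0 means "not mapped yet". */
static _Atomic uint64_t pte;

/* Stand-in for a map walker installing an entry under the read lock. */
static int stage2_map_one(uint64_t new_pte)
{
        uint64_t expected = 0;

        if (atomic_compare_exchange_strong(&pte, &expected, new_pte))
                return 0;               /* we won the race and installed it */

        return -EAGAIN;                 /* someone else mapped it first */
}

static void *vcpu_fault(void *arg)
{
        uint64_t desc = (uint64_t)(uintptr_t)arg;
        int ret = stage2_map_one(desc);

        /* A lost race is not an error; the guest just retries the access. */
        printf("vcpu installing %#llx: %s\n", (unsigned long long)desc,
               ret == -EAGAIN ? "lost race, guest retries" : "installed");
        return NULL;
}

int main(void)
{
        pthread_t t[2];

        pthread_create(&t[0], NULL, vcpu_fault, (void *)(uintptr_t)0x1000);
        pthread_create(&t[1], NULL, vcpu_fault, (void *)(uintptr_t)0x2000);
        pthread_join(t[0], NULL);
        pthread_join(t[1], NULL);

        printf("final pte: %#llx\n", (unsigned long long)atomic_load(&pte));
        return 0;
}
```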