Message ID | 20171122034152.GA30117@fergus.ozlabs.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, 2017-11-22 at 14:41 +1100, Paul Mackerras wrote: > From a038eb6fa13cc49cc91e6e81409c230ef8086038 Mon Sep 17 00:00:00 > 2001 > From: Paul Mackerras <paulus@ozlabs.org> > Date: Wed, 22 Nov 2017 14:38:53 +1100 > Subject: > > This fixes two errors that prevent a guest using the HPT MMU from > successfully migrating to a POWER9 host in radix MMU mode, or > resizing > its HPT when running on a radix host. > > The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV: > Don't rely on host's page size information", 2017-09-11) missed two > uses of hpte_base_page_size(), one in the HPT rehashing code and > one in kvm_htab_write() (which is used on the destination side in > migrating a HPT guest). Instead we use > kvmppc_hpte_base_page_shift(). > Having the shift count means that we can use left and right shifts > instead of multiplication and division in a few places. > > Along the way, this adds a check in kvm_htab_write() to ensure that > the > page size encoding in the incoming HPTEs is recognized, and if not > return an EINVAL error to userspace. > > The second bug was that kvm_htab_write was performing some but not > all > of the functions of kvmhv_setup_mmu(), resulting in the destination > VM > being left in radix mode as far as the hardware is concerned. The > simplest fix for now is make kvm_htab_write() call > kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does. > In future it would be better to refactor the code more extensively > to remove the duplication. > > Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page > size information") > Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather > than SDR1 on POWER9") > Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > --- > arch/powerpc/include/asm/kvm_ppc.h | 1 + > arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++-- > ------------ > arch/powerpc/kvm/book3s_hv.c | 3 +-- > 3 files changed, 25 insertions(+), 16 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h > b/arch/powerpc/include/asm/kvm_ppc.h > index 96753f3..941c2a3 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -180,6 +180,7 @@ extern void > kvm_spapr_tce_release_iommu_group(struct kvm *kvm, > struct iommu_group *grp); > extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); > extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); > +extern void kvmppc_setup_partition_table(struct kvm *kvm); > > extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, > struct kvm_create_spapr_tce_64 > *args); > diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c > b/arch/powerpc/kvm/book3s_64_mmu_hv.c > index 235319c..9660972 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c > @@ -1238,8 +1238,9 @@ static unsigned long > resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, > unsigned long vpte, rpte, guest_rpte; > int ret; > struct revmap_entry *rev; > - unsigned long apsize, psize, avpn, pteg, hash; > + unsigned long apsize, avpn, pteg, hash; > unsigned long new_idx, new_pteg, replace_vpte; > + int pshift; > > hptep = (__be64 *)(old->virt + (idx << 4)); > > @@ -1298,8 +1299,8 @@ static unsigned long > resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, > goto out; > > rpte = be64_to_cpu(hptep[1]); > - psize = hpte_base_page_size(vpte, rpte); > - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); > + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); > + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> > 23); > pteg = idx / HPTES_PER_GROUP; > if (vpte & HPTE_V_SECONDARY) > pteg = ~pteg; > @@ -1311,20 +1312,20 @@ static unsigned long > resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, > offset = (avpn & 0x1f) << 23; > vsid = avpn >> 5; > /* We can find more bits from the pteg value */ > - if (psize < (1ULL << 23)) > - offset |= ((vsid ^ pteg) & old_hash_mask) * > psize; > + if (pshift < 23) > + offset |= ((vsid ^ pteg) & old_hash_mask) << > pshift; > > - hash = vsid ^ (offset / psize); > + hash = vsid ^ (offset >> pshift); > } else { > unsigned long offset, vsid; > > /* We only have 40 - 23 bits of seg_off in avpn */ > offset = (avpn & 0x1ffff) << 23; > vsid = avpn >> 17; > - if (psize < (1ULL << 23)) > - offset |= ((vsid ^ (vsid << 25) ^ pteg) & > old_hash_mask) * psize; > + if (pshift < 23) > + offset |= ((vsid ^ (vsid << 25) ^ pteg) & > old_hash_mask) << pshift; > > - hash = vsid ^ (vsid << 25) ^ (offset / psize); > + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); > } > > new_pteg = hash & new_hash_mask; > @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file > *file, const char __user *buf, > ssize_t nb; > long int err, ret; > int mmu_ready; > + int pshift; > > if (!access_ok(VERIFY_READ, buf, count)) > return -EFAULT; > @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file > *file, const char __user *buf, > err = -EINVAL; > if (!(v & HPTE_V_VALID)) > goto out; > + pshift = kvmppc_hpte_base_page_shift(v, r); > + if (pshift <= 0) > + goto out; > lbuf += 2; > nb += HPTE_SIZE; > > @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file > *file, const char __user *buf, > goto out; > } > if (!mmu_ready && is_vrma_hpte(v)) { > - unsigned long psize = > hpte_base_page_size(v, r); > - unsigned long senc = > slb_pgsize_encoding(psize); > - unsigned long lpcr; > + unsigned long senc, lpcr; > > + senc = slb_pgsize_encoding(1ul << > pshift); > kvm->arch.vrma_slb_v = senc | > SLB_VSID_B_1T | > (VRMA_VSID << > SLB_VSID_SHIFT_1T); > - lpcr = senc << (LPCR_VRMASD_SH - 4); > - kvmppc_update_lpcr(kvm, lpcr, > LPCR_VRMASD); > + if > (!cpu_has_feature(CPU_FTR_ARCH_300)) { > + lpcr = senc << > (LPCR_VRMASD_SH - 4); > + kvmppc_update_lpcr(kvm, > lpcr, > + LPCR_VRMA > SD); > + } else { > + kvmppc_setup_partition_table > (kvm); > + } > mmu_ready = 1; > } > ++i; > diff --git a/arch/powerpc/kvm/book3s_hv.c > b/arch/powerpc/kvm/book3s_hv.c > index 79ea3d9..2d46037 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI > wakeup to a free host core"); > > static void kvmppc_end_cede(struct kvm_vcpu *vcpu); > static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); > -static void kvmppc_setup_partition_table(struct kvm *kvm); > > static inline struct kvm_vcpu *next_runnable_thread(struct > kvmppc_vcore *vc, > int *ip) > @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct > kvm_vcpu *vcpu) > return; > } > > -static void kvmppc_setup_partition_table(struct kvm *kvm) > +void kvmppc_setup_partition_table(struct kvm *kvm) > { > unsigned long dw0, dw1; >
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 96753f3..941c2a3 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct iommu_group *grp); extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); +extern void kvmppc_setup_partition_table(struct kvm *kvm); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 235319c..9660972 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, unsigned long vpte, rpte, guest_rpte; int ret; struct revmap_entry *rev; - unsigned long apsize, psize, avpn, pteg, hash; + unsigned long apsize, avpn, pteg, hash; unsigned long new_idx, new_pteg, replace_vpte; + int pshift; hptep = (__be64 *)(old->virt + (idx << 4)); @@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, goto out; rpte = be64_to_cpu(hptep[1]); - psize = hpte_base_page_size(vpte, rpte); - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23); pteg = idx / HPTES_PER_GROUP; if (vpte & HPTE_V_SECONDARY) pteg = ~pteg; @@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, offset = (avpn & 0x1f) << 23; vsid = avpn >> 5; /* We can find more bits from the pteg value */ - if (psize < (1ULL << 23)) - offset |= ((vsid ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (offset / psize); + hash = vsid ^ (offset >> pshift); } else { unsigned long offset, vsid; /* We only have 40 - 23 bits of seg_off in avpn */ offset = (avpn & 0x1ffff) << 23; vsid = avpn >> 17; - if (psize < (1ULL << 23)) - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; + if (pshift < 23) + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; - hash = vsid ^ (vsid << 25) ^ (offset / psize); + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); } new_pteg = hash & new_hash_mask; @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ssize_t nb; long int err, ret; int mmu_ready; + int pshift; if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; + pshift = kvmppc_hpte_base_page_shift(v, r); + if (pshift <= 0) + goto out; lbuf += 2; nb += HPTE_SIZE; @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!mmu_ready && is_vrma_hpte(v)) { - unsigned long psize = hpte_base_page_size(v, r); - unsigned long senc = slb_pgsize_encoding(psize); - unsigned long lpcr; + unsigned long senc, lpcr; + senc = slb_pgsize_encoding(1ul << pshift); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr = senc << (LPCR_VRMASD_SH - 4); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, + LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } mmu_ready = 1; } ++i; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 79ea3d9..2d46037 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -static void kvmppc_setup_partition_table(struct kvm *kvm); static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip) @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } -static void kvmppc_setup_partition_table(struct kvm *kvm) +void kvmppc_setup_partition_table(struct kvm *kvm) { unsigned long dw0, dw1;