diff mbox

KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts

Message ID 20171122034152.GA30117@fergus.ozlabs.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paul Mackerras Nov. 22, 2017, 3:41 a.m. UTC
From a038eb6fa13cc49cc91e6e81409c230ef8086038 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Wed, 22 Nov 2017 14:38:53 +1100
Subject: 

This fixes two errors that prevent a guest using the HPT MMU from
successfully migrating to a POWER9 host in radix MMU mode, or resizing
its HPT when running on a radix host.

The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV:
Don't rely on host's page size information", 2017-09-11) missed two
uses of hpte_base_page_size(), one in the HPT rehashing code and
one in kvm_htab_write() (which is used on the destination side in
migrating a HPT guest).  Instead we use kvmppc_hpte_base_page_shift().
Having the shift count means that we can use left and right shifts
instead of multiplication and division in a few places.

Along the way, this adds a check in kvm_htab_write() to ensure that the
page size encoding in the incoming HPTEs is recognized, and if not
return an EINVAL error to userspace.

The second bug was that kvm_htab_write was performing some but not all
of the functions of kvmhv_setup_mmu(), resulting in the destination VM
being left in radix mode as far as the hardware is concerned.  The
simplest fix for now is make kvm_htab_write() call
kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does.
In future it would be better to refactor the code more extensively
to remove the duplication.

Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page size information")
Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather than SDR1 on POWER9")
Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/kvm_ppc.h  |  1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++--------------
 arch/powerpc/kvm/book3s_hv.c        |  3 +--
 3 files changed, 25 insertions(+), 16 deletions(-)

Comments

Suraj Jitindar Singh Nov. 22, 2017, 5:55 a.m. UTC | #1
On Wed, 2017-11-22 at 14:41 +1100, Paul Mackerras wrote:
> From a038eb6fa13cc49cc91e6e81409c230ef8086038 Mon Sep 17 00:00:00
> 2001
> From: Paul Mackerras <paulus@ozlabs.org>
> Date: Wed, 22 Nov 2017 14:38:53 +1100
> Subject: 
> 
> This fixes two errors that prevent a guest using the HPT MMU from
> successfully migrating to a POWER9 host in radix MMU mode, or
> resizing
> its HPT when running on a radix host.
> 
> The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV:
> Don't rely on host's page size information", 2017-09-11) missed two
> uses of hpte_base_page_size(), one in the HPT rehashing code and
> one in kvm_htab_write() (which is used on the destination side in
> migrating a HPT guest).  Instead we use
> kvmppc_hpte_base_page_shift().
> Having the shift count means that we can use left and right shifts
> instead of multiplication and division in a few places.
> 
> Along the way, this adds a check in kvm_htab_write() to ensure that
> the
> page size encoding in the incoming HPTEs is recognized, and if not
> return an EINVAL error to userspace.
> 
> The second bug was that kvm_htab_write was performing some but not
> all
> of the functions of kvmhv_setup_mmu(), resulting in the destination
> VM
> being left in radix mode as far as the hardware is concerned.  The
> simplest fix for now is make kvm_htab_write() call
> kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does.
> In future it would be better to refactor the code more extensively
> to remove the duplication.
> 
> Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page
> size information")
> Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather
> than SDR1 on POWER9")
> Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

> ---
>  arch/powerpc/include/asm/kvm_ppc.h  |  1 +
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++--
> ------------
>  arch/powerpc/kvm/book3s_hv.c        |  3 +--
>  3 files changed, 25 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 96753f3..941c2a3 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -180,6 +180,7 @@ extern void
> kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
>  		struct iommu_group *grp);
>  extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
>  extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
> +extern void kvmppc_setup_partition_table(struct kvm *kvm);
>  
>  extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  				struct kvm_create_spapr_tce_64
> *args);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 235319c..9660972 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1238,8 +1238,9 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  	unsigned long vpte, rpte, guest_rpte;
>  	int ret;
>  	struct revmap_entry *rev;
> -	unsigned long apsize, psize, avpn, pteg, hash;
> +	unsigned long apsize, avpn, pteg, hash;
>  	unsigned long new_idx, new_pteg, replace_vpte;
> +	int pshift;
>  
>  	hptep = (__be64 *)(old->virt + (idx << 4));
>  
> @@ -1298,8 +1299,8 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		goto out;
>  
>  	rpte = be64_to_cpu(hptep[1]);
> -	psize = hpte_base_page_size(vpte, rpte);
> -	avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
> +	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
> +	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >>
> 23);
>  	pteg = idx / HPTES_PER_GROUP;
>  	if (vpte & HPTE_V_SECONDARY)
>  		pteg = ~pteg;
> @@ -1311,20 +1312,20 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
>  		offset = (avpn & 0x1f) << 23;
>  		vsid = avpn >> 5;
>  		/* We can find more bits from the pteg value */
> -		if (psize < (1ULL << 23))
> -			offset |= ((vsid ^ pteg) & old_hash_mask) *
> psize;
> +		if (pshift < 23)
> +			offset |= ((vsid ^ pteg) & old_hash_mask) <<
> pshift;
>  
> -		hash = vsid ^ (offset / psize);
> +		hash = vsid ^ (offset >> pshift);
>  	} else {
>  		unsigned long offset, vsid;
>  
>  		/* We only have 40 - 23 bits of seg_off in avpn */
>  		offset = (avpn & 0x1ffff) << 23;
>  		vsid = avpn >> 17;
> -		if (psize < (1ULL << 23))
> -			offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) * psize;
> +		if (pshift < 23)
> +			offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) << pshift;
>  
> -		hash = vsid ^ (vsid << 25) ^ (offset / psize);
> +		hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
>  	}
>  
>  	new_pteg = hash & new_hash_mask;
> @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
>  	ssize_t nb;
>  	long int err, ret;
>  	int mmu_ready;
> +	int pshift;
>  
>  	if (!access_ok(VERIFY_READ, buf, count))
>  		return -EFAULT;
> @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
>  			err = -EINVAL;
>  			if (!(v & HPTE_V_VALID))
>  				goto out;
> +			pshift = kvmppc_hpte_base_page_shift(v, r);
> +			if (pshift <= 0)
> +				goto out;
>  			lbuf += 2;
>  			nb += HPTE_SIZE;
>  
> @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
>  				goto out;
>  			}
>  			if (!mmu_ready && is_vrma_hpte(v)) {
> -				unsigned long psize =
> hpte_base_page_size(v, r);
> -				unsigned long senc =
> slb_pgsize_encoding(psize);
> -				unsigned long lpcr;
> +				unsigned long senc, lpcr;
>  
> +				senc = slb_pgsize_encoding(1ul <<
> pshift);
>  				kvm->arch.vrma_slb_v = senc |
> SLB_VSID_B_1T |
>  					(VRMA_VSID <<
> SLB_VSID_SHIFT_1T);
> -				lpcr = senc << (LPCR_VRMASD_SH - 4);
> -				kvmppc_update_lpcr(kvm, lpcr,
> LPCR_VRMASD);
> +				if
> (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> +					lpcr = senc <<
> (LPCR_VRMASD_SH - 4);
> +					kvmppc_update_lpcr(kvm,
> lpcr,
> +							   LPCR_VRMA
> SD);
> +				} else {
> +					kvmppc_setup_partition_table
> (kvm);
> +				}
>  				mmu_ready = 1;
>  			}
>  			++i;
> diff --git a/arch/powerpc/kvm/book3s_hv.c
> b/arch/powerpc/kvm/book3s_hv.c
> index 79ea3d9..2d46037 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI
> wakeup to a free host core");
>  
>  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
> -static void kvmppc_setup_partition_table(struct kvm *kvm);
>  
>  static inline struct kvm_vcpu *next_runnable_thread(struct
> kvmppc_vcore *vc,
>  		int *ip)
> @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct
> kvm_vcpu *vcpu)
>  	return;
>  }
>  
> -static void kvmppc_setup_partition_table(struct kvm *kvm)
> +void kvmppc_setup_partition_table(struct kvm *kvm)
>  {
>  	unsigned long dw0, dw1;
>
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 96753f3..941c2a3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -180,6 +180,7 @@  extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
 		struct iommu_group *grp);
 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
+extern void kvmppc_setup_partition_table(struct kvm *kvm);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce_64 *args);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 235319c..9660972 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1238,8 +1238,9 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 	unsigned long vpte, rpte, guest_rpte;
 	int ret;
 	struct revmap_entry *rev;
-	unsigned long apsize, psize, avpn, pteg, hash;
+	unsigned long apsize, avpn, pteg, hash;
 	unsigned long new_idx, new_pteg, replace_vpte;
+	int pshift;
 
 	hptep = (__be64 *)(old->virt + (idx << 4));
 
@@ -1298,8 +1299,8 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 		goto out;
 
 	rpte = be64_to_cpu(hptep[1]);
-	psize = hpte_base_page_size(vpte, rpte);
-	avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
 	pteg = idx / HPTES_PER_GROUP;
 	if (vpte & HPTE_V_SECONDARY)
 		pteg = ~pteg;
@@ -1311,20 +1312,20 @@  static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
 		offset = (avpn & 0x1f) << 23;
 		vsid = avpn >> 5;
 		/* We can find more bits from the pteg value */
-		if (psize < (1ULL << 23))
-			offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+		if (pshift < 23)
+			offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
 
-		hash = vsid ^ (offset / psize);
+		hash = vsid ^ (offset >> pshift);
 	} else {
 		unsigned long offset, vsid;
 
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		offset = (avpn & 0x1ffff) << 23;
 		vsid = avpn >> 17;
-		if (psize < (1ULL << 23))
-			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
+		if (pshift < 23)
+			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
 
-		hash = vsid ^ (vsid << 25) ^ (offset / psize);
+		hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
 	}
 
 	new_pteg = hash & new_hash_mask;
@@ -1801,6 +1802,7 @@  static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 	ssize_t nb;
 	long int err, ret;
 	int mmu_ready;
+	int pshift;
 
 	if (!access_ok(VERIFY_READ, buf, count))
 		return -EFAULT;
@@ -1855,6 +1857,9 @@  static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 			err = -EINVAL;
 			if (!(v & HPTE_V_VALID))
 				goto out;
+			pshift = kvmppc_hpte_base_page_shift(v, r);
+			if (pshift <= 0)
+				goto out;
 			lbuf += 2;
 			nb += HPTE_SIZE;
 
@@ -1869,14 +1874,18 @@  static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 				goto out;
 			}
 			if (!mmu_ready && is_vrma_hpte(v)) {
-				unsigned long psize = hpte_base_page_size(v, r);
-				unsigned long senc = slb_pgsize_encoding(psize);
-				unsigned long lpcr;
+				unsigned long senc, lpcr;
 
+				senc = slb_pgsize_encoding(1ul << pshift);
 				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
 					(VRMA_VSID << SLB_VSID_SHIFT_1T);
-				lpcr = senc << (LPCR_VRMASD_SH - 4);
-				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+				if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+					lpcr = senc << (LPCR_VRMASD_SH - 4);
+					kvmppc_update_lpcr(kvm, lpcr,
+							   LPCR_VRMASD);
+				} else {
+					kvmppc_setup_partition_table(kvm);
+				}
 				mmu_ready = 1;
 			}
 			++i;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 79ea3d9..2d46037 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -120,7 +120,6 @@  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
-static void kvmppc_setup_partition_table(struct kvm *kvm);
 
 static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
 		int *ip)
@@ -3574,7 +3573,7 @@  static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
 	return;
 }
 
-static void kvmppc_setup_partition_table(struct kvm *kvm)
+void kvmppc_setup_partition_table(struct kvm *kvm)
 {
 	unsigned long dw0, dw1;