diff mbox

powerpc/kvm/xive: Enable use of the new "single escalation" feature

Message ID 1511236663.2466.4.camel@kernel.crashing.org (mailing list archive)
State New, archived
Headers show

Commit Message

Benjamin Herrenschmidt Nov. 21, 2017, 3:57 a.m. UTC
That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows sacrificing a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

To test, you need a DD2.x chip and this series applied to
your skiboot firmware:

https://patchwork.ozlabs.org/project/skiboot/list/?series=14500

 arch/powerpc/include/asm/opal-api.h |  1 +
 arch/powerpc/include/asm/xive.h     |  3 ++-
 arch/powerpc/kvm/book3s_xive.c      | 48 ++++++++++++++++++++++++-------------
 arch/powerpc/kvm/book3s_xive.h      | 15 +++++-------
 arch/powerpc/sysdev/xive/native.c   | 18 ++++++++++++--
 5 files changed, 57 insertions(+), 28 deletions(-)

Comments

Benjamin Herrenschmidt Nov. 21, 2017, 9:37 a.m. UTC | #1
On Tue, 2017-11-21 at 14:57 +1100, Benjamin Herrenschmidt wrote:
> That feature, provided by Power9 DD2.0 and later, when supported
> by newer OPAL versions, allows sacrificing a queue (priority 7)
> in favor of merging all the escalation interrupts of the queues
> of a single VP into a single interrupt.
> 
> This reduces the number of host interrupts used up by KVM guests
> especially when those guests use multiple priorities.
> 
> It will also enable a future change to control the masking of the
> escalation interrupts more precisely to avoid spurious ones.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
> 
> To test, you need a DD2.x chip and this series applied to
> your skiboot firmware:
> 
> https://patchwork.ozlabs.org/project/skiboot/list/?series=14500

Or better, this one:

https://patchwork.ozlabs.org/project/skiboot/list/?series=14526

> 
>  arch/powerpc/include/asm/opal-api.h |  1 +
>  arch/powerpc/include/asm/xive.h     |  3 ++-
>  arch/powerpc/kvm/book3s_xive.c      | 48 ++++++++++++++++++++++++-------------
>  arch/powerpc/kvm/book3s_xive.h      | 15 +++++-------
>  arch/powerpc/sysdev/xive/native.c   | 18 ++++++++++++--
>  5 files changed, 57 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 450a60b81d2a..4df668a32ab4 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -1070,6 +1070,7 @@ enum {
>  /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
>  enum {
>  	OPAL_XIVE_VP_ENABLED		= 0x00000001,
> +	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
>  };
>  
>  /* "Any chip" replacement for chip ID for allocation functions */
> diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
> index 371fbebf1ec9..11d5edeb5c22 100644
> --- a/arch/powerpc/include/asm/xive.h
> +++ b/arch/powerpc/include/asm/xive.h
> @@ -143,9 +143,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
>  
>  extern void xive_native_sync_source(u32 hw_irq);
>  extern bool is_xive_irq(struct irq_chip *chip);
> -extern int xive_native_enable_vp(u32 vp_id);
> +extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
>  extern int xive_native_disable_vp(u32 vp_id);
>  extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
> +extern bool xive_native_has_single_escalation(void);
>  
>  #else
>  
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index 6cff5bdfd6b7..a102efeabf05 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
>  		return -EIO;
>  	}
>  
> -	/*
> -	 * Future improvement: start with them disabled
> -	 * and handle DD2 and later scheme of merged escalation
> -	 * interrupts
> -	 */
> -	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
> -			 vcpu->kvm->arch.lpid, xc->server_num, prio);
> +	if (xc->xive->single_escalation)
> +		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
> +				 vcpu->kvm->arch.lpid, xc->server_num);
> +	else
> +		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
> +				 vcpu->kvm->arch.lpid, xc->server_num, prio);
>  	if (!name) {
>  		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
>  		       prio, xc->server_num);
>  		rc = -ENOMEM;
>  		goto error;
>  	}
> +
> +	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
> +
>  	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
>  			 IRQF_NO_THREAD, name, vcpu);
>  	if (rc) {
> @@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
>  
>  	pr_devel("Provisioning prio... %d\n", prio);
>  
> -	/* Provision each VCPU and enable escalations */
> +	/* Provision each VCPU and enable escalations if needed */
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		if (!vcpu->arch.xive_vcpu)
>  			continue;
>  		rc = xive_provision_queue(vcpu, prio);
> -		if (rc == 0)
> +		if (rc == 0 && !xive->single_escalation)
>  			xive_attach_escalation(vcpu, prio);
>  		if (rc)
>  			return rc;
> @@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	/* Allocate IPI */
>  	xc->vp_ipi = xive_native_alloc_irq();
>  	if (!xc->vp_ipi) {
> +		pr_err("Failed to allocate xive irq for VCPU IPI\n");
>  		r = -EIO;
>  		goto bail;
>  	}
> @@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	if (r)
>  		goto bail;
>  
> +	/*
> +	 * Enable the VP first as the single escalation mode will
> +	 * affect escalation interrupts numbering
> +	 */
> +	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
> +	if (r) {
> +		pr_err("Failed to enable VP in OPAL, err %d\n", r);
> +		goto bail;
> +	}
> +
>  	/*
>  	 * Initialize queues. Initially we set them all for no queueing
>  	 * and we enable escalation for queue 0 only which we'll use for
>  	 * our mfrr change notifications. If the VCPU is hot-plugged, we
> -	 * do handle provisioning however.
> +	 * do handle provisioning however based on the existing "map"
> +	 * of enabled queues.
>  	 */
>  	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
>  		struct xive_q *q = &xc->queues[i];
>  
> +		/* Single escalation, no queue 7 */
> +		if (i == 7 && xive->single_escalation)
> +			break;
> +
>  		/* Is queue already enabled ? Provision it */
>  		if (xive->qmap & (1 << i)) {
>  			r = xive_provision_queue(vcpu, i);
> -			if (r == 0)
> +			if (r == 0 && !xive->single_escalation)
>  				xive_attach_escalation(vcpu, i);
>  			if (r)
>  				goto bail;
> @@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	if (r)
>  		goto bail;
>  
> -	/* Enable the VP */
> -	r = xive_native_enable_vp(xc->vp_id);
> -	if (r)
> -		goto bail;
> -
>  	/* Route the IPI */
>  	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
>  	if (!r)
> @@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
>  
>  	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
>  		 val, server, guest_prio);
> +
>  	/*
>  	 * If the source doesn't already have an IPI, allocate
>  	 * one and get the corresponding data
> @@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
>  	if (xive->vp_base == XIVE_INVALID_VP)
>  		ret = -ENOMEM;
>  
> +	xive->single_escalation = xive_native_has_single_escalation();
> +
>  	if (ret) {
>  		kfree(xive);
>  		return ret;
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index 6ba63f8e8a61..a08ae6fd4c51 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -120,6 +120,8 @@ struct kvmppc_xive {
>  	u32	q_order;
>  	u32	q_page_order;
>  
> +	/* Flags */
> +	u8	single_escalation;
>  };
>  
>  #define KVMPPC_XIVE_Q_COUNT	8
> @@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
>   * is as follow.
>   *
>   * Guest request for 0...6 are honored. Guest request for anything
> - * higher results in a priority of 7 being applied.
> - *
> - * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
> - * in order to match AIX expectations
> + * higher results in a priority of 6 being applied.
>   *
>   * Similar mapping is done for CPPR values
>   */
>  static inline u8 xive_prio_from_guest(u8 prio)
>  {
> -	if (prio == 0xff || prio < 8)
> +	if (prio == 0xff || prio < 6)
>  		return prio;
> -	return 7;
> +	return 6;
>  }
>  
>  static inline u8 xive_prio_to_guest(u8 prio)
>  {
> -	if (prio == 0xff || prio < 7)
> -		return prio;
> -	return 0xb;
> +	return prio;
>  }
>  
>  static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index ebc244b08d67..d22aeb0b69e1 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
>  static u32 xive_queue_shift;
>  static u32 xive_pool_vps = XIVE_INVALID_VP;
>  static struct kmem_cache *xive_provision_cache;
> +static bool xive_has_single_esc;
>  
>  int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
>  {
> @@ -571,6 +572,10 @@ bool __init xive_native_init(void)
>  			break;
>  	}
>  
> +	/* Do we support single escalation */
> +	if (of_get_property(np, "single-escalation-support", NULL) != NULL)
> +		xive_has_single_esc = true;
> +
>  	/* Configure Thread Management areas for KVM */
>  	for_each_possible_cpu(cpu)
>  		kvmppc_set_xive_tima(cpu, r.start, tima);
> @@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
>  }
>  EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
>  
> -int xive_native_enable_vp(u32 vp_id)
> +int xive_native_enable_vp(u32 vp_id, bool single_escalation)
>  {
>  	s64 rc;
> +	u64 flags = OPAL_XIVE_VP_ENABLED;
>  
> +	if (single_escalation)
> +		flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
>  	for (;;) {
> -		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
> +		rc = opal_xive_set_vp_info(vp_id, flags, 0);
>  		if (rc != OPAL_BUSY)
>  			break;
>  		msleep(1);
> @@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
> +
> +bool xive_native_has_single_escalation(void)
> +{
> +	return xive_has_single_esc;
> +}
> +EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..4df668a32ab4 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1070,6 +1070,7 @@  enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
 	OPAL_XIVE_VP_ENABLED		= 0x00000001,
+	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..11d5edeb5c22 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -143,9 +143,10 @@  extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6cff5bdfd6b7..a102efeabf05 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -112,19 +112,21 @@  static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
 		return -EIO;
 	}
 
-	/*
-	 * Future improvement: start with them disabled
-	 * and handle DD2 and later scheme of merged escalation
-	 * interrupts
-	 */
-	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-			 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (xc->xive->single_escalation)
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num);
+	else
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num, prio);
 	if (!name) {
 		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
 		       prio, xc->server_num);
 		rc = -ENOMEM;
 		goto error;
 	}
+
+	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
 	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
 			 IRQF_NO_THREAD, name, vcpu);
 	if (rc) {
@@ -191,12 +193,12 @@  static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 
 	pr_devel("Provisioning prio... %d\n", prio);
 
-	/* Provision each VCPU and enable escalations */
+	/* Provision each VCPU and enable escalations if needed */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!vcpu->arch.xive_vcpu)
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
-		if (rc == 0)
+		if (rc == 0 && !xive->single_escalation)
 			xive_attach_escalation(vcpu, prio);
 		if (rc)
 			return rc;
@@ -1081,6 +1083,7 @@  int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	/* Allocate IPI */
 	xc->vp_ipi = xive_native_alloc_irq();
 	if (!xc->vp_ipi) {
+		pr_err("Failed to allocate xive irq for VCPU IPI\n");
 		r = -EIO;
 		goto bail;
 	}
@@ -1090,19 +1093,34 @@  int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;
 
+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+	if (r) {
+		pr_err("Failed to enable VP in OPAL, err %d\n", r);
+		goto bail;
+	}
+
 	/*
 	 * Initialize queues. Initially we set them all for no queueing
 	 * and we enable escalation for queue 0 only which we'll use for
 	 * our mfrr change notifications. If the VCPU is hot-plugged, we
-	 * do handle provisioning however.
+	 * do handle provisioning however based on the existing "map"
+	 * of enabled queues.
 	 */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		struct xive_q *q = &xc->queues[i];
 
+		/* Single escalation, no queue 7 */
+		if (i == 7 && xive->single_escalation)
+			break;
+
 		/* Is queue already enabled ? Provision it */
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
-			if (r == 0)
+			if (r == 0 && !xive->single_escalation)
 				xive_attach_escalation(vcpu, i);
 			if (r)
 				goto bail;
@@ -1122,11 +1140,6 @@  int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	if (r)
 		goto bail;
 
-	/* Enable the VP */
-	r = xive_native_enable_vp(xc->vp_id);
-	if (r)
-		goto bail;
-
 	/* Route the IPI */
 	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
 	if (!r)
@@ -1473,6 +1486,7 @@  static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
 	pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
 		 val, server, guest_prio);
+
 	/*
 	 * If the source doesn't already have an IPI, allocate
 	 * one and get the corresponding data
@@ -1761,6 +1775,8 @@  static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	if (xive->vp_base == XIVE_INVALID_VP)
 		ret = -ENOMEM;
 
+	xive->single_escalation = xive_native_has_single_escalation();
+
 	if (ret) {
 		kfree(xive);
 		return ret;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@  struct kvmppc_xive {
 	u32	q_order;
 	u32	q_page_order;
 
+	/* Flags */
+	u8	single_escalation;
 };
 
 #define KVMPPC_XIVE_Q_COUNT	8
@@ -201,25 +203,20 @@  static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
  * is as follow.
  *
  * Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
  *
  * Similar mapping is done for CPPR values
  */
 static inline u8 xive_prio_from_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 8)
+	if (prio == 0xff || prio < 6)
 		return prio;
-	return 7;
+	return 6;
 }
 
 static inline u8 xive_prio_to_guest(u8 prio)
 {
-	if (prio == 0xff || prio < 7)
-		return prio;
-	return 0xb;
+	return prio;
 }
 
 static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@  static u32 xive_provision_chip_count;
 static u32 xive_queue_shift;
 static u32 xive_pool_vps = XIVE_INVALID_VP;
 static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
 
 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 {
@@ -571,6 +572,10 @@  bool __init xive_native_init(void)
 			break;
 	}
 
+	/* Do we support single escalation */
+	if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+		xive_has_single_esc = true;
+
 	/* Configure Thread Management areas for KVM */
 	for_each_possible_cpu(cpu)
 		kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@  void xive_native_free_vp_block(u32 vp_base)
 }
 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
 
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
 {
 	s64 rc;
+	u64 flags = OPAL_XIVE_VP_ENABLED;
 
+	if (single_escalation)
+		flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
 	for (;;) {
-		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+		rc = opal_xive_set_vp_info(vp_id, flags, 0);
 		if (rc != OPAL_BUSY)
 			break;
 		msleep(1);
@@ -710,3 +718,9 @@  int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+	return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);