Message ID | 1511236663.2466.4.camel@kernel.crashing.org (mailing list archive) |
---|---|
State | New, archived |
On Tue, 2017-11-21 at 14:57 +1100, Benjamin Herrenschmidt wrote:
> That feature, provided by Power9 DD2.0 and later, when supported
> by newer OPAL versions, allows sacrificing a queue (priority 7)
> in favor of merging all the escalation interrupts of the queues
> of a single VP into a single interrupt.
>
> This reduces the number of host interrupts used up by KVM guests,
> especially when those guests use multiple priorities.
>
> It will also enable a future change to control the masking of the
> escalation interrupts more precisely to avoid spurious ones.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>
> To test, you need a DD2.x chip and this series applied to
> your skiboot firmware:
>
> https://patchwork.ozlabs.org/project/skiboot/list/?series=14500

Or better, this one:

https://patchwork.ozlabs.org/project/skiboot/list/?series=14526

>
>  arch/powerpc/include/asm/opal-api.h |  1 +
>  arch/powerpc/include/asm/xive.h     |  3 ++-
>  arch/powerpc/kvm/book3s_xive.c      | 48 ++++++++++++++++++++++++-------------
>  arch/powerpc/kvm/book3s_xive.h      | 15 +++++-------
>  arch/powerpc/sysdev/xive/native.c   | 18 ++++++++++++--
>  5 files changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 450a60b81d2a..4df668a32ab4 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -1070,6 +1070,7 @@ enum {
>  /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
>  enum {
>  	OPAL_XIVE_VP_ENABLED		= 0x00000001,
> +	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
>  };
>
>  /* "Any chip" replacement for chip ID for allocation functions */
> diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
> index 371fbebf1ec9..11d5edeb5c22 100644
> --- a/arch/powerpc/include/asm/xive.h
> +++ b/arch/powerpc/include/asm/xive.h
> @@ -143,9 +143,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
>
>  extern void xive_native_sync_source(u32 hw_irq);
>  extern bool is_xive_irq(struct irq_chip *chip);
> -extern int xive_native_enable_vp(u32 vp_id);
> +extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
>  extern int xive_native_disable_vp(u32 vp_id);
>  extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
> +extern bool xive_native_has_single_escalation(void);
>
>  #else
>
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index 6cff5bdfd6b7..a102efeabf05 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
>  		return -EIO;
>  	}
>
> -	/*
> -	 * Future improvement: start with them disabled
> -	 * and handle DD2 and later scheme of merged escalation
> -	 * interrupts
> -	 */
> -	name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
> -			 vcpu->kvm->arch.lpid, xc->server_num, prio);
> +	if (xc->xive->single_escalation)
> +		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
> +				 vcpu->kvm->arch.lpid, xc->server_num);
> +	else
> +		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
> +				 vcpu->kvm->arch.lpid, xc->server_num, prio);
>  	if (!name) {
>  		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
>  		       prio, xc->server_num);
>  		rc = -ENOMEM;
>  		goto error;
>  	}
> +
> +	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
> +
>  	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
>  			 IRQF_NO_THREAD, name, vcpu);
>  	if (rc) {
> @@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
>
>  	pr_devel("Provisioning prio... %d\n", prio);
>
> -	/* Provision each VCPU and enable escalations */
> +	/* Provision each VCPU and enable escalations if needed */
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		if (!vcpu->arch.xive_vcpu)
>  			continue;
>  		rc = xive_provision_queue(vcpu, prio);
> -		if (rc == 0)
> +		if (rc == 0 && !xive->single_escalation)
>  			xive_attach_escalation(vcpu, prio);
>  		if (rc)
>  			return rc;
> @@ -1081,6 +1083,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	/* Allocate IPI */
>  	xc->vp_ipi = xive_native_alloc_irq();
>  	if (!xc->vp_ipi) {
> +		pr_err("Failed to allocate xive irq for VCPU IPI\n");
>  		r = -EIO;
>  		goto bail;
>  	}
> @@ -1090,19 +1093,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	if (r)
>  		goto bail;
>
> +	/*
> +	 * Enable the VP first as the single escalation mode will
> +	 * affect escalation interrupts numbering
> +	 */
> +	r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
> +	if (r) {
> +		pr_err("Failed to enable VP in OPAL, err %d\n", r);
> +		goto bail;
> +	}
> +
>  	/*
>  	 * Initialize queues. Initially we set them all for no queueing
>  	 * and we enable escalation for queue 0 only which we'll use for
>  	 * our mfrr change notifications. If the VCPU is hot-plugged, we
> -	 * do handle provisioning however.
> +	 * do handle provisioning however based on the existing "map"
> +	 * of enabled queues.
>  	 */
>  	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
>  		struct xive_q *q = &xc->queues[i];
>
> +		/* Single escalation, no queue 7 */
> +		if (i == 7 && xive->single_escalation)
> +			break;
> +
>  		/* Is queue already enabled ? Provision it */
>  		if (xive->qmap & (1 << i)) {
>  			r = xive_provision_queue(vcpu, i);
> -			if (r == 0)
> +			if (r == 0 && !xive->single_escalation)
>  				xive_attach_escalation(vcpu, i);
>  			if (r)
>  				goto bail;
> @@ -1122,11 +1140,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>  	if (r)
>  		goto bail;
>
> -	/* Enable the VP */
> -	r = xive_native_enable_vp(xc->vp_id);
> -	if (r)
> -		goto bail;
> -
>  	/* Route the IPI */
>  	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
>  	if (!r)
> @@ -1473,6 +1486,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
>
>  	pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
>  		 val, server, guest_prio);
> +
>  	/*
>  	 * If the source doesn't already have an IPI, allocate
>  	 * one and get the corresponding data
> @@ -1761,6 +1775,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
>  	if (xive->vp_base == XIVE_INVALID_VP)
>  		ret = -ENOMEM;
>
> +	xive->single_escalation = xive_native_has_single_escalation();
> +
>  	if (ret) {
>  		kfree(xive);
>  		return ret;
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index 6ba63f8e8a61..a08ae6fd4c51 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -120,6 +120,8 @@ struct kvmppc_xive {
>  	u32	q_order;
>  	u32	q_page_order;
>
> +	/* Flags */
> +	u8	single_escalation;
>  };
>
>  #define KVMPPC_XIVE_Q_COUNT	8
> @@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
>   * is as follow.
>   *
>   * Guest request for 0...6 are honored. Guest request for anything
> - * higher results in a priority of 7 being applied.
> - *
> - * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
> - * in order to match AIX expectations
> + * higher results in a priority of 6 being applied.
>   *
>   * Similar mapping is done for CPPR values
>   */
>  static inline u8 xive_prio_from_guest(u8 prio)
>  {
> -	if (prio == 0xff || prio < 8)
> +	if (prio == 0xff || prio < 6)
>  		return prio;
> -	return 7;
> +	return 6;
>  }
>
>  static inline u8 xive_prio_to_guest(u8 prio)
>  {
> -	if (prio == 0xff || prio < 7)
> -		return prio;
> -	return 0xb;
> +	return prio;
>  }
>
>  static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index ebc244b08d67..d22aeb0b69e1 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
>  static u32 xive_queue_shift;
>  static u32 xive_pool_vps = XIVE_INVALID_VP;
>  static struct kmem_cache *xive_provision_cache;
> +static bool xive_has_single_esc;
>
>  int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
>  {
> @@ -571,6 +572,10 @@ bool __init xive_native_init(void)
>  			break;
>  	}
>
> +	/* Do we support single escalation */
> +	if (of_get_property(np, "single-escalation-support", NULL) != NULL)
> +		xive_has_single_esc = true;
> +
>  	/* Configure Thread Management areas for KVM */
>  	for_each_possible_cpu(cpu)
>  		kvmppc_set_xive_tima(cpu, r.start, tima);
> @@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
>  }
>  EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
>
> -int xive_native_enable_vp(u32 vp_id)
> +int xive_native_enable_vp(u32 vp_id, bool single_escalation)
>  {
>  	s64 rc;
> +	u64 flags = OPAL_XIVE_VP_ENABLED;
>
> +	if (single_escalation)
> +		flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
>  	for (;;) {
> -		rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
> +		rc = opal_xive_set_vp_info(vp_id, flags, 0);
>  		if (rc != OPAL_BUSY)
>  			break;
>  		msleep(1);
> @@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
> +
> +bool xive_native_has_single_escalation(void)
> +{
> +	return xive_has_single_esc;
> +}
> +EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
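For readers following along, here is a minimal usage sketch (an editorial illustration, not part of the patch) of the calling sequence implied by the reworked native API: probe firmware support once with xive_native_has_single_escalation(), then pass the result to xive_native_enable_vp() before provisioning any queues, since single escalation changes how escalation interrupt numbers are derived and reserves priority 7. The wrapper function below is hypothetical; only the two xive_native_* calls come from the patch.

```c
#include <asm/xive.h>

/* Hypothetical wrapper around the interfaces added by this patch. */
static int example_enable_vp(u32 vp_id)
{
	/* True when OPAL advertises "single-escalation-support" in
	 * the device tree. */
	bool single_esc = xive_native_has_single_escalation();
	int rc;

	/* Enable the VP first: the mode affects escalation interrupt
	 * numbering, so it must be set before queues are provisioned. */
	rc = xive_native_enable_vp(vp_id, single_esc);
	if (rc)
		return rc;

	/* ... then provision queues 0..6 only (queue 7 is sacrificed
	 * for the merged escalation interrupt) when single_esc is set,
	 * or 0..7 otherwise ... */
	return 0;
}
```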
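Similarly, an editorial sketch of the tightened guest-priority mapping from book3s_xive.h: with priority 7 reserved for the merged escalations, guest requests 0..6 are still honored, anything higher is clamped to 6 instead of 7, and the to-guest direction becomes an identity map because 7 (previously remapped to 0xb for AIX) is never handed out anymore. A self-contained, userspace rendition of the same logic:

```c
#include <assert.h>

typedef unsigned char u8;

/* Mirrors the new xive_prio_from_guest(): 0xff (masked) and 0..5
 * pass through, everything else clamps to 6. */
static u8 prio_from_guest(u8 prio)
{
	if (prio == 0xff || prio < 6)
		return prio;
	return 6;
}

int main(void)
{
	assert(prio_from_guest(3) == 3);       /* honored */
	assert(prio_from_guest(6) == 6);       /* honored */
	assert(prio_from_guest(7) == 6);       /* clamped, 7 is reserved */
	assert(prio_from_guest(0xff) == 0xff); /* masked passes through */
	return 0;
}
```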