diff mbox

powerpc/kvm/xive: Resend re-routed interrupts on CPU priority change

Message ID de6c435b8ed7ca3120630f2fd2e6c43bb035aa5e.camel@kernel.crashing.org (mailing list archive)
State New, archived
Headers show

Commit Message

Benjamin Herrenschmidt May 10, 2018, 3:06 a.m. UTC
When a vcpu priority (CPPR) is set to a lower value (masking more
interrupts), we stop processing interrupts already in the queue
for the priorities that have now been masked.

If those interrupts have since been re-routed to a different
CPU, they remain stuck until the original CPU, which still has
them in its queue, processes them. In the case of guest CPU
unplug, that may never happen.

To address that without creating additional overhead for
the normal interrupt processing path, this changes H_CPPR
handling so that when such a priority change occurs, we
scan the interrupt queue for that vCPU, and for any
interrupt in there that has been re-routed, we replace it
with a dummy and force a re-trigger.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s_xive_template.c | 108 ++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 7 deletions(-)

Comments

Alexey Kardashevskiy May 10, 2018, 3:26 a.m. UTC | #1
On 10/5/18 1:06 pm, Benjamin Herrenschmidt wrote:
> When a vcpu priority (CPPR) is set to a lower value (masking more
> interrupts), we stop processing interrupts already in the queue
> for the priorities that have now been masked.
> 
> If those interrupts were previously re-routed to a different
> CPU, they might still be stuck until the older one that has
> them in its queue processes them. In the case of guest CPU
> unplug, that can be never.
> 
> To address that without creating additional overhead for
> the normal interrupt processing path, this changes H_CPPR
> handling so that when such a priority change occurs, we
> scan the interrupt queue for that vCPU, and for any
> interrupt in there that has been re-routed, we replace it
> with a dummy and force a re-trigger.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>


Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>



> ---
>  arch/powerpc/kvm/book3s_xive_template.c | 108 ++++++++++++++++++++++--
>  1 file changed, 101 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
> index c7a5deadd1cc..99c3620b40d9 100644
> --- a/arch/powerpc/kvm/book3s_xive_template.c
> +++ b/arch/powerpc/kvm/book3s_xive_template.c
> @@ -11,6 +11,9 @@
>  #define XGLUE(a,b) a##b
>  #define GLUE(a,b) XGLUE(a,b)
>  
> +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> +#define XICS_DUMMY	1
> +
>  static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
>  {
>  	u8 cppr;
> @@ -205,6 +208,10 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
>  				goto skip_ipi;
>  		}
>  
> +		/* If it's the dummy interrupt, continue searching */
> +		if (hirq == XICS_DUMMY)
> +			goto skip_ipi;
> +
>  		/* If fetching, update queue pointers */
>  		if (scan_type == scan_fetch) {
>  			q->idx = idx;
> @@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
>  	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
>  }
>  
> +static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
> +					       struct kvmppc_xive_vcpu *xc)
> +{
> +	unsigned int prio;
> +
> +	/* For each priority that is now masked */
> +	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> +		struct xive_q *q = &xc->queues[prio];
> +		struct kvmppc_xive_irq_state *state;
> +		struct kvmppc_xive_src_block *sb;
> +		u32 idx, toggle, entry, irq, hw_num;
> +		struct xive_irq_data *xd;
> +		__be32 *qpage;
> +		u16 src;
> +
> +		idx = q->idx;
> +		toggle = q->toggle;
> +		qpage = READ_ONCE(q->qpage);
> +		if (!qpage)
> +			continue;
> +
> +		/* For each interrupt in the queue */
> +		for (;;) {
> +			entry = be32_to_cpup(qpage + idx);
> +
> +			/* No more ? */
> +			if ((entry >> 31) == toggle)
> +				break;
> +			irq = entry & 0x7fffffff;
> +
> +			/* Skip dummies and IPIs */
> +			if (irq == XICS_DUMMY || irq == XICS_IPI)
> +				goto next;
> +			sb = kvmppc_xive_find_source(xive, irq, &src);
> +			if (!sb)
> +				goto next;
> +			state = &sb->irq_state[src];
> +
> +			/* Has it been rerouted ? */
> +			if (xc->server_num == state->act_server)
> +				goto next;
> +
> +			/*
> +			 * Allright, it *has* been re-routed, kill it from
> +			 * the queue.
> +			 */
> +			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> +
> +			/* Find the HW interrupt */
> +			kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> +			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> +			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> +				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
> +
> +			/* EOI the source */
> +			GLUE(X_PFX,source_eoi)(hw_num, xd);
> +
> +		next:
> +			idx = (idx + 1) & q->msk;
> +			if (idx == 0)
> +				toggle ^= 1;
> +		}
> +	}
> +}
> +
>  X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
>  {
>  	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>  	u8 old_cppr;
>  
>  	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> @@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
>  	 */
>  	smp_mb();
>  
> -	/*
> -	 * We are masking less, we need to look for pending things
> -	 * to deliver and set VP pending bits accordingly to trigger
> -	 * a new interrupt otherwise we might miss MFRR changes for
> -	 * which we have optimized out sending an IPI signal.
> -	 */
> -	if (cppr > old_cppr)
> +	if (cppr > old_cppr) {
> +		/*
> +		 * We are masking less, we need to look for pending things
> +		 * to deliver and set VP pending bits accordingly to trigger
> +		 * a new interrupt otherwise we might miss MFRR changes for
> +		 * which we have optimized out sending an IPI signal.
> +		 */
>  		GLUE(X_PFX,push_pending_to_hw)(xc);
> +	} else {
> +		/*
> +		 * We are masking more, we need to check the queue for any
> +		 * interrupt that has been routed to another CPU, take
> +		 * it out (replace it with the dummy) and retrigger it.
> +		 *
> +		 * This is necessary since those interrupts may otherwise
> +		 * never be processed, at least not until this CPU restores
> +		 * its CPPR.
> +		 *
> +		 * This is in theory racy vs. HW adding new interrupts to
> +		 * the queue. In practice this works because the interesting
> +		 * cases are when the guest has done a set_xive() to move the
> +		 * interrupt away, which flushes the xive, followed by the
> +		 * target CPU doing a H_CPPR. So any new interrupt coming into
> +		 * the queue must still be routed to us and isn't a source
> +		 * of concern.
> +		 */
> +		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
> +	}
>  
>  	/* Apply new CPPR */
>  	xc->hw_cppr = cppr;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
diff mbox

Patch

diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..99c3620b40d9 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@ 
 #define XGLUE(a,b) a##b
 #define GLUE(a,b) XGLUE(a,b)
 
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
 static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 {
 	u8 cppr;
@@ -205,6 +208,10 @@  static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
 				goto skip_ipi;
 		}
 
+		/* If it's the dummy interrupt, continue searching */
+		if (hirq == XICS_DUMMY)
+			goto skip_ipi;
+
 		/* If fetching, update queue pointers */
 		if (scan_type == scan_fetch) {
 			q->idx = idx;
@@ -385,9 +392,76 @@  static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
 	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
 }
 
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked (xc->cppr and above) */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue; /* no queue page for this priority */
+
+		/* For each interrupt in the queue, using local idx/toggle copies */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* Top bit equal to our toggle: no more entries, stop */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs, as well as unknown sources */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? If not, leave it in the queue */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * All right, it *has* been re-routed, kill it from
+			 * the queue (keep the top bit, swap in the dummy).
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 so that the EOI forces a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+		next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1; /* index wrapped to 0: flip the expected toggle */
+		}
+	}
+}
+
 X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	u8 old_cppr;
 
 	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@  X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	 */
 	smp_mb();
 
-	/*
-	 * We are masking less, we need to look for pending things
-	 * to deliver and set VP pending bits accordingly to trigger
-	 * a new interrupt otherwise we might miss MFRR changes for
-	 * which we have optimized out sending an IPI signal.
-	 */
-	if (cppr > old_cppr)
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
 		GLUE(X_PFX,push_pending_to_hw)(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+	}
 
 	/* Apply new CPPR */
 	xc->hw_cppr = cppr;