diff mbox series

[1/2] genirq: Extract irq_set_affinity_masks() from devm_platform_get_irqs_affinity()

Message ID 20220216090845.1278114-2-maz@kernel.org (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: mvpp2: Survive CPU hotplug events | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16445 this patch: 16445
netdev/cc_maintainers warning 2 maintainers not CCed: rafael@kernel.org peterz@infradead.org
netdev/build_clang success Errors and warnings before: 2428 this patch: 2428
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 15619 this patch: 15619
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Marc Zyngier Feb. 16, 2022, 9:08 a.m. UTC
In order to better support drivers that deal with interrupts in a more
"hands-on" way, extract the core of devm_platform_get_irqs_affinity()
and expose it as irq_set_affinity_masks().

This helper allows a driver to provide a set of wired interrupts that
are to be configured as managed interrupts. As with the original helper,
this is exported as EXPORT_SYMBOL_GPL.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/base/platform.c   | 20 +++-----------------
 include/linux/interrupt.h |  8 ++++++++
 kernel/irq/affinity.c     | 27 +++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 17 deletions(-)

Comments

Greg Kroah-Hartman Feb. 16, 2022, 10:56 a.m. UTC | #1
On Wed, Feb 16, 2022 at 09:08:44AM +0000, Marc Zyngier wrote:
> In order to better support drivers that deal with interrupts in a more
> "hands-on" way, extract the core of devm_platform_get_irqs_affinity()
> and expose it as irq_set_affinity_masks().
> 
> This helper allows a driver to provide a set of wired interrupts that
> are to be configured as managed interrupts. As with the original helper,
> this is exported as EXPORT_SYMBOL_GPL.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  drivers/base/platform.c   | 20 +++-----------------
>  include/linux/interrupt.h |  8 ++++++++
>  kernel/irq/affinity.c     | 27 +++++++++++++++++++++++++++
>  3 files changed, 38 insertions(+), 17 deletions(-)
> 

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
John Garry Feb. 17, 2022, 5:07 p.m. UTC | #2
On 16/02/2022 09:08, Marc Zyngier wrote:

Hi Marc,

> In order to better support drivers that deal with interrupts in a more
> "hands-on" way, extract the core of devm_platform_get_irqs_affinity()
> and expose it as irq_set_affinity_masks().
> 
> This helper allows a driver to provide a set of wired interrupts that
> are to be configured as managed interrupts. As with the original helper,
> this is exported as EXPORT_SYMBOL_GPL.

I know you mentioned it in 2/2, but it would be interesting to see how 
network controller drivers can handle the problem of missing in-flight 
IO completions for managed irq shutdown. For storage controllers this is 
all now safely handled in the block layer.

> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>

Just a small comment, below.

Tested-by: John Garry <john.garry@huawei.com> #D05

Thanks,
John

> ---
>   drivers/base/platform.c   | 20 +++-----------------
>   include/linux/interrupt.h |  8 ++++++++
>   kernel/irq/affinity.c     | 27 +++++++++++++++++++++++++++
>   3 files changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/base/platform.c b/drivers/base/platform.c
> index 6cb04ac48bf0..b363cf6ce5be 100644
> --- a/drivers/base/platform.c
> +++ b/drivers/base/platform.c
> @@ -335,7 +335,6 @@ int devm_platform_get_irqs_affinity(struct platform_device *dev,
>   				    int **irqs)
>   {
>   	struct irq_affinity_devres *ptr;
> -	struct irq_affinity_desc *desc;
>   	size_t size;
>   	int i, ret, nvec;
>   
> @@ -376,31 +375,18 @@ int devm_platform_get_irqs_affinity(struct platform_device *dev,
>   		ptr->irq[i] = irq;
>   	}
>   
> -	desc = irq_create_affinity_masks(nvec, affd);
> -	if (!desc) {
> -		ret = -ENOMEM;
> +	ret = irq_set_affinity_masks(affd, ptr->irq, nvec);
> +	if (ret) {
> +		dev_err(&dev->dev, "failed to update affinity descriptors (%d)\n", ret);
>   		goto err_free_devres;
>   	}
>   
> -	for (i = 0; i < nvec; i++) {
> -		ret = irq_update_affinity_desc(ptr->irq[i], &desc[i]);
> -		if (ret) {
> -			dev_err(&dev->dev, "failed to update irq%d affinity descriptor (%d)\n",
> -				ptr->irq[i], ret);
> -			goto err_free_desc;
> -		}
> -	}
> -
>   	devres_add(&dev->dev, ptr);
>   
> -	kfree(desc);
> -
>   	*irqs = ptr->irq;
>   
>   	return nvec;
>   
> -err_free_desc:
> -	kfree(desc);
>   err_free_devres:
>   	devres_free(ptr);
>   	return ret;
> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> index 9367f1cb2e3c..6bfce96206f8 100644
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -381,6 +381,8 @@ irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd);
>   unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
>   				       const struct irq_affinity *affd);
>   
> +int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec);
> +
>   #else /* CONFIG_SMP */
>   
>   static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
> @@ -443,6 +445,12 @@ irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
>   	return maxvec;
>   }
>   
> +static inline int
> +irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec)
> +{
> +	return -EINVAL;
> +}
> +
>   #endif /* CONFIG_SMP */
>   
>   /*
> diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
> index f7ff8919dc9b..c0f868cd5b87 100644
> --- a/kernel/irq/affinity.c
> +++ b/kernel/irq/affinity.c
> @@ -512,3 +512,30 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
>   
>   	return resv + min(set_vecs, maxvec - resv);
>   }
> +
> +/*
> + * irq_set_affinity_masks - Set the affinity masks of a number of interrupts
> + *                          for multiqueue spreading
> + * @affd:	Description of the affinity requirements
> + * @irqs:	An array of interrupt numbers
> + * @nvec:	The total number of interrupts
> + */
> +int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec)
> +{
> +	struct irq_affinity_desc *desc;
> +	int i, err = 0;

nit: it might be worth doing something similar to how 
pci_alloc_irq_vectors_affinity() handles sets with no pre- and 
post-vectors with msi_default_affd

> +
> +	desc = irq_create_affinity_masks(nvec, affd);
> +	if (!desc)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nvec; i++) {
> +		err = irq_update_affinity_desc(irqs[i], desc + i);
> +		if (err)
> +			break;
> +	}
> +
> +	kfree(desc);
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(irq_set_affinity_masks);
Marc Zyngier Feb. 17, 2022, 5:17 p.m. UTC | #3
Hi John,

On 2022-02-17 17:07, John Garry wrote:
> On 16/02/2022 09:08, Marc Zyngier wrote:
> 
> Hi Marc,
> 
>> In order to better support drivers that deal with interrupts in a more
>> "hands-on" way, extract the core of devm_platform_get_irqs_affinity()
>> and expose it as irq_set_affinity_masks().
>> 
>> This helper allows a driver to provide a set of wired interrupts that
>> are to be configured as managed interrupts. As with the original 
>> helper,
>> this is exported as EXPORT_SYMBOL_GPL.
> 
> I know you mentioned it in 2/2, but it would be interesting to see how
> network controller drivers can handle the problem of missing in-flight
> IO completions for managed irq shutdown. For storage controllers this
> is all now safely handled in the block layer.

Do you have a pointer to this? It'd be interesting to see if there is
a common pattern.

> 
>> 
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
> 
> Just a small comment, below.
> 
> Tested-by: John Garry <john.garry@huawei.com> #D05
> 
> Thanks,
> John
> 
>> ---
>>   drivers/base/platform.c   | 20 +++-----------------
>>   include/linux/interrupt.h |  8 ++++++++
>>   kernel/irq/affinity.c     | 27 +++++++++++++++++++++++++++
>>   3 files changed, 38 insertions(+), 17 deletions(-)
>> 
>> diff --git a/drivers/base/platform.c b/drivers/base/platform.c
>> index 6cb04ac48bf0..b363cf6ce5be 100644
>> --- a/drivers/base/platform.c
>> +++ b/drivers/base/platform.c
>> @@ -335,7 +335,6 @@ int devm_platform_get_irqs_affinity(struct 
>> platform_device *dev,
>>   				    int **irqs)
>>   {
>>   	struct irq_affinity_devres *ptr;
>> -	struct irq_affinity_desc *desc;
>>   	size_t size;
>>   	int i, ret, nvec;
>>   @@ -376,31 +375,18 @@ int devm_platform_get_irqs_affinity(struct 
>> platform_device *dev,
>>   		ptr->irq[i] = irq;
>>   	}
>>   -	desc = irq_create_affinity_masks(nvec, affd);
>> -	if (!desc) {
>> -		ret = -ENOMEM;
>> +	ret = irq_set_affinity_masks(affd, ptr->irq, nvec);
>> +	if (ret) {
>> +		dev_err(&dev->dev, "failed to update affinity descriptors (%d)\n", 
>> ret);
>>   		goto err_free_devres;
>>   	}
>>   -	for (i = 0; i < nvec; i++) {
>> -		ret = irq_update_affinity_desc(ptr->irq[i], &desc[i]);
>> -		if (ret) {
>> -			dev_err(&dev->dev, "failed to update irq%d affinity descriptor 
>> (%d)\n",
>> -				ptr->irq[i], ret);
>> -			goto err_free_desc;
>> -		}
>> -	}
>> -
>>   	devres_add(&dev->dev, ptr);
>>   -	kfree(desc);
>> -
>>   	*irqs = ptr->irq;
>>     	return nvec;
>>   -err_free_desc:
>> -	kfree(desc);
>>   err_free_devres:
>>   	devres_free(ptr);
>>   	return ret;
>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>> index 9367f1cb2e3c..6bfce96206f8 100644
>> --- a/include/linux/interrupt.h
>> +++ b/include/linux/interrupt.h
>> @@ -381,6 +381,8 @@ irq_create_affinity_masks(unsigned int nvec, 
>> struct irq_affinity *affd);
>>   unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned 
>> int maxvec,
>>   				       const struct irq_affinity *affd);
>>   +int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, 
>> int nvec);
>> +
>>   #else /* CONFIG_SMP */
>>     static inline int irq_set_affinity(unsigned int irq, const struct 
>> cpumask *m)
>> @@ -443,6 +445,12 @@ irq_calc_affinity_vectors(unsigned int minvec, 
>> unsigned int maxvec,
>>   	return maxvec;
>>   }
>>   +static inline int
>> +irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int 
>> nvec)
>> +{
>> +	return -EINVAL;
>> +}
>> +
>>   #endif /* CONFIG_SMP */
>>     /*
>> diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
>> index f7ff8919dc9b..c0f868cd5b87 100644
>> --- a/kernel/irq/affinity.c
>> +++ b/kernel/irq/affinity.c
>> @@ -512,3 +512,30 @@ unsigned int irq_calc_affinity_vectors(unsigned 
>> int minvec, unsigned int maxvec,
>>     	return resv + min(set_vecs, maxvec - resv);
>>   }
>> +
>> +/*
>> + * irq_set_affinity_masks - Set the affinity masks of a number of 
>> interrupts
>> + *                          for multiqueue spreading
>> + * @affd:	Description of the affinity requirements
>> + * @irqs:	An array of interrupt numbers
>> + * @nvec:	The total number of interrupts
>> + */
>> +int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int 
>> nvec)
>> +{
>> +	struct irq_affinity_desc *desc;
>> +	int i, err = 0;
> 
> nit: it might be worth doing something similar to how
> pci_alloc_irq_vectors_affinity() handles sets with no pre- and
> post-vectors with msi_default_affd

Yes, good point. This would probably simplify most callers of this code.

Thanks,

           M.
John Garry Feb. 18, 2022, 8:41 a.m. UTC | #4
On 17/02/2022 17:17, Marc Zyngier wrote:

Hi Marc,

>> I know you mentioned it in 2/2, but it would be interesting to see how
>> network controller drivers can handle the problem of missing in-flight
>> IO completions for managed irq shutdown. For storage controllers this
>> is all now safely handled in the block layer.
> 
> Do you have a pointer to this? It'd be interesting to see if there is
> a common pattern.

Check blk_mq_hctx_notify_offline() and other hotplug handler friends in 
block/blk-mq.c and also blk_mq_get_ctx()/blk_mq_map_queue()

So the key steps in CPU offlining are:
- when the last CPU in HW queue context cpumask is going offline we mark 
the HW queue as inactive and no longer queue requests there
- drain all in-flight requests before we allow that last CPU to go 
offline, meaning that we always have a CPU online to service any 
completion interrupts

This scheme relies on symmetrical HW submission and completion queues 
and also that the blk-mq HW queue context cpumask is same as the HW 
queue's IRQ affinity mask (see blk_mq_pci_map_queues()).

I am not sure how much this would fit with the networking stack or that 
marvell driver.

Thanks,
John
Thomas Gleixner March 15, 2022, 2:25 p.m. UTC | #5
On Fri, Feb 18 2022 at 08:41, John Garry wrote:
> On 17/02/2022 17:17, Marc Zyngier wrote:
>>> I know you mentioned it in 2/2, but it would be interesting to see how
>>> network controller drivers can handle the problem of missing in-flight
>>> IO completions for managed irq shutdown. For storage controllers this
>>> is all now safely handled in the block layer.
>> 
>> Do you have a pointer to this? It'd be interesting to see if there is
>> a common pattern.
>
> Check blk_mq_hctx_notify_offline() and other hotplug handler friends in 
> block/blk-mq.c and also blk_mq_get_ctx()/blk_mq_map_queue()
>
> So the key steps in CPU offlining are:
> - when the last CPU in HW queue context cpumask is going offline we mark 
> the HW queue as inactive and no longer queue requests there
> - drain all in-flight requests before we allow that last CPU to go 
> offline, meaning that we always have a CPU online to service any 
> completion interrupts
>
> This scheme relies on symmetrical HW submission and completion queues 
> and also that the blk-mq HW queue context cpumask is same as the HW 
> queue's IRQ affinity mask (see blk_mq_pci_map_queues()).
>
> I am not sure how much this would fit with the networking stack or that 
> marvell driver.

The problem with networking is RX flow steering.

The driver in question initializes the RX flows in
mvpp22_port_rss_init() by default so the packets are evenly distributed
accross the RX queues.

So without actually steering the RX flow away from the RX queue which is
associated to the to be unplugged CPU, this does not really work well.

Thanks,

        tglx
diff mbox series

Patch

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 6cb04ac48bf0..b363cf6ce5be 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -335,7 +335,6 @@  int devm_platform_get_irqs_affinity(struct platform_device *dev,
 				    int **irqs)
 {
 	struct irq_affinity_devres *ptr;
-	struct irq_affinity_desc *desc;
 	size_t size;
 	int i, ret, nvec;
 
@@ -376,31 +375,18 @@  int devm_platform_get_irqs_affinity(struct platform_device *dev,
 		ptr->irq[i] = irq;
 	}
 
-	desc = irq_create_affinity_masks(nvec, affd);
-	if (!desc) {
-		ret = -ENOMEM;
+	ret = irq_set_affinity_masks(affd, ptr->irq, nvec);
+	if (ret) {
+		dev_err(&dev->dev, "failed to update affinity descriptors (%d)\n", ret);
 		goto err_free_devres;
 	}
 
-	for (i = 0; i < nvec; i++) {
-		ret = irq_update_affinity_desc(ptr->irq[i], &desc[i]);
-		if (ret) {
-			dev_err(&dev->dev, "failed to update irq%d affinity descriptor (%d)\n",
-				ptr->irq[i], ret);
-			goto err_free_desc;
-		}
-	}
-
 	devres_add(&dev->dev, ptr);
 
-	kfree(desc);
-
 	*irqs = ptr->irq;
 
 	return nvec;
 
-err_free_desc:
-	kfree(desc);
 err_free_devres:
 	devres_free(ptr);
 	return ret;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9367f1cb2e3c..6bfce96206f8 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -381,6 +381,8 @@  irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd);
 unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 				       const struct irq_affinity *affd);
 
+int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec);
+
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -443,6 +445,12 @@  irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 	return maxvec;
 }
 
+static inline int
+irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_SMP */
 
 /*
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f7ff8919dc9b..c0f868cd5b87 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -512,3 +512,30 @@  unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 
 	return resv + min(set_vecs, maxvec - resv);
 }
+
+/*
+ * irq_set_affinity_masks - Set the affinity masks of a number of interrupts
+ *                          for multiqueue spreading
+ * @affd:	Description of the affinity requirements
+ * @irqs:	An array of interrupt numbers
+ * @nvec:	The total number of interrupts
+ */
+int irq_set_affinity_masks(struct irq_affinity *affd, int *irqs, int nvec)
+{
+	struct irq_affinity_desc *desc;
+	int i, err = 0;
+
+	desc = irq_create_affinity_masks(nvec, affd);
+	if (!desc)
+		return -ENOMEM;
+
+	for (i = 0; i < nvec; i++) {
+		err = irq_update_affinity_desc(irqs[i], desc + i);
+		if (err)
+			break;
+	}
+
+	kfree(desc);
+	return err;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_masks);