
[03/14] ibmvnic: simplify ibmvnic_set_queue_affinity()

Message ID 20241228184949.31582-4-yury.norov@gmail.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Series: cpumask: cleanup cpumask_next_wrap() implementation and usage

Commit Message

Yury Norov Dec. 28, 2024, 6:49 p.m. UTC
A loop based on cpumask_next_wrap() open-codes the dedicated macro
for_each_online_cpu_wrap(). Using the macro allows us to avoid setting
bits in the affinity mask more than once when stride >= num_online_cpus().

This also helps to drop the cpumask handling code in the caller.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
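
A note on the caller change below: the explicit cursor seeding can go away
because a wrapped walk that starts at 0 already lands on the first online CPU.
A rough before/after sketch, assuming this reading of the patch:

	/* before: seed the cursor with the first online CPU */
	cpu = cpumask_next(-1, cpu_online_mask);

	/* after: initializing to 0 is enough, because
	 * for_each_online_cpu_wrap(i, 0) starts its search at bit 0
	 * and finds the first online CPU on its own
	 */
	unsigned int num_cpu, cpu = 0;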

Comments

Nick Child Jan. 7, 2025, 10:37 p.m. UTC | #1
On Sat, Dec 28, 2024 at 10:49:35AM -0800, Yury Norov wrote:
> A loop based on cpumask_next_wrap() opencodes the dedicated macro
> for_each_online_cpu_wrap(). Using the macro allows to avoid setting
> bits affinity mask more than once when stride >= num_online_cpus.
> 
> This also helps to drop cpumask handling code in the caller function.
> 
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++-------
>  1 file changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> index e95ae0d39948..4cfd90fb206b 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -234,11 +234,16 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
>  		(*stragglers)--;
>  	}
>  	/* atomic write is safer than writing bit by bit directly */
> -	for (i = 0; i < stride; i++) {
> -		cpumask_set_cpu(*cpu, mask);
> -		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
> -					 nr_cpu_ids, false);
> +	for_each_online_cpu_wrap(i, *cpu) {
> +		if (!stride--)
> +			break;
> +		cpumask_set_cpu(i, mask);
>  	}
> +
> +	/* For the next queue we start from the first unused CPU in this queue */
> +	if (i < nr_cpu_ids)
> +		*cpu = i + 1;
> +
This should read '*cpu = i', since the loop breaks after incrementing i.
Thanks!

>  	/* set queue affinity mask */
>  	cpumask_copy(queue->affinity_mask, mask);
>  	rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
> @@ -256,7 +261,7 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
>  	int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
>  	int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
>  	int total_queues, stride, stragglers, i;
> -	unsigned int num_cpu, cpu;
> +	unsigned int num_cpu, cpu = 0;
>  	bool is_rx_queue;
>  	int rc = 0;
>  
> @@ -274,8 +279,6 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
>  	stride = max_t(int, num_cpu / total_queues, 1);
>  	/* number of leftover cpu's */
>  	stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
> -	/* next available cpu to assign irq to */
> -	cpu = cpumask_next(-1, cpu_online_mask);
>  
>  	for (i = 0; i < total_queues; i++) {
>  		is_rx_queue = false;
> -- 
> 2.43.0
>
Yury Norov Jan. 7, 2025, 10:42 p.m. UTC | #2
On Tue, Jan 07, 2025 at 04:37:17PM -0600, Nick Child wrote:
> On Sat, Dec 28, 2024 at 10:49:35AM -0800, Yury Norov wrote:
> > A loop based on cpumask_next_wrap() opencodes the dedicated macro
> > for_each_online_cpu_wrap(). Using the macro allows to avoid setting
> > bits affinity mask more than once when stride >= num_online_cpus.
> > 
> > This also helps to drop cpumask handling code in the caller function.
> > 
> > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > ---
> >  drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++-------
> >  1 file changed, 10 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> > index e95ae0d39948..4cfd90fb206b 100644
> > --- a/drivers/net/ethernet/ibm/ibmvnic.c
> > +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> > @@ -234,11 +234,16 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
> >  		(*stragglers)--;
> >  	}
> >  	/* atomic write is safer than writing bit by bit directly */
> > -	for (i = 0; i < stride; i++) {
> > -		cpumask_set_cpu(*cpu, mask);
> > -		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
> > -					 nr_cpu_ids, false);
> > +	for_each_online_cpu_wrap(i, *cpu) {
> > +		if (!stride--)
> > +			break;
> > +		cpumask_set_cpu(i, mask);
> >  	}
> > +
> > +	/* For the next queue we start from the first unused CPU in this queue */
> > +	if (i < nr_cpu_ids)
> > +		*cpu = i + 1;
> > +
> This should read '*cpu = i'. Since the loop breaks after incrementing i.
> Thanks!

cpumask_next_wrap() does the '+ 1' for you, while for_each_cpu_wrap() starts
exactly where you point it. So the '+ 1' now needs to be explicit.

Does that make sense?
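
If it helps, here is a tiny userspace model of that difference (the helper
name mirrors the kernel one, but this is purely illustrative code, not the
kernel implementation):

#include <stdint.h>
#include <stdio.h>

/* Rough analogue of cpumask_next_wrap(): first set bit strictly
 * after 'n', wrapping around a 64-bit "online" mask.
 */
static int next_wrap(uint64_t mask, int n)
{
	for (int k = 1; k <= 64; k++) {
		int bit = (n + k) % 64;

		if (mask & (1ULL << bit))
			return bit;
	}
	return -1;
}

int main(void)
{
	uint64_t online = 0xff;	/* CPUs 0-7 "online" */
	int cpu = 3;

	/* cpumask_next_wrap()-style: the result is the CPU *after* 'cpu' */
	printf("next after %d: %d\n", cpu, next_wrap(online, cpu));	/* 4 */

	/* for_each_cpu_wrap()-style: the walk starts *at* 'cpu' itself */
	for (int k = 0, i = cpu; k < 8; k++, i = next_wrap(online, i))
		printf("%d ", i);	/* 3 4 5 6 7 0 1 2 */
	printf("\n");

	return 0;
}

So the old loop's trailing cpumask_next_wrap() call left *cpu already advanced
past the last CPU it used, while the new loop begins at the cursor itself;
where exactly that advancement should live is what the rest of the thread
settles.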

> 
> >  	/* set queue affinity mask */
> >  	cpumask_copy(queue->affinity_mask, mask);
> >  	rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
> > @@ -256,7 +261,7 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
> >  	int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
> >  	int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
> >  	int total_queues, stride, stragglers, i;
> > -	unsigned int num_cpu, cpu;
> > +	unsigned int num_cpu, cpu = 0;
> >  	bool is_rx_queue;
> >  	int rc = 0;
> >  
> > @@ -274,8 +279,6 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
> >  	stride = max_t(int, num_cpu / total_queues, 1);
> >  	/* number of leftover cpu's */
> >  	stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
> > -	/* next available cpu to assign irq to */
> > -	cpu = cpumask_next(-1, cpu_online_mask);
> >  
> >  	for (i = 0; i < total_queues; i++) {
> >  		is_rx_queue = false;
> > -- 
> > 2.43.0
> >
Yury Norov Jan. 7, 2025, 11:04 p.m. UTC | #3
On Tue, Jan 07, 2025 at 02:43:01PM -0800, Yury Norov wrote:
> On Tue, Jan 07, 2025 at 04:37:17PM -0600, Nick Child wrote:
> > On Sat, Dec 28, 2024 at 10:49:35AM -0800, Yury Norov wrote:
> > > A loop based on cpumask_next_wrap() opencodes the dedicated macro
> > > for_each_online_cpu_wrap(). Using the macro allows to avoid setting
> > > bits affinity mask more than once when stride >= num_online_cpus.
> > > 
> > > This also helps to drop cpumask handling code in the caller function.
> > > 
> > > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > > ---
> > >  drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++-------
> > >  1 file changed, 10 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> > > index e95ae0d39948..4cfd90fb206b 100644
> > > --- a/drivers/net/ethernet/ibm/ibmvnic.c
> > > +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> > > @@ -234,11 +234,16 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
> > >  		(*stragglers)--;
> > >  	}
> > >  	/* atomic write is safer than writing bit by bit directly */
> > > -	for (i = 0; i < stride; i++) {
> > > -		cpumask_set_cpu(*cpu, mask);
> > > -		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
> > > -					 nr_cpu_ids, false);
> > > +	for_each_online_cpu_wrap(i, *cpu) {
> > > +		if (!stride--)
> > > +			break;
> > > +		cpumask_set_cpu(i, mask);
> > >  	}
> > > +
> > > +	/* For the next queue we start from the first unused CPU in this queue */
> > > +	if (i < nr_cpu_ids)
> > > +		*cpu = i + 1;
> > > +
> > This should read '*cpu = i'. Since the loop breaks after incrementing i.
> > Thanks!
> 
> cpumask_next_wrap() makes '+ 1' for you. The for_each_cpu_wrap() starts
> exactly where you point. So, this '+1' needs to be explicit now.
> 
> Does that make sense?

Ah, I think I see what you mean. It should be like this, right?

  for_each_online_cpu_wrap(i, *cpu) {
  	if (!stride--) {
  		*cpu = i + 1;
  		break;
  	}
  	cpumask_set_cpu(i, mask);
  }
Nick Child Jan. 8, 2025, 2:08 p.m. UTC | #4
On Tue, Jan 07, 2025 at 03:04:40PM -0800, Yury Norov wrote:
> On Tue, Jan 07, 2025 at 02:43:01PM -0800, Yury Norov wrote:
> > On Tue, Jan 07, 2025 at 04:37:17PM -0600, Nick Child wrote:
> > > On Sat, Dec 28, 2024 at 10:49:35AM -0800, Yury Norov wrote:
> > > > A loop based on cpumask_next_wrap() opencodes the dedicated macro
> > > > for_each_online_cpu_wrap(). Using the macro allows to avoid setting
> > > > bits affinity mask more than once when stride >= num_online_cpus.
> > > > 
> > > > This also helps to drop cpumask handling code in the caller function.
> > > > 
> > > > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > > > ---
> > > >  drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++-------
> > > >  1 file changed, 10 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> > > > index e95ae0d39948..4cfd90fb206b 100644
> > > > --- a/drivers/net/ethernet/ibm/ibmvnic.c
> > > > +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> > > > @@ -234,11 +234,16 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
> > > >  		(*stragglers)--;
> > > >  	}
> > > >  	/* atomic write is safer than writing bit by bit directly */
> > > > -	for (i = 0; i < stride; i++) {
> > > > -		cpumask_set_cpu(*cpu, mask);
> > > > -		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
> > > > -					 nr_cpu_ids, false);
> > > > +	for_each_online_cpu_wrap(i, *cpu) {
> > > > +		if (!stride--)
> > > > +			break;
> > > > +		cpumask_set_cpu(i, mask);
> > > >  	}
> > > > +
> > > > +	/* For the next queue we start from the first unused CPU in this queue */
> > > > +	if (i < nr_cpu_ids)
> > > > +		*cpu = i + 1;
> > > > +
> > > This should read '*cpu = i'. Since the loop breaks after incrementing i.
> > > Thanks!
> > 
> > cpumask_next_wrap() makes '+ 1' for you. The for_each_cpu_wrap() starts
> > exactly where you point. So, this '+1' needs to be explicit now.
> > 
> > Does that make sense?
> 
> Ah, I think I see what you mean. It should be like this, right?
> 
>   for_each_online_cpu_wrap(i, *cpu) {
>   	if (!stride--) {
>   		*cpu = i + 1;
>   		break;
>   	}
>   	cpumask_set_cpu(i, mask);
>   }
Not quite. for_each_online_cpu_wrap() advances i to the next online cpu and
then enters the body of the loop. When we break (because stride is zero), we
exit the loop before i has been added to the mask, so i is already the next
unassigned online cpu.
I tested this to make sure; we see unused cpus (#7, #23) with the patch as is:
  IRQ : 256 -> ibmvnic-30000003-tx0
	/proc/irq/256/smp_affinity_list:0-6
  IRQ : 257 -> ibmvnic-30000003-tx1
	/proc/irq/257/smp_affinity_list:16-22
  IRQ : 258 -> ibmvnic-30000003-rx0
	/proc/irq/258/smp_affinity_list:8-14
  IRQ : 259 -> ibmvnic-30000003-rx1
	/proc/irq/259/smp_affinity_list:24-30
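
Taking that trace at face value, the minimal fix to the hunk above would just
drop the '+ 1', along these lines (an untested sketch of Nick's suggestion;
the actual respin is outside this thread):

	for_each_online_cpu_wrap(i, *cpu) {
		if (!stride--)
			break;
		cpumask_set_cpu(i, mask);
	}

	/* For the next queue we start from the first unused CPU in this queue */
	if (i < nr_cpu_ids)
		*cpu = i;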

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e95ae0d39948..4cfd90fb206b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -234,11 +234,16 @@  static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
 		(*stragglers)--;
 	}
 	/* atomic write is safer than writing bit by bit directly */
-	for (i = 0; i < stride; i++) {
-		cpumask_set_cpu(*cpu, mask);
-		*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
-					 nr_cpu_ids, false);
+	for_each_online_cpu_wrap(i, *cpu) {
+		if (!stride--)
+			break;
+		cpumask_set_cpu(i, mask);
 	}
+
+	/* For the next queue we start from the first unused CPU in this queue */
+	if (i < nr_cpu_ids)
+		*cpu = i + 1;
+
 	/* set queue affinity mask */
 	cpumask_copy(queue->affinity_mask, mask);
 	rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
@@ -256,7 +261,7 @@  static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
 	int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
 	int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
 	int total_queues, stride, stragglers, i;
-	unsigned int num_cpu, cpu;
+	unsigned int num_cpu, cpu = 0;
 	bool is_rx_queue;
 	int rc = 0;
 
@@ -274,8 +279,6 @@  static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
 	stride = max_t(int, num_cpu / total_queues, 1);
 	/* number of leftover cpu's */
 	stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
-	/* next available cpu to assign irq to */
-	cpu = cpumask_next(-1, cpu_online_mask);
 
 	for (i = 0; i < total_queues; i++) {
 		is_rx_queue = false;