diff mbox series

[net-next,3/3] net/mlx4: support per-queue statistics via netlink

Message ID 20240423194931.97013-4-jdamato@fastly.com (mailing list archive)
State Superseded
Headers show
Series mlx4: Add support for netdev-genl API | expand

Commit Message

Joe Damato April 23, 2024, 7:49 p.m. UTC
Make mlx4 compatible with the newly added netlink queue stats API.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
---
 .../net/ethernet/mellanox/mlx4/en_netdev.c    | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)

Comments

Joe Damato April 23, 2024, 10:42 p.m. UTC | #1
On Tue, Apr 23, 2024 at 07:49:30PM +0000, Joe Damato wrote:
> Make mlx4 compatible with the newly added netlink queue stats API.
> 
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> ---
>  .../net/ethernet/mellanox/mlx4/en_netdev.c    | 91 +++++++++++++++++++
>  1 file changed, 91 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index 5d3fde63b273..c7f04d4820c6 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -43,6 +43,7 @@
>  #include <net/vxlan.h>
>  #include <net/devlink.h>
>  #include <net/rps.h>
> +#include <net/netdev_queues.h>
>  
>  #include <linux/mlx4/driver.h>
>  #include <linux/mlx4/device.h>
> @@ -3099,6 +3100,95 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
>  	last_i += NUM_PHY_STATS;
>  }
>  
> +static void mlx4_get_queue_stats_rx(struct net_device *dev, int i,
> +				    struct netdev_queue_stats_rx *stats)
> +{
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	const struct mlx4_en_rx_ring *ring;
> +
> +	stats->packets = 0xff;
> +	stats->bytes = 0xff;
> +	stats->alloc_fail = 0xff;
> +
> +	spin_lock_bh(&priv->stats_lock);
> +
> +	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
> +		goto out_unlock;
> +
> +	ring = priv->rx_ring[i];
> +	stats->packets = READ_ONCE(ring->packets);
> +	stats->bytes   = READ_ONCE(ring->bytes);
> +	stats->alloc_fail = READ_ONCE(ring->dropped);
> +
> +out_unlock:
> +	spin_unlock_bh(&priv->stats_lock);
> +}
> +
> +static void mlx4_get_queue_stats_tx(struct net_device *dev, int i,
> +				    struct netdev_queue_stats_tx *stats)
> +{
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	const struct mlx4_en_tx_ring *ring;
> +
> +	stats->packets = 0xff;
> +	stats->bytes = 0xff;
> +
> +	spin_lock_bh(&priv->stats_lock);
> +
> +	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
> +		goto out_unlock;
> +
> +	ring = priv->tx_ring[TX][i];
> +	stats->packets = READ_ONCE(ring->packets);
> +	stats->bytes   = READ_ONCE(ring->bytes);
> +
> +out_unlock:
> +	spin_unlock_bh(&priv->stats_lock);
> +}
> +
> +static void mlx4_get_base_stats(struct net_device *dev,
> +				struct netdev_queue_stats_rx *rx,
> +				struct netdev_queue_stats_tx *tx)
> +{
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	int i;
> +
> +	rx->packets = 0xff;
> +	rx->bytes = 0xff;
> +	rx->alloc_fail = 0xff;
> +	tx->packets = 0xff;
> +	tx->bytes = 0xff;
> +
> +	spin_lock_bh(&priv->stats_lock);
> +
> +	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
> +		goto out_unlock;

I realized in this case, I'll need to set the fields initialized to 0xff
above to 0 before doing the increments below.

Sorry about that; just realized that now and will fix that in the v2 (along
with any other feedback I get), probably something:

  if (priv->rx_ring_num) {
          rx->packets = 0;
          rx->bytes = 0;
          rx->alloc_fail = 0;
  }

Here for the RX side and see below for the TX side.

> +	for (i = 0; i < priv->rx_ring_num; i++) {
> +		const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
> +
> +		rx->packets += READ_ONCE(ring->packets);
> +		rx->bytes += READ_ONCE(ring->bytes);
> +		rx->alloc_fail += READ_ONCE(ring->dropped);
> +	}

Similar to above, probably will fix with something like this here:

  if (priv->tx_ring_num[TX]) {
          tx->packets = 0;
          tx->bytes = 0;
  }

Sorry for the noise, I should have noticed this before sending it out.

> +	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
> +		const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
> +
> +		tx->packets += READ_ONCE(ring->packets);
> +		tx->bytes   += READ_ONCE(ring->bytes);
> +	}
> +
> +out_unlock:
> +	spin_unlock_bh(&priv->stats_lock);
> +}
> +
> +static const struct netdev_stat_ops mlx4_stat_ops = {
> +	.get_queue_stats_rx     = mlx4_get_queue_stats_rx,
> +	.get_queue_stats_tx     = mlx4_get_queue_stats_tx,
> +	.get_base_stats         = mlx4_get_base_stats,
> +};
> +
>  int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
>  			struct mlx4_en_port_profile *prof)
>  {
> @@ -3262,6 +3352,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
>  	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
>  	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
>  
> +	dev->stat_ops = &mlx4_stat_ops;
>  	dev->ethtool_ops = &mlx4_en_ethtool_ops;
>  
>  	/*
> -- 
> 2.25.1
>
Jakub Kicinski April 24, 2024, 12:57 a.m. UTC | #2
On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:
> I realized in this case, I'll need to set the fields initialized to 0xff
> above to 0 before doing the increments below.

I don't know mlx4 very well, but glancing at the code - are you sure we
need to loop over the queues is the "base" callbacks?

The base callbacks are for getting "historical" data, i.e. info which
was associated with queues which are no longer present. You seem to
sweep all queues, so I'd have expected "base" to just set the values 
to 0. And the real values to come from the per-queue callbacks.

The init to 0xff looks quite sus.

Also what does this:

>	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))

do? 
Joe Damato April 24, 2024, 5:54 a.m. UTC | #3
On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:
> > I realized in this case, I'll need to set the fields initialized to 0xff
> > above to 0 before doing the increments below.
> 
> I don't know mlx4 very well, but glancing at the code - are you sure we
> need to loop over the queues is the "base" callbacks?
> 
> The base callbacks are for getting "historical" data, i.e. info which
> was associated with queues which are no longer present. You seem to
> sweep all queues, so I'd have expected "base" to just set the values 
> to 0. And the real values to come from the per-queue callbacks.

Hmm. Sorry I must have totally misunderstood what the purpose of "base"
was. I've just now more closely looked at bnxt which (maybe?) is the only
driver that implements base and I think maybe I kind of get it now.

For some reason, I thought it meant "the total stats of all queues"; I didn't
know it was intended to provide "historical" data as you say.

Making it set everything to 0 makes sense to me. I suppose I could also simply
omit it? What do you think?

> The init to 0xff looks quite sus.

Yes the init to 0xff is wrong, too. I noticed that, as well.

Here's what I have listed so far in my changelog for the v2 (which I haven't
sent yet), but perhaps the maintainers of mlx4 can weigh in?

v1 -> v2:
 - Patch 1/3 now initializes dropped to 0.
 - Patch 3/3 includes several changes:
   - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
     valid before proceeding.
   - All initialization to 0xff for stats fields has been omit. The
     network stack does this before calling into the driver functions, so
     I've adjusted the driver functions to only set values if there is
     data to set, leaving the network stack's 0xff in place if not.
   - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).

Let me know if that sounds vaguely correct?

> Also what does this:
> 
> >	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
> 
> do? 
Jakub Kicinski April 24, 2024, 2:28 p.m. UTC | #4
On Tue, 23 Apr 2024 22:54:50 -0700 Joe Damato wrote:
> On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> > On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:  
> > > I realized in this case, I'll need to set the fields initialized to 0xff
> > > above to 0 before doing the increments below.  
> > 
> > I don't know mlx4 very well, but glancing at the code - are you sure we
> > need to loop over the queues is the "base" callbacks?
> > 
> > The base callbacks are for getting "historical" data, i.e. info which
> > was associated with queues which are no longer present. You seem to
> > sweep all queues, so I'd have expected "base" to just set the values 
> > to 0. And the real values to come from the per-queue callbacks.  
> 
> Hmm. Sorry I must have totally misunderstood what the purpose of "base"
> was. I've just now more closely looked at bnxt which (maybe?) is the only
> driver that implements base and I think maybe I kind of get it now.
> 
> For some reason, I thought it meant "the total stats of all queues"; I didn't
> know it was intended to provide "historical" data as you say.
> 
> Making it set everything to 0 makes sense to me. I suppose I could also simply
> omit it? What do you think?

The base is used to figure out which stats are reported when we dump 
a summary for the whole device. So you gotta set them to 0.

> > The init to 0xff looks quite sus.  
> 
> Yes the init to 0xff is wrong, too. I noticed that, as well.
> 
> Here's what I have listed so far in my changelog for the v2 (which I haven't
> sent yet), but perhaps the maintainers of mlx4 can weigh in?
> 
> v1 -> v2:
>  - Patch 1/3 now initializes dropped to 0.
>  - Patch 3/3 includes several changes:
>    - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
>      valid before proceeding.
>    - All initialization to 0xff for stats fields has been omit. The
>      network stack does this before calling into the driver functions, so
>      I've adjusted the driver functions to only set values if there is
>      data to set, leaving the network stack's 0xff in place if not.
>    - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).

All the ones you report right? Not just zero the struct.
Any day now (tm) someone will add a lot more stats to the struct
so the init should be selective only to the stats that are actually
supported.

> Let me know if that sounds vaguely correct?
> 
> > Also what does this:
> >   
> > >	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))  
> > 
> > do? 
Joe Damato April 24, 2024, 4:39 p.m. UTC | #5
On Wed, Apr 24, 2024 at 07:28:18AM -0700, Jakub Kicinski wrote:
> On Tue, 23 Apr 2024 22:54:50 -0700 Joe Damato wrote:
> > On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> > > On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:  
> > > > I realized in this case, I'll need to set the fields initialized to 0xff
> > > > above to 0 before doing the increments below.  
> > > 
> > > I don't know mlx4 very well, but glancing at the code - are you sure we
> > > need to loop over the queues is the "base" callbacks?
> > > 
> > > The base callbacks are for getting "historical" data, i.e. info which
> > > was associated with queues which are no longer present. You seem to
> > > sweep all queues, so I'd have expected "base" to just set the values 
> > > to 0. And the real values to come from the per-queue callbacks.  
> > 
> > Hmm. Sorry I must have totally misunderstood what the purpose of "base"
> > was. I've just now more closely looked at bnxt which (maybe?) is the only
> > driver that implements base and I think maybe I kind of get it now.
> > 
> > For some reason, I thought it meant "the total stats of all queues"; I didn't
> > know it was intended to provide "historical" data as you say.
> > 
> > Making it set everything to 0 makes sense to me. I suppose I could also simply
> > omit it? What do you think?
> 
> The base is used to figure out which stats are reported when we dump 
> a summary for the whole device. So you gotta set them to 0.

OK, thanks for your patience and the explanation. Will do.

> > > The init to 0xff looks quite sus.  
> > 
> > Yes the init to 0xff is wrong, too. I noticed that, as well.
> > 
> > Here's what I have listed so far in my changelog for the v2 (which I haven't
> > sent yet), but perhaps the maintainers of mlx4 can weigh in?
> > 
> > v1 -> v2:
> >  - Patch 1/3 now initializes dropped to 0.
> >  - Patch 3/3 includes several changes:
> >    - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> >      valid before proceeding.
> >    - All initialization to 0xff for stats fields has been omit. The
> >      network stack does this before calling into the driver functions, so
> >      I've adjusted the driver functions to only set values if there is
> >      data to set, leaving the network stack's 0xff in place if not.
> >    - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).
> 
> All the ones you report right? Not just zero the struct.
> Any day now (tm) someone will add a lot more stats to the struct
> so the init should be selective only to the stats that are actually
> supported.

Yes, not just zero the struct. Since I am reporting bytes packets for both
RX and TX and alloc_fail for RX I'll be setting those fields to 0
explicitly.

And there's also a warning about unused qtype (oops) in patch 2/3.

So, the revised v2 list (pending anything Mellanox wants) is:

  v1 -> v2:
   - Patch 1/3 now initializes dropped to 0.
   - Patch 2/3 fix use of unitialized qtype warning.
   - Patch 3/3 includes several changes:
     - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
       valid before proceeding.
     - All initialization to 0xff for stats fields has been omit. The
       network stack does this before calling into the driver functions, so
       I've adjusted the driver functions to only set values if there is
       data to set, leaving the network stack's 0xff in place if not.
     - mlx4_get_base_stats set all stat fields to 0 individually (no locking etc needed).

I'll hold off on sending this v2 until we hear back from Mellanox to be
sure there isn't anything else I'm missing.

> > Let me know if that sounds vaguely correct?
> > 
> > > Also what does this:
> > >   
> > > >	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))  
> > > 
> > > do? 
Jakub Kicinski April 25, 2024, 3:46 a.m. UTC | #6
On Wed, 24 Apr 2024 09:39:40 -0700 Joe Damato wrote:
> FWIW, I also attempted to implement this API for i40e (hardware I also
> have):
> 
>   https://lore.kernel.org/lkml/20240410043936.206169-1-jdamato@fastly.com/

Ah, missed the second patch on that thread initially!

> But there are some complications I haven't resolved, so I'm focusing on
> mlx4 and mlx5, first, and will have to come back to i40e later.

FWIW I hope this series will get ironed out soon and we'll have far
more qstats defined:
https://lore.kernel.org/all/20240423113141.1752-1-xuanzhuo@linux.alibaba.com/

The drop counts in particular could be useful in production, not so
sure about the rest :)
Joe Damato April 25, 2024, 4:31 p.m. UTC | #7
On Wed, Apr 24, 2024 at 09:39:43AM -0700, Joe Damato wrote:
> On Wed, Apr 24, 2024 at 07:28:18AM -0700, Jakub Kicinski wrote:
> > On Tue, 23 Apr 2024 22:54:50 -0700 Joe Damato wrote:
> > > On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> > > > On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:  
> > > > > I realized in this case, I'll need to set the fields initialized to 0xff
> > > > > above to 0 before doing the increments below.  
> > > > 
> > > > I don't know mlx4 very well, but glancing at the code - are you sure we
> > > > need to loop over the queues is the "base" callbacks?
> > > > 
> > > > The base callbacks are for getting "historical" data, i.e. info which
> > > > was associated with queues which are no longer present. You seem to
> > > > sweep all queues, so I'd have expected "base" to just set the values 
> > > > to 0. And the real values to come from the per-queue callbacks.  
> > > 
> > > Hmm. Sorry I must have totally misunderstood what the purpose of "base"
> > > was. I've just now more closely looked at bnxt which (maybe?) is the only
> > > driver that implements base and I think maybe I kind of get it now.
> > > 
> > > For some reason, I thought it meant "the total stats of all queues"; I didn't
> > > know it was intended to provide "historical" data as you say.
> > > 
> > > Making it set everything to 0 makes sense to me. I suppose I could also simply
> > > omit it? What do you think?
> > 
> > The base is used to figure out which stats are reported when we dump 
> > a summary for the whole device. So you gotta set them to 0.
> 
> OK, thanks for your patience and the explanation. Will do.
> 
> > > > The init to 0xff looks quite sus.  
> > > 
> > > Yes the init to 0xff is wrong, too. I noticed that, as well.
> > > 
> > > Here's what I have listed so far in my changelog for the v2 (which I haven't
> > > sent yet), but perhaps the maintainers of mlx4 can weigh in?
> > > 
> > > v1 -> v2:
> > >  - Patch 1/3 now initializes dropped to 0.
> > >  - Patch 3/3 includes several changes:
> > >    - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> > >      valid before proceeding.
> > >    - All initialization to 0xff for stats fields has been omit. The
> > >      network stack does this before calling into the driver functions, so
> > >      I've adjusted the driver functions to only set values if there is
> > >      data to set, leaving the network stack's 0xff in place if not.
> > >    - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).
> > 
> > All the ones you report right? Not just zero the struct.
> > Any day now (tm) someone will add a lot more stats to the struct
> > so the init should be selective only to the stats that are actually
> > supported.
> 
> Yes, not just zero the struct. Since I am reporting bytes packets for both
> RX and TX and alloc_fail for RX I'll be setting those fields to 0
> explicitly.
> 
> And there's also a warning about unused qtype (oops) in patch 2/3.
> 
> So, the revised v2 list (pending anything Mellanox wants) is:
> 
>   v1 -> v2:
>    - Patch 1/3 now initializes dropped to 0.
>    - Patch 2/3 fix use of unitialized qtype warning.
>    - Patch 3/3 includes several changes:
>      - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
>        valid before proceeding.
>      - All initialization to 0xff for stats fields has been omit. The
>        network stack does this before calling into the driver functions, so
>        I've adjusted the driver functions to only set values if there is
>        data to set, leaving the network stack's 0xff in place if not.
>      - mlx4_get_base_stats set all stat fields to 0 individually (no locking etc needed).
> 
> I'll hold off on sending this v2 until we hear back from Mellanox to be
> sure there isn't anything else I'm missing.

It's been a few days and I haven't heard back from the mlx4 folks, so I
think I'll probably send my v2 later today which, hopefully, will fix most
of the above issues.
Leon Romanovsky April 30, 2024, 12:33 p.m. UTC | #8
On Thu, Apr 25, 2024 at 09:31:14AM -0700, Joe Damato wrote:
> On Wed, Apr 24, 2024 at 09:39:43AM -0700, Joe Damato wrote:
> > On Wed, Apr 24, 2024 at 07:28:18AM -0700, Jakub Kicinski wrote:
> > > On Tue, 23 Apr 2024 22:54:50 -0700 Joe Damato wrote:
> > > > On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> > > > > On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:  
> > > > > > I realized in this case, I'll need to set the fields initialized to 0xff
> > > > > > above to 0 before doing the increments below.  
> > > > > 
> > > > > I don't know mlx4 very well, but glancing at the code - are you sure we
> > > > > need to loop over the queues is the "base" callbacks?
> > > > > 
> > > > > The base callbacks are for getting "historical" data, i.e. info which
> > > > > was associated with queues which are no longer present. You seem to
> > > > > sweep all queues, so I'd have expected "base" to just set the values 
> > > > > to 0. And the real values to come from the per-queue callbacks.  
> > > > 
> > > > Hmm. Sorry I must have totally misunderstood what the purpose of "base"
> > > > was. I've just now more closely looked at bnxt which (maybe?) is the only
> > > > driver that implements base and I think maybe I kind of get it now.
> > > > 
> > > > For some reason, I thought it meant "the total stats of all queues"; I didn't
> > > > know it was intended to provide "historical" data as you say.
> > > > 
> > > > Making it set everything to 0 makes sense to me. I suppose I could also simply
> > > > omit it? What do you think?
> > > 
> > > The base is used to figure out which stats are reported when we dump 
> > > a summary for the whole device. So you gotta set them to 0.
> > 
> > OK, thanks for your patience and the explanation. Will do.
> > 
> > > > > The init to 0xff looks quite sus.  
> > > > 
> > > > Yes the init to 0xff is wrong, too. I noticed that, as well.
> > > > 
> > > > Here's what I have listed so far in my changelog for the v2 (which I haven't
> > > > sent yet), but perhaps the maintainers of mlx4 can weigh in?
> > > > 
> > > > v1 -> v2:
> > > >  - Patch 1/3 now initializes dropped to 0.
> > > >  - Patch 3/3 includes several changes:
> > > >    - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> > > >      valid before proceeding.
> > > >    - All initialization to 0xff for stats fields has been omit. The
> > > >      network stack does this before calling into the driver functions, so
> > > >      I've adjusted the driver functions to only set values if there is
> > > >      data to set, leaving the network stack's 0xff in place if not.
> > > >    - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).
> > > 
> > > All the ones you report right? Not just zero the struct.
> > > Any day now (tm) someone will add a lot more stats to the struct
> > > so the init should be selective only to the stats that are actually
> > > supported.
> > 
> > Yes, not just zero the struct. Since I am reporting bytes packets for both
> > RX and TX and alloc_fail for RX I'll be setting those fields to 0
> > explicitly.
> > 
> > And there's also a warning about unused qtype (oops) in patch 2/3.
> > 
> > So, the revised v2 list (pending anything Mellanox wants) is:
> > 
> >   v1 -> v2:
> >    - Patch 1/3 now initializes dropped to 0.
> >    - Patch 2/3 fix use of unitialized qtype warning.
> >    - Patch 3/3 includes several changes:
> >      - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> >        valid before proceeding.
> >      - All initialization to 0xff for stats fields has been omit. The
> >        network stack does this before calling into the driver functions, so
> >        I've adjusted the driver functions to only set values if there is
> >        data to set, leaving the network stack's 0xff in place if not.
> >      - mlx4_get_base_stats set all stat fields to 0 individually (no locking etc needed).
> > 
> > I'll hold off on sending this v2 until we hear back from Mellanox to be
> > sure there isn't anything else I'm missing.
> 
> It's been a few days and I haven't heard back from the mlx4 folks, so I
> think I'll probably send my v2 later today which, hopefully, will fix most
> of the above issues.

MLNX folks were in long vacation in last two weeks.

Thanks
Joe Damato April 30, 2024, 4:18 p.m. UTC | #9
On Tue, Apr 30, 2024 at 03:33:14PM +0300, Leon Romanovsky wrote:
> On Thu, Apr 25, 2024 at 09:31:14AM -0700, Joe Damato wrote:
> > On Wed, Apr 24, 2024 at 09:39:43AM -0700, Joe Damato wrote:
> > > On Wed, Apr 24, 2024 at 07:28:18AM -0700, Jakub Kicinski wrote:
> > > > On Tue, 23 Apr 2024 22:54:50 -0700 Joe Damato wrote:
> > > > > On Tue, Apr 23, 2024 at 05:57:18PM -0700, Jakub Kicinski wrote:
> > > > > > On Tue, 23 Apr 2024 12:42:13 -1000 Joe Damato wrote:  
> > > > > > > I realized in this case, I'll need to set the fields initialized to 0xff
> > > > > > > above to 0 before doing the increments below.  
> > > > > > 
> > > > > > I don't know mlx4 very well, but glancing at the code - are you sure we
> > > > > > need to loop over the queues is the "base" callbacks?
> > > > > > 
> > > > > > The base callbacks are for getting "historical" data, i.e. info which
> > > > > > was associated with queues which are no longer present. You seem to
> > > > > > sweep all queues, so I'd have expected "base" to just set the values 
> > > > > > to 0. And the real values to come from the per-queue callbacks.  
> > > > > 
> > > > > Hmm. Sorry I must have totally misunderstood what the purpose of "base"
> > > > > was. I've just now more closely looked at bnxt which (maybe?) is the only
> > > > > driver that implements base and I think maybe I kind of get it now.
> > > > > 
> > > > > For some reason, I thought it meant "the total stats of all queues"; I didn't
> > > > > know it was intended to provide "historical" data as you say.
> > > > > 
> > > > > Making it set everything to 0 makes sense to me. I suppose I could also simply
> > > > > omit it? What do you think?
> > > > 
> > > > The base is used to figure out which stats are reported when we dump 
> > > > a summary for the whole device. So you gotta set them to 0.
> > > 
> > > OK, thanks for your patience and the explanation. Will do.
> > > 
> > > > > > The init to 0xff looks quite sus.  
> > > > > 
> > > > > Yes the init to 0xff is wrong, too. I noticed that, as well.
> > > > > 
> > > > > Here's what I have listed so far in my changelog for the v2 (which I haven't
> > > > > sent yet), but perhaps the maintainers of mlx4 can weigh in?
> > > > > 
> > > > > v1 -> v2:
> > > > >  - Patch 1/3 now initializes dropped to 0.
> > > > >  - Patch 3/3 includes several changes:
> > > > >    - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> > > > >      valid before proceeding.
> > > > >    - All initialization to 0xff for stats fields has been omit. The
> > > > >      network stack does this before calling into the driver functions, so
> > > > >      I've adjusted the driver functions to only set values if there is
> > > > >      data to set, leaving the network stack's 0xff in place if not.
> > > > >    - mlx4_get_base_stats sets all stats to 0 (no locking etc needed).
> > > > 
> > > > All the ones you report right? Not just zero the struct.
> > > > Any day now (tm) someone will add a lot more stats to the struct
> > > > so the init should be selective only to the stats that are actually
> > > > supported.
> > > 
> > > Yes, not just zero the struct. Since I am reporting bytes packets for both
> > > RX and TX and alloc_fail for RX I'll be setting those fields to 0
> > > explicitly.
> > > 
> > > And there's also a warning about unused qtype (oops) in patch 2/3.
> > > 
> > > So, the revised v2 list (pending anything Mellanox wants) is:
> > > 
> > >   v1 -> v2:
> > >    - Patch 1/3 now initializes dropped to 0.
> > >    - Patch 2/3 fix use of unitialized qtype warning.
> > >    - Patch 3/3 includes several changes:
> > >      - mlx4_get_queue_stats_rx and mlx4_get_queue_stats_tx check if i is
> > >        valid before proceeding.
> > >      - All initialization to 0xff for stats fields has been omit. The
> > >        network stack does this before calling into the driver functions, so
> > >        I've adjusted the driver functions to only set values if there is
> > >        data to set, leaving the network stack's 0xff in place if not.
> > >      - mlx4_get_base_stats set all stat fields to 0 individually (no locking etc needed).
> > > 
> > > I'll hold off on sending this v2 until we hear back from Mellanox to be
> > > sure there isn't anything else I'm missing.
> > 
> > It's been a few days and I haven't heard back from the mlx4 folks, so I
> > think I'll probably send my v2 later today which, hopefully, will fix most
> > of the above issues.
> 
> MLNX folks were in long vacation in last two weeks.

OK, thanks for letting me know.

If any of those folks are following this thread, I hope they'll take a look
at the v2 of this series.

I was prepared to send the v3 today, but given that they've been out for a
bit I will hold off on sending the v3 for another day or two.

Thanks,
Joe
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5d3fde63b273..c7f04d4820c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -43,6 +43,7 @@ 
 #include <net/vxlan.h>
 #include <net/devlink.h>
 #include <net/rps.h>
+#include <net/netdev_queues.h>
 
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/device.h>
@@ -3099,6 +3100,95 @@  void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 	last_i += NUM_PHY_STATS;
 }
 
+static void mlx4_get_queue_stats_rx(struct net_device *dev, int i,
+				    struct netdev_queue_stats_rx *stats)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	const struct mlx4_en_rx_ring *ring;
+
+	stats->packets = 0xff;
+	stats->bytes = 0xff;
+	stats->alloc_fail = 0xff;
+
+	spin_lock_bh(&priv->stats_lock);
+
+	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+		goto out_unlock;
+
+	ring = priv->rx_ring[i];
+	stats->packets = READ_ONCE(ring->packets);
+	stats->bytes   = READ_ONCE(ring->bytes);
+	stats->alloc_fail = READ_ONCE(ring->dropped);
+
+out_unlock:
+	spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_get_queue_stats_tx(struct net_device *dev, int i,
+				    struct netdev_queue_stats_tx *stats)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	const struct mlx4_en_tx_ring *ring;
+
+	stats->packets = 0xff;
+	stats->bytes = 0xff;
+
+	spin_lock_bh(&priv->stats_lock);
+
+	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+		goto out_unlock;
+
+	ring = priv->tx_ring[TX][i];
+	stats->packets = READ_ONCE(ring->packets);
+	stats->bytes   = READ_ONCE(ring->bytes);
+
+out_unlock:
+	spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_get_base_stats(struct net_device *dev,
+				struct netdev_queue_stats_rx *rx,
+				struct netdev_queue_stats_tx *tx)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int i;
+
+	rx->packets = 0xff;
+	rx->bytes = 0xff;
+	rx->alloc_fail = 0xff;
+	tx->packets = 0xff;
+	tx->bytes = 0xff;
+
+	spin_lock_bh(&priv->stats_lock);
+
+	if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+		goto out_unlock;
+
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+		rx->packets += READ_ONCE(ring->packets);
+		rx->bytes += READ_ONCE(ring->bytes);
+		rx->alloc_fail += READ_ONCE(ring->dropped);
+	}
+
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+		tx->packets += READ_ONCE(ring->packets);
+		tx->bytes   += READ_ONCE(ring->bytes);
+	}
+
+out_unlock:
+	spin_unlock_bh(&priv->stats_lock);
+}
+
+static const struct netdev_stat_ops mlx4_stat_ops = {
+	.get_queue_stats_rx     = mlx4_get_queue_stats_rx,
+	.get_queue_stats_tx     = mlx4_get_queue_stats_tx,
+	.get_base_stats         = mlx4_get_base_stats,
+};
+
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 			struct mlx4_en_port_profile *prof)
 {
@@ -3262,6 +3352,7 @@  int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
+	dev->stat_ops = &mlx4_stat_ops;
 	dev->ethtool_ops = &mlx4_en_ethtool_ops;
 
 	/*