diff mbox series

[v6,net-next,5/5] net: mscc: ocelot: use bulk reads for stats

Message ID 20220210041345.321216-6-colin.foster@in-advantage.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series use bulk reads for ocelot statistics | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 136 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Colin Foster Feb. 10, 2022, 4:13 a.m. UTC
Create and utilize bulk regmap reads instead of single access for gathering
stats. The background reading of statistics happens frequently, and over
a few contiguous memory regions.

High speed PCIe buses and MMIO access will probably see negligible
performance increase. Lower speed buses like SPI and I2C could see
significant performance increase, since the bus configuration and register
access times account for a large percentage of data transfer time.

Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
---
 drivers/net/ethernet/mscc/ocelot.c | 79 +++++++++++++++++++++++++-----
 include/soc/mscc/ocelot.h          |  8 +++
 2 files changed, 75 insertions(+), 12 deletions(-)

Comments

Vladimir Oltean Feb. 10, 2022, 10:36 a.m. UTC | #1
On Wed, Feb 09, 2022 at 08:13:45PM -0800, Colin Foster wrote:
> Create and utilize bulk regmap reads instead of single access for gathering
> stats. The background reading of statistics happens frequently, and over
> a few contiguous memory regions.
> 
> High speed PCIe buses and MMIO access will probably see negligible
> performance increase. Lower speed buses like SPI and I2C could see
> significant performance increase, since the bus configuration and register
> access times account for a large percentage of data transfer time.
> 
> Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> ---
>  drivers/net/ethernet/mscc/ocelot.c | 79 +++++++++++++++++++++++++-----
>  include/soc/mscc/ocelot.h          |  8 +++
>  2 files changed, 75 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index ab36732e7d3f..fdbd31149dfc 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1738,25 +1738,36 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
>  EXPORT_SYMBOL(ocelot_get_strings);
>  
>  /* Caller must hold &ocelot->stats_lock */
> -static void ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
> +static int ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
>  {
> -	int j;
> +	unsigned int idx = port * ocelot->num_stats;
> +	struct ocelot_stats_region *region;
> +	int err, j;
>  
>  	/* Configure the port to read the stats from */
>  	ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG);
>  
> -	for (j = 0; j < ocelot->num_stats; j++) {
> -		u32 val;
> -		unsigned int idx = port * ocelot->num_stats + j;
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> +					   region->offset, region->buf,
> +					   region->count);
> +		if (err)
> +			return err;
>  
> -		val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> -				      ocelot->stats_layout[j].offset);
> +		for (j = 0; j < region->count; j++) {
> +			u64 *stat = &ocelot->stats[idx + j];
> +			u64 val = region->buf[j];
>  
> -		if (val < (ocelot->stats[idx] & U32_MAX))
> -			ocelot->stats[idx] += (u64)1 << 32;
> +			if (val < (*stat & U32_MAX))
> +				*stat += (u64)1 << 32;
> +
> +			*stat = (*stat & ~(u64)U32_MAX) + val;
> +		}
>  
> -		ocelot->stats[idx] = (ocelot->stats[idx] & ~(u64)U32_MAX) + val;
> +		idx += region->count;
>  	}
> +
> +	return err;
>  }
>  
>  static void ocelot_check_stats_work(struct work_struct *work)
> @@ -1777,12 +1788,14 @@ static void ocelot_check_stats_work(struct work_struct *work)
>  
>  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
>  {
> -	int i;
> +	int i, err;
>  
>  	mutex_lock(&ocelot->stats_lock);
>  
>  	/* check and update now */
> -	ocelot_update_stats_for_port(ocelot, port);
> +	err = ocelot_update_stats_for_port(ocelot, port);

ocelot_check_stats_work() should also check for errors.

> +	if (err)
> +		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
>  
>  	/* Copy all counters */
>  	for (i = 0; i < ocelot->num_stats; i++)
> @@ -1801,6 +1814,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
>  }
>  EXPORT_SYMBOL(ocelot_get_sset_count);
>  
> +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> +{
> +	struct ocelot_stats_region *region = NULL;
> +	unsigned int last;
> +	int i;
> +
> +	INIT_LIST_HEAD(&ocelot->stats_regions);
> +
> +	for (i = 0; i < ocelot->num_stats; i++) {
> +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> +			region->count++;
> +		} else {
> +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> +					      GFP_KERNEL);
> +			if (!region)
> +				return -ENOMEM;
> +
> +			region->offset = ocelot->stats_layout[i].offset;
> +			region->count = 1;
> +			list_add_tail(&region->node, &ocelot->stats_regions);
> +		}
> +
> +		last = ocelot->stats_layout[i].offset;
> +	}
> +
> +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> +					   sizeof(*region->buf), GFP_KERNEL);
> +		if (!region->buf)
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
>  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
>  		       struct ethtool_ts_info *info)
>  {
> @@ -2801,6 +2849,13 @@ int ocelot_init(struct ocelot *ocelot)
>  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
>  				 ANA_CPUQ_8021_CFG, i);
>  
> +	ret = ocelot_prepare_stats_regions(ocelot);
> +	if (ret) {
> +		destroy_workqueue(ocelot->stats_queue);
> +		destroy_workqueue(ocelot->owq);
> +		return ret;
> +	}
> +
>  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
>  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
>  			   OCELOT_STATS_CHECK_DELAY);
> diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> index 312b72558659..d3291a5f7e88 100644
> --- a/include/soc/mscc/ocelot.h
> +++ b/include/soc/mscc/ocelot.h
> @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
>  	char name[ETH_GSTRING_LEN];
>  };
>  
> +struct ocelot_stats_region {
> +	struct list_head node;
> +	u32 offset;
> +	int count;
> +	u32 *buf;
> +};
> +
>  enum ocelot_tag_prefix {
>  	OCELOT_TAG_PREFIX_DISABLED	= 0,
>  	OCELOT_TAG_PREFIX_NONE,
> @@ -673,6 +680,7 @@ struct ocelot {
>  	struct regmap_field		*regfields[REGFIELD_MAX];
>  	const u32 *const		*map;
>  	const struct ocelot_stat_layout	*stats_layout;
> +	struct list_head		stats_regions;
>  	unsigned int			num_stats;
>  
>  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> -- 
> 2.25.1
>
Colin Foster Feb. 10, 2022, 2:37 p.m. UTC | #2
On Thu, Feb 10, 2022 at 10:36:37AM +0000, Vladimir Oltean wrote:
> On Wed, Feb 09, 2022 at 08:13:45PM -0800, Colin Foster wrote:
> > Create and utilize bulk regmap reads instead of single access for gathering
> > stats. The background reading of statistics happens frequently, and over
> > a few contiguous memory regions.
> > 
> > High speed PCIe buses and MMIO access will probably see negligible
> > performance increase. Lower speed buses like SPI and I2C could see
> > significant performance increase, since the bus configuration and register
> > access times account for a large percentage of data transfer time.
> > 
> > Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> > ---
> >  drivers/net/ethernet/mscc/ocelot.c | 79 +++++++++++++++++++++++++-----
> >  include/soc/mscc/ocelot.h          |  8 +++
> >  2 files changed, 75 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> > index ab36732e7d3f..fdbd31149dfc 100644
> > --- a/drivers/net/ethernet/mscc/ocelot.c
> > +++ b/drivers/net/ethernet/mscc/ocelot.c
> > @@ -1738,25 +1738,36 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
> >  EXPORT_SYMBOL(ocelot_get_strings);
> >  
> >  /* Caller must hold &ocelot->stats_lock */
> > -static void ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
> > +static int ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
> >  {
> > -	int j;
> > +	unsigned int idx = port * ocelot->num_stats;
> > +	struct ocelot_stats_region *region;
> > +	int err, j;
> >  
> >  	/* Configure the port to read the stats from */
> >  	ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG);
> >  
> > -	for (j = 0; j < ocelot->num_stats; j++) {
> > -		u32 val;
> > -		unsigned int idx = port * ocelot->num_stats + j;
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > +					   region->offset, region->buf,
> > +					   region->count);
> > +		if (err)
> > +			return err;
> >  
> > -		val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > -				      ocelot->stats_layout[j].offset);
> > +		for (j = 0; j < region->count; j++) {
> > +			u64 *stat = &ocelot->stats[idx + j];
> > +			u64 val = region->buf[j];
> >  
> > -		if (val < (ocelot->stats[idx] & U32_MAX))
> > -			ocelot->stats[idx] += (u64)1 << 32;
> > +			if (val < (*stat & U32_MAX))
> > +				*stat += (u64)1 << 32;
> > +
> > +			*stat = (*stat & ~(u64)U32_MAX) + val;
> > +		}
> >  
> > -		ocelot->stats[idx] = (ocelot->stats[idx] & ~(u64)U32_MAX) + val;
> > +		idx += region->count;
> >  	}
> > +
> > +	return err;
> >  }
> >  
> >  static void ocelot_check_stats_work(struct work_struct *work)
> > @@ -1777,12 +1788,14 @@ static void ocelot_check_stats_work(struct work_struct *work)
> >  
> >  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
> >  {
> > -	int i;
> > +	int i, err;
> >  
> >  	mutex_lock(&ocelot->stats_lock);
> >  
> >  	/* check and update now */
> > -	ocelot_update_stats_for_port(ocelot, port);
> > +	err = ocelot_update_stats_for_port(ocelot, port);
> 
> ocelot_check_stats_work() should also check for errors.

Worthy enough for a resend. I'll send the fix patch to net with your
correct email and send this update once that gets merged through.

Thanks!

> 
> > +	if (err)
> > +		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
> >  
> >  	/* Copy all counters */
> >  	for (i = 0; i < ocelot->num_stats; i++)
> > @@ -1801,6 +1814,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_sset_count);
> >  
> > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> > +{
> > +	struct ocelot_stats_region *region = NULL;
> > +	unsigned int last;
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&ocelot->stats_regions);
> > +
> > +	for (i = 0; i < ocelot->num_stats; i++) {
> > +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> > +			region->count++;
> > +		} else {
> > +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> > +					      GFP_KERNEL);
> > +			if (!region)
> > +				return -ENOMEM;
> > +
> > +			region->offset = ocelot->stats_layout[i].offset;
> > +			region->count = 1;
> > +			list_add_tail(&region->node, &ocelot->stats_regions);
> > +		}
> > +
> > +		last = ocelot->stats_layout[i].offset;
> > +	}
> > +
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> > +					   sizeof(*region->buf), GFP_KERNEL);
> > +		if (!region->buf)
> > +			return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
> >  		       struct ethtool_ts_info *info)
> >  {
> > @@ -2801,6 +2849,13 @@ int ocelot_init(struct ocelot *ocelot)
> >  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> >  				 ANA_CPUQ_8021_CFG, i);
> >  
> > +	ret = ocelot_prepare_stats_regions(ocelot);
> > +	if (ret) {
> > +		destroy_workqueue(ocelot->stats_queue);
> > +		destroy_workqueue(ocelot->owq);
> > +		return ret;
> > +	}
> > +
> >  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> >  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> >  			   OCELOT_STATS_CHECK_DELAY);
> > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> > index 312b72558659..d3291a5f7e88 100644
> > --- a/include/soc/mscc/ocelot.h
> > +++ b/include/soc/mscc/ocelot.h
> > @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> >  	char name[ETH_GSTRING_LEN];
> >  };
> >  
> > +struct ocelot_stats_region {
> > +	struct list_head node;
> > +	u32 offset;
> > +	int count;
> > +	u32 *buf;
> > +};
> > +
> >  enum ocelot_tag_prefix {
> >  	OCELOT_TAG_PREFIX_DISABLED	= 0,
> >  	OCELOT_TAG_PREFIX_NONE,
> > @@ -673,6 +680,7 @@ struct ocelot {
> >  	struct regmap_field		*regfields[REGFIELD_MAX];
> >  	const u32 *const		*map;
> >  	const struct ocelot_stat_layout	*stats_layout;
> > +	struct list_head		stats_regions;
> >  	unsigned int			num_stats;
> >  
> >  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> > -- 
> > 2.25.1
> >
Colin Foster Feb. 10, 2022, 3:21 p.m. UTC | #3
On Thu, Feb 10, 2022 at 10:36:37AM +0000, Vladimir Oltean wrote:
> On Wed, Feb 09, 2022 at 08:13:45PM -0800, Colin Foster wrote:
> > Create and utilize bulk regmap reads instead of single access for gathering
> > stats. The background reading of statistics happens frequently, and over
> > a few contiguous memory regions.
> > 
> > High speed PCIe buses and MMIO access will probably see negligible
> > performance increase. Lower speed buses like SPI and I2C could see
> > significant performance increase, since the bus configuration and register
> > access times account for a large percentage of data transfer time.
> > 
> > Signed-off-by: Colin Foster <colin.foster@in-advantage.com>
> > ---
> >  drivers/net/ethernet/mscc/ocelot.c | 79 +++++++++++++++++++++++++-----
> >  include/soc/mscc/ocelot.h          |  8 +++
> >  2 files changed, 75 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> > index ab36732e7d3f..fdbd31149dfc 100644
> > --- a/drivers/net/ethernet/mscc/ocelot.c
> > +++ b/drivers/net/ethernet/mscc/ocelot.c
> > @@ -1738,25 +1738,36 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
> >  EXPORT_SYMBOL(ocelot_get_strings);
> >  
> >  /* Caller must hold &ocelot->stats_lock */
> > -static void ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
> > +static int ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
> >  {
> > -	int j;
> > +	unsigned int idx = port * ocelot->num_stats;
> > +	struct ocelot_stats_region *region;
> > +	int err, j;
> >  
> >  	/* Configure the port to read the stats from */
> >  	ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG);
> >  
> > -	for (j = 0; j < ocelot->num_stats; j++) {
> > -		u32 val;
> > -		unsigned int idx = port * ocelot->num_stats + j;
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > +					   region->offset, region->buf,
> > +					   region->count);
> > +		if (err)
> > +			return err;
> >  
> > -		val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
> > -				      ocelot->stats_layout[j].offset);
> > +		for (j = 0; j < region->count; j++) {
> > +			u64 *stat = &ocelot->stats[idx + j];
> > +			u64 val = region->buf[j];
> >  
> > -		if (val < (ocelot->stats[idx] & U32_MAX))
> > -			ocelot->stats[idx] += (u64)1 << 32;
> > +			if (val < (*stat & U32_MAX))
> > +				*stat += (u64)1 << 32;
> > +
> > +			*stat = (*stat & ~(u64)U32_MAX) + val;
> > +		}
> >  
> > -		ocelot->stats[idx] = (ocelot->stats[idx] & ~(u64)U32_MAX) + val;
> > +		idx += region->count;
> >  	}
> > +
> > +	return err;
> >  }
> >  
> >  static void ocelot_check_stats_work(struct work_struct *work)
> > @@ -1777,12 +1788,14 @@ static void ocelot_check_stats_work(struct work_struct *work)
> >  
> >  void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
> >  {
> > -	int i;
> > +	int i, err;
> >  
> >  	mutex_lock(&ocelot->stats_lock);
> >  
> >  	/* check and update now */
> > -	ocelot_update_stats_for_port(ocelot, port);
> > +	err = ocelot_update_stats_for_port(ocelot, port);
> 
> ocelot_check_stats_work() should also check for errors.

Another change I'm catching: I assume calling dev_err while holding a
mutex is frowned upon, so I'm moving this err check after the counter
copy / mutex unlock. I'll submit that fix after patch 1 gets merged into
net / mainline.

> 
> > +	if (err)
> > +		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
> >  
> >  	/* Copy all counters */
> >  	for (i = 0; i < ocelot->num_stats; i++)
> > @@ -1801,6 +1814,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
> >  }
> >  EXPORT_SYMBOL(ocelot_get_sset_count);
> >  
> > +static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
> > +{
> > +	struct ocelot_stats_region *region = NULL;
> > +	unsigned int last;
> > +	int i;
> > +
> > +	INIT_LIST_HEAD(&ocelot->stats_regions);
> > +
> > +	for (i = 0; i < ocelot->num_stats; i++) {
> > +		if (region && ocelot->stats_layout[i].offset == last + 1) {
> > +			region->count++;
> > +		} else {
> > +			region = devm_kzalloc(ocelot->dev, sizeof(*region),
> > +					      GFP_KERNEL);
> > +			if (!region)
> > +				return -ENOMEM;
> > +
> > +			region->offset = ocelot->stats_layout[i].offset;
> > +			region->count = 1;
> > +			list_add_tail(&region->node, &ocelot->stats_regions);
> > +		}
> > +
> > +		last = ocelot->stats_layout[i].offset;
> > +	}
> > +
> > +	list_for_each_entry(region, &ocelot->stats_regions, node) {
> > +		region->buf = devm_kcalloc(ocelot->dev, region->count,
> > +					   sizeof(*region->buf), GFP_KERNEL);
> > +		if (!region->buf)
> > +			return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  int ocelot_get_ts_info(struct ocelot *ocelot, int port,
> >  		       struct ethtool_ts_info *info)
> >  {
> > @@ -2801,6 +2849,13 @@ int ocelot_init(struct ocelot *ocelot)
> >  				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
> >  				 ANA_CPUQ_8021_CFG, i);
> >  
> > +	ret = ocelot_prepare_stats_regions(ocelot);
> > +	if (ret) {
> > +		destroy_workqueue(ocelot->stats_queue);
> > +		destroy_workqueue(ocelot->owq);
> > +		return ret;
> > +	}
> > +
> >  	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
> >  	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
> >  			   OCELOT_STATS_CHECK_DELAY);
> > diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
> > index 312b72558659..d3291a5f7e88 100644
> > --- a/include/soc/mscc/ocelot.h
> > +++ b/include/soc/mscc/ocelot.h
> > @@ -542,6 +542,13 @@ struct ocelot_stat_layout {
> >  	char name[ETH_GSTRING_LEN];
> >  };
> >  
> > +struct ocelot_stats_region {
> > +	struct list_head node;
> > +	u32 offset;
> > +	int count;
> > +	u32 *buf;
> > +};
> > +
> >  enum ocelot_tag_prefix {
> >  	OCELOT_TAG_PREFIX_DISABLED	= 0,
> >  	OCELOT_TAG_PREFIX_NONE,
> > @@ -673,6 +680,7 @@ struct ocelot {
> >  	struct regmap_field		*regfields[REGFIELD_MAX];
> >  	const u32 *const		*map;
> >  	const struct ocelot_stat_layout	*stats_layout;
> > +	struct list_head		stats_regions;
> >  	unsigned int			num_stats;
> >  
> >  	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];
> > -- 
> > 2.25.1
> >
Vladimir Oltean Feb. 10, 2022, 3:27 p.m. UTC | #4
On Thu, Feb 10, 2022 at 07:21:03AM -0800, Colin Foster wrote:
> > ocelot_check_stats_work() should also check for errors.
> 
> Another change I'm catching: I assume calling dev_err while holding a
> mutex is frowned upon, so I'm moving this err check after the counter
> copy / mutex unlock. I'll submit that fix after patch 1 gets merged into
> net / mainline.

Where did you read that? Doing work that doesn't need to be under a lock
while that work is held isn't preferable, of course, but what is special
about dev_err?
Colin Foster Feb. 10, 2022, 3:35 p.m. UTC | #5
On Thu, Feb 10, 2022 at 03:27:13PM +0000, Vladimir Oltean wrote:
> On Thu, Feb 10, 2022 at 07:21:03AM -0800, Colin Foster wrote:
> > > ocelot_check_stats_work() should also check for errors.
> > 
> > Another change I'm catching: I assume calling dev_err while holding a
> > mutex is frowned upon, so I'm moving this err check after the counter
> > copy / mutex unlock. I'll submit that fix after patch 1 gets merged into
> > net / mainline.
> 
> Where did you read that? Doing work that doesn't need to be under a lock
> while that work is held isn't preferable, of course, but what is special
> about dev_err?

I didn't read it anywhere, just a hunch about string formatting
overhead. Either way - not necessary to do inside of stats_lock, so I
moved it out. Next round will hopefully be early next week.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index ab36732e7d3f..fdbd31149dfc 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1738,25 +1738,36 @@  void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
 EXPORT_SYMBOL(ocelot_get_strings);
 
 /* Caller must hold &ocelot->stats_lock */
-static void ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
+static int ocelot_update_stats_for_port(struct ocelot *ocelot, int port)
 {
-	int j;
+	unsigned int idx = port * ocelot->num_stats;
+	struct ocelot_stats_region *region;
+	int err, j;
 
 	/* Configure the port to read the stats from */
 	ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG);
 
-	for (j = 0; j < ocelot->num_stats; j++) {
-		u32 val;
-		unsigned int idx = port * ocelot->num_stats + j;
+	list_for_each_entry(region, &ocelot->stats_regions, node) {
+		err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
+					   region->offset, region->buf,
+					   region->count);
+		if (err)
+			return err;
 
-		val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
-				      ocelot->stats_layout[j].offset);
+		for (j = 0; j < region->count; j++) {
+			u64 *stat = &ocelot->stats[idx + j];
+			u64 val = region->buf[j];
 
-		if (val < (ocelot->stats[idx] & U32_MAX))
-			ocelot->stats[idx] += (u64)1 << 32;
+			if (val < (*stat & U32_MAX))
+				*stat += (u64)1 << 32;
+
+			*stat = (*stat & ~(u64)U32_MAX) + val;
+		}
 
-		ocelot->stats[idx] = (ocelot->stats[idx] & ~(u64)U32_MAX) + val;
+		idx += region->count;
 	}
+
+	return err;
 }
 
 static void ocelot_check_stats_work(struct work_struct *work)
@@ -1777,12 +1788,14 @@  static void ocelot_check_stats_work(struct work_struct *work)
 
 void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
 {
-	int i;
+	int i, err;
 
 	mutex_lock(&ocelot->stats_lock);
 
 	/* check and update now */
-	ocelot_update_stats_for_port(ocelot, port);
+	err = ocelot_update_stats_for_port(ocelot, port);
+	if (err)
+		dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err);
 
 	/* Copy all counters */
 	for (i = 0; i < ocelot->num_stats; i++)
@@ -1801,6 +1814,41 @@  int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
 }
 EXPORT_SYMBOL(ocelot_get_sset_count);
 
+static int ocelot_prepare_stats_regions(struct ocelot *ocelot)
+{
+	struct ocelot_stats_region *region = NULL;
+	unsigned int last;
+	int i;
+
+	INIT_LIST_HEAD(&ocelot->stats_regions);
+
+	for (i = 0; i < ocelot->num_stats; i++) {
+		if (region && ocelot->stats_layout[i].offset == last + 1) {
+			region->count++;
+		} else {
+			region = devm_kzalloc(ocelot->dev, sizeof(*region),
+					      GFP_KERNEL);
+			if (!region)
+				return -ENOMEM;
+
+			region->offset = ocelot->stats_layout[i].offset;
+			region->count = 1;
+			list_add_tail(&region->node, &ocelot->stats_regions);
+		}
+
+		last = ocelot->stats_layout[i].offset;
+	}
+
+	list_for_each_entry(region, &ocelot->stats_regions, node) {
+		region->buf = devm_kcalloc(ocelot->dev, region->count,
+					   sizeof(*region->buf), GFP_KERNEL);
+		if (!region->buf)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 int ocelot_get_ts_info(struct ocelot *ocelot, int port,
 		       struct ethtool_ts_info *info)
 {
@@ -2801,6 +2849,13 @@  int ocelot_init(struct ocelot *ocelot)
 				 ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
 				 ANA_CPUQ_8021_CFG, i);
 
+	ret = ocelot_prepare_stats_regions(ocelot);
+	if (ret) {
+		destroy_workqueue(ocelot->stats_queue);
+		destroy_workqueue(ocelot->owq);
+		return ret;
+	}
+
 	INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 312b72558659..d3291a5f7e88 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -542,6 +542,13 @@  struct ocelot_stat_layout {
 	char name[ETH_GSTRING_LEN];
 };
 
+struct ocelot_stats_region {
+	struct list_head node;
+	u32 offset;
+	int count;
+	u32 *buf;
+};
+
 enum ocelot_tag_prefix {
 	OCELOT_TAG_PREFIX_DISABLED	= 0,
 	OCELOT_TAG_PREFIX_NONE,
@@ -673,6 +680,7 @@  struct ocelot {
 	struct regmap_field		*regfields[REGFIELD_MAX];
 	const u32 *const		*map;
 	const struct ocelot_stat_layout	*stats_layout;
+	struct list_head		stats_regions;
 	unsigned int			num_stats;
 
 	u32				pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM];