diff mbox series

[net,v1,1/4] octeon_ep: fix race conditions in ndo_get_stats64

Message ID 20241203072130.2316913-2-srasheed@marvell.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Fix race conditions in ndo_get_stats64 | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 304 this patch: 304
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 83 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 17 this patch: 17
netdev/source_inline success Was 0 now: 0

Commit Message

Shinas Rasheed Dec. 3, 2024, 7:21 a.m. UTC
ndo_get_stats64() can race with ndo_stop(), which frees input and
output queue resources. Implement device state variable to protect
against such resource usage.

Fixes: 6a610a46bad1 ("octeon_ep: add support for ndo ops")
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
---
 .../ethernet/marvell/octeon_ep/octep_main.c   | 27 +++++++++++++++++++
 .../ethernet/marvell/octeon_ep/octep_main.h   |  8 ++++++
 2 files changed, 35 insertions(+)

Comments

Jakub Kicinski Dec. 4, 2024, 2:23 a.m. UTC | #1
On Mon, 2 Dec 2024 23:21:27 -0800 Shinas Rasheed wrote:
> +	clear_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
> +	/* Make sure device state open is cleared so that no more
> +	 * stats fetch can happen intermittently
> +	 */
> +	smp_mb__after_atomic();
> +	while (octep_drv_busy(oct))
> +		msleep(20);

This is a poor re-implementation of a lock.
We have more lock types in Linux than I care to count now.
Please just use the right one. Hint, it's probably a spin lock or
RCU+synchronize_net() on close.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 549436efc204..872b4848027c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -733,6 +733,7 @@  static int octep_open(struct net_device *netdev)
 	if (ret > 0)
 		octep_link_up(netdev);
 
+	set_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
 	return 0;
 
 set_queues_err:
@@ -745,6 +746,11 @@  static int octep_open(struct net_device *netdev)
 	return -1;
 }
 
+static bool octep_drv_busy(struct octep_device *oct)
+{
+	return test_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+}
+
 /**
  * octep_stop() - stop the octeon network device.
  *
@@ -759,6 +765,14 @@  static int octep_stop(struct net_device *netdev)
 
 	netdev_info(netdev, "Stopping the device ...\n");
 
+	clear_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
+	/* Make sure device state open is cleared so that no more
+	 * stats fetch can happen intermittently
+	 */
+	smp_mb__after_atomic();
+	while (octep_drv_busy(oct))
+		msleep(20);
+
 	octep_ctrl_net_set_link_status(oct, OCTEP_CTRL_NET_INVALID_VFID, false,
 				       false);
 	octep_ctrl_net_set_rx_state(oct, OCTEP_CTRL_NET_INVALID_VFID, false,
@@ -1001,6 +1015,16 @@  static void octep_get_stats64(struct net_device *netdev,
 					    &oct->iface_rx_stats,
 					    &oct->iface_tx_stats);
 
+	set_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+	/* Make sure read stats state is set, so that ndo_stop
+	 * doesn't clear resources as they are read
+	 */
+	smp_mb__after_atomic();
+	if (!test_bit(OCTEP_DEV_STATE_OPEN, &oct->state)) {
+		clear_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
+		return;
+	}
+
 	tx_packets = 0;
 	tx_bytes = 0;
 	rx_packets = 0;
@@ -1022,6 +1046,7 @@  static void octep_get_stats64(struct net_device *netdev,
 	stats->rx_errors = oct->iface_rx_stats.err_pkts;
 	stats->collisions = oct->iface_tx_stats.xscol;
 	stats->tx_fifo_errors = oct->iface_tx_stats.undflw;
+	clear_bit(OCTEP_DEV_STATE_READ_STATS, &oct->state);
 }
 
 /**
@@ -1526,6 +1551,8 @@  static int octep_sriov_disable(struct octep_device *oct)
 	pci_disable_sriov(pdev);
 	CFG_GET_ACTIVE_VFS(oct->conf) = 0;
 
+	clear_bit(OCTEP_DEV_STATE_OPEN, &oct->state);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index fee59e0e0138..78293366e7de 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -223,6 +223,12 @@  struct octep_pfvf_info {
 	u32 mbox_version;
 };
 
+/* Device state */
+enum octep_dev_state {
+	OCTEP_DEV_STATE_OPEN,
+	OCTEP_DEV_STATE_READ_STATS,
+};
+
 /* The Octeon device specific private data structure.
  * Each Octeon device has this structure to represent all its components.
  */
@@ -318,6 +324,8 @@  struct octep_device {
 	atomic_t hb_miss_cnt;
 	/* Task to reset device on heartbeat miss */
 	struct delayed_work hb_task;
+	/* Device state */
+	unsigned long state;
 };
 
 static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct)