diff mbox series

[v3,net-next,07/11] net: prep switchdev drivers for concurrent SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS

Message ID 20210210091445.741269-8-olteanv@gmail.com (mailing list archive)
State New, archived
Headers show
Series Cleanup in brport flags switchdev offload for DSA | expand

Commit Message

Vladimir Oltean Feb. 10, 2021, 9:14 a.m. UTC
From: Vladimir Oltean <vladimir.oltean@nxp.com>

Because the bridge will start offloading SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
while not serialized by any lock such as the br->lock spinlock, existing
drivers that handle that attribute and cache the brport flags might no
longer work correctly.

The issue is that the brport flags are a single unsigned long bitmask,
and the bridge only guarantees the validity of the changed bits, not the
full state. So when offloading two concurrent switchdev attributes, such
as one for BR_LEARNING and another for BR_FLOOD, it might happen that
the flags having BR_FLOOD are written into the cached value, and this in
turn disables the BR_LEARNING bit which was set previously.

We can fix this across the board by keeping individual boolean variables
for each brport flag. Note that mlxsw and prestera were setting the
BR_LEARNING_SYNC flag too, but that appears to be just dead code, so I
removed it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
Changes in v3:
Patch is new.

 .../marvell/prestera/prestera_switchdev.c     | 32 ++++++++++------
 .../mellanox/mlxsw/spectrum_switchdev.c       | 38 ++++++++++++-------
 drivers/net/ethernet/rocker/rocker.h          |  2 +-
 drivers/net/ethernet/rocker/rocker_main.c     |  2 +-
 drivers/net/ethernet/rocker/rocker_ofdpa.c    | 26 +++++++------
 net/bridge/br_switchdev.c                     |  3 +-
 6 files changed, 62 insertions(+), 41 deletions(-)

Comments

Ido Schimmel Feb. 10, 2021, 10:12 a.m. UTC | #1
On Wed, Feb 10, 2021 at 11:14:41AM +0200, Vladimir Oltean wrote:
> From: Vladimir Oltean <vladimir.oltean@nxp.com>
> 
> Because the bridge will start offloading SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
> while not serialized by any lock such as the br->lock spinlock, existing
> drivers that treat that attribute and cache the brport flags might no
> longer work correctly.

Can you explain the race? This notification is sent from sysfs/netlink
call path, both of which take rtnl.
Vladimir Oltean Feb. 10, 2021, 10:23 a.m. UTC | #2
On Wed, Feb 10, 2021 at 12:12:57PM +0200, Ido Schimmel wrote:
> On Wed, Feb 10, 2021 at 11:14:41AM +0200, Vladimir Oltean wrote:
> > From: Vladimir Oltean <vladimir.oltean@nxp.com>
> > 
> > Because the bridge will start offloading SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
> > while not serialized by any lock such as the br->lock spinlock, existing
> > drivers that treat that attribute and cache the brport flags might no
> > longer work correctly.
> 
> Can you explain the race? This notification is sent from sysfs/netlink
> call path, both of which take rtnl.

Replying here to both you and Nikolay: there isn't any race, sorry, I
missed the fact that brport_store takes the rtnl_mutex and by extension
I thought that RTM_SETLINK runs unlocked too, without really checking.
Well, at least that's good news, the implementation can be a lot more
straightforward then...
David Miller Feb. 10, 2021, 11:34 p.m. UTC | #3
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Feb 2021 11:14:41 +0200

> From: Vladimir Oltean <vladimir.oltean@nxp.com>
> 
> Because the bridge will start offloading SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
> while not serialized by any lock such as the br->lock spinlock, existing
> drivers that treat that attribute and cache the brport flags might no
> longer work correctly.
> 
> The issue is that the brport flags are a single unsigned long bitmask,
> and the bridge only guarantees the validity of the changed bits, not the
> full state. So when offloading two concurrent switchdev attributes, such
> as one for BR_LEARNING and another for BR_FLOOD, it might happen that
> the flags having BR_FLOOD are written into the cached value, and this in
> turn disables the BR_LEARNING bit which was set previously.
> 
> We can fix this across the board by keeping individual boolean variables
> for each brport flag. Note that mlxsw and prestera were setting the
> BR_LEARNING_SYNC flag too, but that appears to be just dead code, so I
> removed it.
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>


This needs updating because, as discussed, there is no race.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index a797a7ff0cfe..76030c4c1546 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -49,7 +49,9 @@  struct prestera_bridge_port {
 	struct prestera_bridge *bridge;
 	struct list_head vlan_list;
 	refcount_t ref_count;
-	unsigned long flags;
+	bool learning;
+	bool ucast_flood;
+	bool mcast_flood;
 	u8 stp_state;
 };
 
@@ -339,8 +341,9 @@  prestera_bridge_port_create(struct prestera_bridge *bridge,
 	if (!br_port)
 		return NULL;
 
-	br_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
-				BR_MCAST_FLOOD;
+	br_port->learning = true;
+	br_port->ucast_flood = true;
+	br_port->mcast_flood = true;
 	br_port->stp_state = BR_STATE_DISABLED;
 	refcount_set(&br_port->ref_count, 1);
 	br_port->bridge = bridge;
@@ -404,11 +407,11 @@  prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
 	if (err)
 		return err;
 
-	err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+	err = prestera_hw_port_flood_set(port, br_port->ucast_flood);
 	if (err)
 		goto err_port_flood_set;
 
-	err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING);
+	err = prestera_hw_port_learning_set(port, br_port->learning);
 	if (err)
 		goto err_port_learning_set;
 
@@ -594,19 +597,24 @@  static int prestera_port_attr_br_flags_set(struct prestera_port *port,
 		return 0;
 
 	if (flags.mask & BR_FLOOD) {
-		err = prestera_hw_port_flood_set(port, flags.val & BR_FLOOD);
+		bool ucast_flood = !!(flags.val & BR_FLOOD);
+
+		err = prestera_hw_port_flood_set(port, ucast_flood);
 		if (err)
 			return err;
+
+		br_port->ucast_flood = ucast_flood;
 	}
 
 	if (flags.mask & BR_LEARNING) {
-		err = prestera_hw_port_learning_set(port,
-						    flags.val & BR_LEARNING);
+		bool learning = !!(flags.val & BR_LEARNING);
+
+		err = prestera_hw_port_learning_set(port, learning);
 		if (err)
 			return err;
-	}
 
-	memcpy(&br_port->flags, &flags.val, sizeof(flags.val));
+		br_port->learning = learning;
+	}
 
 	return 0;
 }
@@ -899,11 +907,11 @@  prestera_port_vlan_bridge_join(struct prestera_port_vlan *port_vlan,
 	if (port_vlan->br_port)
 		return 0;
 
-	err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+	err = prestera_hw_port_flood_set(port, br_port->ucast_flood);
 	if (err)
 		return err;
 
-	err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING);
+	err = prestera_hw_port_learning_set(port, br_port->learning);
 	if (err)
 		goto err_port_learning_set;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7af79e3a3c7a..0c00754e0cb5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -62,9 +62,11 @@  struct mlxsw_sp_bridge_port {
 	struct list_head vlans_list;
 	unsigned int ref_count;
 	u8 stp_state;
-	unsigned long flags;
 	bool mrouter;
 	bool lagged;
+	bool ucast_flood;
+	bool mcast_flood;
+	bool learning;
 	union {
 		u16 lag_id;
 		u16 system_port;
@@ -349,8 +351,9 @@  mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
 	bridge_port->dev = brport_dev;
 	bridge_port->bridge_device = bridge_device;
 	bridge_port->stp_state = BR_STATE_DISABLED;
-	bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
-			     BR_MCAST_FLOOD;
+	bridge_port->learning = true;
+	bridge_port->ucast_flood = true;
+	bridge_port->mcast_flood = true;
 	INIT_LIST_HEAD(&bridge_port->vlans_list);
 	list_add(&bridge_port->list, &bridge_device->ports_list);
 	bridge_port->ref_count = 1;
@@ -669,36 +672,45 @@  static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
 		return 0;
 
 	if (flags.mask & BR_FLOOD) {
+		bool ucast_flood = !!(flags.val & BR_FLOOD);
+
 		err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
 							   bridge_port,
 							   MLXSW_SP_FLOOD_TYPE_UC,
-							   flags.val & BR_FLOOD);
+							   ucast_flood);
 		if (err)
 			return err;
+
+		bridge_port->ucast_flood = ucast_flood;
 	}
 
 	if (flags.mask & BR_LEARNING) {
+		bool learning = !!(flags.val & BR_LEARNING);
+
 		err = mlxsw_sp_bridge_port_learning_set(mlxsw_sp_port,
-							bridge_port,
-							flags.val & BR_LEARNING);
+							bridge_port, learning);
 		if (err)
 			return err;
+
+		bridge_port->learning = learning;
 	}
 
 	if (bridge_port->bridge_device->multicast_enabled)
-		goto out;
+		return 0;
 
 	if (flags.mask & BR_MCAST_FLOOD) {
+		bool mcast_flood = !!(flags.val & BR_MCAST_FLOOD);
+
 		err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
 							   bridge_port,
 							   MLXSW_SP_FLOOD_TYPE_MC,
-							   flags.val & BR_MCAST_FLOOD);
+							   mcast_flood);
 		if (err)
 			return err;
+
+		bridge_port->mcast_flood = mcast_flood;
 	}
 
-out:
-	memcpy(&bridge_port->flags, &flags.val, sizeof(flags.val));
 	return 0;
 }
 
@@ -796,7 +808,7 @@  static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
 
 	bridge_device = bridge_port->bridge_device;
 	return bridge_device->multicast_enabled ? bridge_port->mrouter :
-					bridge_port->flags & BR_MCAST_FLOOD;
+						  bridge_port->mcast_flood;
 }
 
 static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -962,7 +974,7 @@  mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
 		return PTR_ERR(fid);
 
 	err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port,
-				     bridge_port->flags & BR_FLOOD);
+				     bridge_port->ucast_flood);
 	if (err)
 		goto err_fid_uc_flood_set;
 
@@ -1043,7 +1055,7 @@  mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
 		return err;
 
 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid,
-					     bridge_port->flags & BR_LEARNING);
+					     bridge_port->learning);
 	if (err)
 		goto err_port_vid_learning_set;
 
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 315a6e5c0f59..2f533c44fe88 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -103,7 +103,7 @@  struct rocker_world_ops {
 	int (*port_attr_stp_state_set)(struct rocker_port *rocker_port,
 				       u8 state);
 	int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port,
-					  unsigned long brport_flags);
+					  struct switchdev_brport_flags flags);
 	int (*port_attr_bridge_flags_support_get)(const struct rocker_port *
 						  rocker_port,
 						  unsigned long *
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 898abf3d14d0..b1a67c6423a8 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1593,7 +1593,7 @@  rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
 	if (flags.mask & ~brport_flags_s)
 		return -EINVAL;
 
-	return wops->port_attr_bridge_flags_set(rocker_port, flags.val);
+	return wops->port_attr_bridge_flags_set(rocker_port, flags);
 }
 
 static int
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 967a634ee9ac..eb36c2a2da7b 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -198,7 +198,7 @@  struct ofdpa_port {
 	struct net_device *bridge_dev;
 	__be16 internal_vlan_id;
 	int stp_state;
-	u32 brport_flags;
+	bool learning;
 	unsigned long ageing_time;
 	bool ctrls[OFDPA_CTRL_MAX];
 	unsigned long vlan_bitmap[OFDPA_VLAN_BITMAP_LEN];
@@ -2423,7 +2423,7 @@  static int ofdpa_port_pre_init(struct rocker_port *rocker_port)
 	ofdpa_port->rocker_port = rocker_port;
 	ofdpa_port->dev = rocker_port->dev;
 	ofdpa_port->pport = rocker_port->pport;
-	ofdpa_port->brport_flags = BR_LEARNING;
+	ofdpa_port->learning = true;
 	ofdpa_port->ageing_time = BR_DEFAULT_AGEING_TIME;
 	return 0;
 }
@@ -2433,8 +2433,7 @@  static int ofdpa_port_init(struct rocker_port *rocker_port)
 	struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
 	int err;
 
-	rocker_port_set_learning(rocker_port,
-				 !!(ofdpa_port->brport_flags & BR_LEARNING));
+	rocker_port_set_learning(rocker_port, ofdpa_port->learning);
 
 	err = ofdpa_port_ig_tbl(ofdpa_port, 0);
 	if (err) {
@@ -2488,20 +2487,23 @@  static int ofdpa_port_attr_stp_state_set(struct rocker_port *rocker_port,
 }
 
 static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
-					    unsigned long brport_flags)
+					    struct switchdev_brport_flags flags)
 {
 	struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
-	unsigned long orig_flags;
-	int err = 0;
+	int err;
 
-	orig_flags = ofdpa_port->brport_flags;
-	ofdpa_port->brport_flags = brport_flags;
+	if (flags.mask & BR_LEARNING) {
+		bool learning = !!(flags.val & BR_LEARNING);
 
-	if ((orig_flags ^ ofdpa_port->brport_flags) & BR_LEARNING)
 		err = rocker_port_set_learning(ofdpa_port->rocker_port,
-					       !!(ofdpa_port->brport_flags & BR_LEARNING));
+					       learning);
+		if (err)
+			return err;
 
-	return err;
+		ofdpa_port->learning = learning;
+	}
+
+	return 0;
 }
 
 static int
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 92d207c8a30e..dbd94156960f 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,12 +72,11 @@  int br_switchdev_set_port_flag(struct net_bridge_port *p,
 	};
 	int err;
 
-	flags &= BR_PORT_FLAGS_HW_OFFLOAD;
 	mask &= BR_PORT_FLAGS_HW_OFFLOAD;
 	if (!mask)
 		return 0;
 
-	attr.u.brport_flags.val = flags;
+	attr.u.brport_flags.val = flags & mask;
 	attr.u.brport_flags.mask = mask;
 
 	/* We run from atomic context here */