Message ID | 20221018165619.134535-6-netdev@kapio-technology.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Extend locked port feature with FDB locked flag (MAC-Auth/MAB) | expand |
On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > Add a new u16 for fdb flags to propagate through the DSA layer towards the > fdb add and del functions of the drivers. > > Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com> > --- > include/net/dsa.h | 2 ++ > net/dsa/dsa_priv.h | 6 ++++-- > net/dsa/port.c | 10 ++++++---- > net/dsa/slave.c | 10 ++++++++-- > net/dsa/switch.c | 16 ++++++++-------- > 5 files changed, 28 insertions(+), 16 deletions(-) > > diff --git a/include/net/dsa.h b/include/net/dsa.h > index ee369670e20e..e4b641b20713 100644 > --- a/include/net/dsa.h > +++ b/include/net/dsa.h > @@ -821,6 +821,8 @@ static inline bool dsa_port_tree_same(const struct dsa_port *a, > return a->ds->dst == b->ds->dst; > } > > +#define DSA_FDB_FLAG_LOCKED (1 << 0) > + > typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, > bool is_static, void *data); > struct dsa_switch_ops { > diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h > index 6e65c7ffd6f3..c943e8934063 100644 > --- a/net/dsa/dsa_priv.h > +++ b/net/dsa/dsa_priv.h > @@ -65,6 +65,7 @@ struct dsa_notifier_fdb_info { > const struct dsa_port *dp; > const unsigned char *addr; > u16 vid; > + u16 fdb_flags; > struct dsa_db db; > }; > > @@ -131,6 +132,7 @@ struct dsa_switchdev_event_work { > */ > unsigned char addr[ETH_ALEN]; > u16 vid; > + u16 fdb_flags; > bool host_addr; > }; > > @@ -241,9 +243,9 @@ int dsa_port_vlan_msti(struct dsa_port *dp, > const struct switchdev_vlan_msti *msti); > int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); > int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, > - u16 vid); > + u16 vid, u16 fdb_flags); > int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, > - u16 vid); > + u16 vid, u16 fdb_flags); > int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, > const unsigned char *addr, u16 vid); > int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, > diff --git a/net/dsa/port.c b/net/dsa/port.c > index 208168276995..ff4f66f14d39 100644 > --- a/net/dsa/port.c > +++ b/net/dsa/port.c > @@ -304,7 +304,7 @@ static int dsa_port_inherit_brport_flags(struct dsa_port *dp, > struct netlink_ext_ack *extack) > { > const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | > - BR_BCAST_FLOOD | BR_PORT_LOCKED; > + BR_BCAST_FLOOD; > struct net_device *brport_dev = dsa_port_to_bridge_port(dp); > int flag, err; > > @@ -328,7 +328,7 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp) > { > const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; > const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | > - BR_BCAST_FLOOD | BR_PORT_LOCKED; > + BR_BCAST_FLOOD | BR_PORT_LOCKED | BR_PORT_MAB; Why does the mask of cleared brport flags differ from the one of set brport flags, and what/where is the explanation for this change? > int flag, err; > > for_each_set_bit(flag, &mask, 32) { > @@ -956,12 +956,13 @@ int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu) > } > > int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, > - u16 vid) > + u16 vid, u16 fdb_flags) > { > struct dsa_notifier_fdb_info info = { > .dp = dp, > .addr = addr, > .vid = vid, > + .fdb_flags = fdb_flags, > .db = { > .type = DSA_DB_BRIDGE, > .bridge = *dp->bridge, > @@ -979,12 +980,13 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, > } > > int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, > - u16 vid) > + u16 vid, u16 fdb_flags) > { > struct dsa_notifier_fdb_info info = { > .dp = dp, > .addr = addr, > .vid = vid, > + .fdb_flags = fdb_flags, > .db = { > .type = DSA_DB_BRIDGE, > .bridge = *dp->bridge, > diff --git a/net/dsa/slave.c b/net/dsa/slave.c > index 1a59918d3b30..65f0c578ef44 100644 > --- a/net/dsa/slave.c > +++ b/net/dsa/slave.c > @@ -3246,6 +3246,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) > container_of(work, struct dsa_switchdev_event_work, work); > const unsigned char *addr = switchdev_work->addr; > struct net_device *dev = switchdev_work->dev; > + u16 fdb_flags = switchdev_work->fdb_flags; > u16 vid = switchdev_work->vid; > struct dsa_switch *ds; > struct dsa_port *dp; > @@ -3261,7 +3262,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) > else if (dp->lag) > err = dsa_port_lag_fdb_add(dp, addr, vid); > else > - err = dsa_port_fdb_add(dp, addr, vid); > + err = dsa_port_fdb_add(dp, addr, vid, fdb_flags); > if (err) { > dev_err(ds->dev, > "port %d failed to add %pM vid %d to fdb: %d\n", > @@ -3277,7 +3278,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) > else if (dp->lag) > err = dsa_port_lag_fdb_del(dp, addr, vid); > else > - err = dsa_port_fdb_del(dp, addr, vid); > + err = dsa_port_fdb_del(dp, addr, vid, fdb_flags); > if (err) { > dev_err(ds->dev, > "port %d failed to delete %pM vid %d from fdb: %d\n", > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > struct dsa_port *dp = dsa_slave_to_port(dev); > bool host_addr = fdb_info->is_local; > struct dsa_switch *ds = dp->ds; > + u16 fdb_flags = 0; > > if (ctx && ctx != dp) > return 0; > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > orig_dev->name, fdb_info->addr, fdb_info->vid, > host_addr ? " as host address" : ""); > > + if (fdb_info->locked) > + fdb_flags |= DSA_FDB_FLAG_LOCKED; This is the bridge->driver direction. In which of the changes up until now/through which mechanism will the bridge emit a SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? Don't the other switchdev drivers except DSA (search for SWITCHDEV_FDB_EVENT_TO_DEVICE in the drivers/ folder) need to handle this new flag too, even if to reject it? When other drivers will want to look at fdb_info->locked, they'll have the surprise that it's impossible to maintain backwards compatibility, because they didn't use to treat the flag at all in the past (so either locked or unlocked, they did the same thing). > + > INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); > switchdev_work->event = event; > switchdev_work->dev = dev; > @@ -3369,6 +3374,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > ether_addr_copy(switchdev_work->addr, fdb_info->addr); > switchdev_work->vid = fdb_info->vid; > switchdev_work->host_addr = host_addr; > + switchdev_work->fdb_flags = fdb_flags;
On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > struct dsa_port *dp = dsa_slave_to_port(dev); > > bool host_addr = fdb_info->is_local; > > struct dsa_switch *ds = dp->ds; > > + u16 fdb_flags = 0; > > > > if (ctx && ctx != dp) > > return 0; > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > orig_dev->name, fdb_info->addr, fdb_info->vid, > > host_addr ? " as host address" : ""); > > > > + if (fdb_info->locked) > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; > > This is the bridge->driver direction. In which of the changes up until > now/through which mechanism will the bridge emit a > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? I believe it can happen in the following call chain: br_handle_frame_finish br_fdb_update // p->flags & BR_PORT_MAB fdb_notify br_switchdev_fdb_notify This can happen with Spectrum when a packet ingresses via a locked port and incurs an FDB miss in hardware. The packet will be trapped and injected to the Rx path where it should invoke the above call chain. > Don't the other switchdev drivers except DSA (search for SWITCHDEV_FDB_EVENT_TO_DEVICE > in the drivers/ folder) need to handle this new flag too, even if to reject it? Yes, agree. At least with mlxsw it is not a big deal right now because it ignores entries with !BR_FDB_ADDED_BY_USER and locked entries are always like that, but it would be good to make it more explicit. > > When other drivers will want to look at fdb_info->locked, they'll have > the surprise that it's impossible to maintain backwards compatibility, > because they didn't use to treat the flag at all in the past (so either > locked or unlocked, they did the same thing). > > > + > > INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); > > switchdev_work->event = event; > > switchdev_work->dev = dev; > > @@ -3369,6 +3374,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > ether_addr_copy(switchdev_work->addr, fdb_info->addr); > > switchdev_work->vid = fdb_info->vid; > > switchdev_work->host_addr = host_addr; > > + switchdev_work->fdb_flags = fdb_flags;
On Thu, Oct 20, 2022 at 04:24:16PM +0300, Ido Schimmel wrote: > On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: > > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > struct dsa_port *dp = dsa_slave_to_port(dev); > > > bool host_addr = fdb_info->is_local; > > > struct dsa_switch *ds = dp->ds; > > > + u16 fdb_flags = 0; > > > > > > if (ctx && ctx != dp) > > > return 0; > > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > orig_dev->name, fdb_info->addr, fdb_info->vid, > > > host_addr ? " as host address" : ""); > > > > > > + if (fdb_info->locked) > > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; > > > > This is the bridge->driver direction. In which of the changes up until > > now/through which mechanism will the bridge emit a > > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? > > I believe it can happen in the following call chain: > > br_handle_frame_finish > br_fdb_update // p->flags & BR_PORT_MAB > fdb_notify > br_switchdev_fdb_notify > > This can happen with Spectrum when a packet ingresses via a locked port > and incurs an FDB miss in hardware. The packet will be trapped and > injected to the Rx path where it should invoke the above call chain. Ah, so this is the case which in mv88e6xxx would generate an ATU violation interrupt; in the Spectrum case it generates a special packet. Right now this packet isn't generated, right? I think we have the same thing in ocelot, a port can be configured to send "learn frames" to the CPU. Should these packets be injected into the bridge RX path in the first place? They reach the CPU because of an FDB miss, not because the CPU was the intended destination.
On Thu, Oct 20, 2022 at 04:35:06PM +0300, Vladimir Oltean wrote: > On Thu, Oct 20, 2022 at 04:24:16PM +0300, Ido Schimmel wrote: > > On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: > > > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > > > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > struct dsa_port *dp = dsa_slave_to_port(dev); > > > > bool host_addr = fdb_info->is_local; > > > > struct dsa_switch *ds = dp->ds; > > > > + u16 fdb_flags = 0; > > > > > > > > if (ctx && ctx != dp) > > > > return 0; > > > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > orig_dev->name, fdb_info->addr, fdb_info->vid, > > > > host_addr ? " as host address" : ""); > > > > > > > > + if (fdb_info->locked) > > > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; > > > > > > This is the bridge->driver direction. In which of the changes up until > > > now/through which mechanism will the bridge emit a > > > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? > > > > I believe it can happen in the following call chain: > > > > br_handle_frame_finish > > br_fdb_update // p->flags & BR_PORT_MAB > > fdb_notify > > br_switchdev_fdb_notify > > > > This can happen with Spectrum when a packet ingresses via a locked port > > and incurs an FDB miss in hardware. The packet will be trapped and > > injected to the Rx path where it should invoke the above call chain. > > Ah, so this is the case which in mv88e6xxx would generate an ATU > violation interrupt; in the Spectrum case it generates a special packet. Not sure what you mean by "special" :) It's simply the packet that incurred the FDB miss on the SMAC. > Right now this packet isn't generated, right? Right. We don't support BR_PORT_LOCKED so these checks are not currently enabled in hardware. To be clear, only packets received via locked ports are able to trigger the check. > > I think we have the same thing in ocelot, a port can be configured to > send "learn frames" to the CPU. > > Should these packets be injected into the bridge RX path in the first > place? They reach the CPU because of an FDB miss, not because the CPU > was the intended destination. The reason to inject them to the Rx path is so that they will trigger the creation of the "locked" entry in the bridge driver (when MAB is on), thereby notifying user space about the presence of a new MAC behind the locked port. We can try to parse them in the driver and notify the bridge driver via SWITCHDEV_FDB_ADD_TO_BRIDGE, but it's quite ugly...
On Thu, Oct 20, 2022 at 04:57:35PM +0300, Ido Schimmel wrote: > On Thu, Oct 20, 2022 at 04:35:06PM +0300, Vladimir Oltean wrote: > > On Thu, Oct 20, 2022 at 04:24:16PM +0300, Ido Schimmel wrote: > > > On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: > > > > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > > > > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > > struct dsa_port *dp = dsa_slave_to_port(dev); > > > > > bool host_addr = fdb_info->is_local; > > > > > struct dsa_switch *ds = dp->ds; > > > > > + u16 fdb_flags = 0; > > > > > > > > > > if (ctx && ctx != dp) > > > > > return 0; > > > > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > > orig_dev->name, fdb_info->addr, fdb_info->vid, > > > > > host_addr ? " as host address" : ""); > > > > > > > > > > + if (fdb_info->locked) > > > > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; > > > > > > > > This is the bridge->driver direction. In which of the changes up until > > > > now/through which mechanism will the bridge emit a > > > > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? > > > > > > I believe it can happen in the following call chain: > > > > > > br_handle_frame_finish > > > br_fdb_update // p->flags & BR_PORT_MAB > > > fdb_notify > > > br_switchdev_fdb_notify > > > > > > This can happen with Spectrum when a packet ingresses via a locked port > > > and incurs an FDB miss in hardware. The packet will be trapped and > > > injected to the Rx path where it should invoke the above call chain. > > > > Ah, so this is the case which in mv88e6xxx would generate an ATU > > violation interrupt; in the Spectrum case it generates a special packet. > > Not sure what you mean by "special" :) It's simply the packet that > incurred the FDB miss on the SMAC. > > > Right now this packet isn't generated, right? > > Right. We don't support BR_PORT_LOCKED so these checks are not currently > enabled in hardware. To be clear, only packets received via locked ports > are able to trigger the check. > > > > > I think we have the same thing in ocelot, a port can be configured to > > send "learn frames" to the CPU. > > > > Should these packets be injected into the bridge RX path in the first > > place? They reach the CPU because of an FDB miss, not because the CPU > > was the intended destination. > > The reason to inject them to the Rx path is so that they will trigger > the creation of the "locked" entry in the bridge driver (when MAB is > on), thereby notifying user space about the presence of a new MAC behind > the locked port. We can try to parse them in the driver and notify the > bridge driver via SWITCHDEV_FDB_ADD_TO_BRIDGE, but it's quite ugly... "ugly" => your words, not mine... But abstracting things a bit, doing what you just said (SWITCHDEV_FDB_ADD_TO_BRIDGE) for learn frames would be exactly the same thing as what mv88e6xxx is doing (so your "ugly" comment equally applies to Marvell). The learn frames are "special" in the sense that they don't belong to the data path of the software bridge*, they are just hardware specific information which the driver must deal with, using a channel that happens to be Ethernet and not an IRQ/MDIO. *in other words, a bridge with proper RX filtering should not even receive these frames, or would need special casing for BR_PORT_MAB to not drop them in the first place. I would incline towards an unified approach for CPU assisted learning, regardless of this (minor, IMO) difference between Marvell and other vendors.
On Thu, Oct 20, 2022 at 04:57:35PM +0300, Ido Schimmel wrote: > > Right now this packet isn't generated, right? > > Right. We don't support BR_PORT_LOCKED so these checks are not currently > enabled in hardware. To be clear, only packets received via locked ports > are able to trigger the check. You mean BR_PORT_MAB, not BR_PORT_LOCKED, right? AFAIU, "locked" means drop unknown MAC SA, "mab" means "install BR_FDB_LOCKED entry on port" (and also maybe still drop, if "locked" is also set on port). Sad there isn't any good documentation about these flags in the patches that Hans is proposing.
On Thu, Oct 20, 2022 at 05:04:00PM +0300, Vladimir Oltean wrote: > On Thu, Oct 20, 2022 at 04:57:35PM +0300, Ido Schimmel wrote: > > On Thu, Oct 20, 2022 at 04:35:06PM +0300, Vladimir Oltean wrote: > > > On Thu, Oct 20, 2022 at 04:24:16PM +0300, Ido Schimmel wrote: > > > > On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: > > > > > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: > > > > > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > > > struct dsa_port *dp = dsa_slave_to_port(dev); > > > > > > bool host_addr = fdb_info->is_local; > > > > > > struct dsa_switch *ds = dp->ds; > > > > > > + u16 fdb_flags = 0; > > > > > > > > > > > > if (ctx && ctx != dp) > > > > > > return 0; > > > > > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, > > > > > > orig_dev->name, fdb_info->addr, fdb_info->vid, > > > > > > host_addr ? " as host address" : ""); > > > > > > > > > > > > + if (fdb_info->locked) > > > > > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; > > > > > > > > > > This is the bridge->driver direction. In which of the changes up until > > > > > now/through which mechanism will the bridge emit a > > > > > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? > > > > > > > > I believe it can happen in the following call chain: > > > > > > > > br_handle_frame_finish > > > > br_fdb_update // p->flags & BR_PORT_MAB > > > > fdb_notify > > > > br_switchdev_fdb_notify > > > > > > > > This can happen with Spectrum when a packet ingresses via a locked port > > > > and incurs an FDB miss in hardware. The packet will be trapped and > > > > injected to the Rx path where it should invoke the above call chain. > > > > > > Ah, so this is the case which in mv88e6xxx would generate an ATU > > > violation interrupt; in the Spectrum case it generates a special packet. > > > > Not sure what you mean by "special" :) It's simply the packet that > > incurred the FDB miss on the SMAC. > > > > > Right now this packet isn't generated, right? > > > > Right. We don't support BR_PORT_LOCKED so these checks are not currently > > enabled in hardware. To be clear, only packets received via locked ports > > are able to trigger the check. > > > > > > > > I think we have the same thing in ocelot, a port can be configured to > > > send "learn frames" to the CPU. > > > > > > Should these packets be injected into the bridge RX path in the first > > > place? They reach the CPU because of an FDB miss, not because the CPU > > > was the intended destination. > > > > The reason to inject them to the Rx path is so that they will trigger > > the creation of the "locked" entry in the bridge driver (when MAB is > > on), thereby notifying user space about the presence of a new MAC behind > > the locked port. We can try to parse them in the driver and notify the > > bridge driver via SWITCHDEV_FDB_ADD_TO_BRIDGE, but it's quite ugly... > > "ugly" => your words, not mine... But abstracting things a bit, doing > what you just said (SWITCHDEV_FDB_ADD_TO_BRIDGE) for learn frames would > be exactly the same thing as what mv88e6xxx is doing (so your "ugly" > comment equally applies to Marvell). My understanding is that mv88e6xxx only reads the SMAC and FID/VID from hardware and notifies them to the bridge driver. It does not need to parse them out of the Ethernet frame that triggered the "violation". This is the "ugly" part (in my opinion). > The learn frames are "special" in the sense that they don't belong to > the data path of the software bridge*, they are just hardware specific > information which the driver must deal with, using a channel that > happens to be Ethernet and not an IRQ/MDIO. I think we misunderstand each other because I don't understand why you call them "special" nor "hardware specific information" :/ We don't inject to the software data path some hardware specific frames, but rather the original Ethernet frames that triggered the violation. The same thing happens with packets that encountered a neighbour miss during routing or whose TTL was decremented to zero. The hardware can't generate ARP or ICMP packets, so the original packet is injected to the Rx path so that the kernel will generate the necessary control packets in response. > *in other words, a bridge with proper RX filtering should not even > receive these frames, or would need special casing for BR_PORT_MAB to > not drop them in the first place. > > I would incline towards an unified approach for CPU assisted learning, > regardless of this (minor, IMO) difference between Marvell and other > vendors. OK, understood. Assuming you don't like the above, I need to check if we can do something similar to what mv88e6xxx is doing (because I don't think mv88e6xxx can do anything else).
On Thu, Oct 20, 2022 at 05:11:04PM +0300, Vladimir Oltean wrote: > On Thu, Oct 20, 2022 at 04:57:35PM +0300, Ido Schimmel wrote: > > > Right now this packet isn't generated, right? > > > > Right. We don't support BR_PORT_LOCKED so these checks are not currently > > enabled in hardware. To be clear, only packets received via locked ports > > are able to trigger the check. > > You mean BR_PORT_MAB, not BR_PORT_LOCKED, right? I actually meant BR_PORT_LOCKED... The hardware has a single bit per port that can be used to enable security checks on the port. If security checks are enabled, then before L2 forwarding the hardware will perform an FDB lookup with the SMAC and FID (VID), which can have one of three results: 1. Match. FDB entry found and it points to the ingress port. In this case the packet continues to the regular L2 pipeline. 2. Mismatch. FDB entry found, but it points to a different port than ingress port. In this case we want to drop the packet like the software bridge. 3. Miss. FDB entry not found. Here I was thinking to always tell the packet to go to the software data path so that it will trigger the creation of the "locked" entry if MAB is enabled. If MAB is not enabled, it will simply be dropped by the bridge. We can't control it per port in hardware, which is why the BR_PORT_MAB flag is not consulted. > AFAIU, "locked" means drop unknown MAC SA, "mab" means "install > BR_FDB_LOCKED entry on port" (and also maybe still drop, if "locked" > is also set on port). Right, but you can't have "mab" without "locked" (from patch #1): ``` @@ -943,6 +946,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS); br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED); br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED); + br_set_port_flag(p, tb, IFLA_BRPORT_MAB, BR_PORT_MAB); + + if (!(p->flags & BR_PORT_LOCKED) && (p->flags & BR_PORT_MAB)) { + NL_SET_ERR_MSG(extack, "MAB cannot be enabled when port is unlocked"); + p->flags = old_flags; + return -EINVAL; + } changed_mask = old_flags ^ p->flags; ``` > Sad there isn't any good documentation about these flags in the patches > that Hans is proposing. Will try to comment with better commit messages for patches #1 and #2. Not sure I will have time today.
On Thu, Oct 20, 2022 at 05:58:42PM +0300, Ido Schimmel wrote: > My understanding is that mv88e6xxx only reads the SMAC and FID/VID from > hardware and notifies them to the bridge driver. It does not need to > parse them out of the Ethernet frame that triggered the "violation". > This is the "ugly" part (in my opinion). I think that the Marvell approach is uglier, but maybe that's just me. Between parsing a MAC SA/VLAN ID from an Ethernet frame than having to concern myself with rate limiting IRQs which need MDIO access, I'd rather parse Ethernet frames all day long. With Ethernet we have all sorts of coping mechanisms, NAPI, IRQ coalescing. The Ethernet interrupts are designed to be very high bandwidth. You can even put a storm policer on Ethernet traffic and rate limit the learn frames. I don't like where the Marvell specific impl is going, I don't think it is a good first implementation of a new feature, since it will inevitably shape the way in which other hardware with CPU assisted learning will do things. For example, not sure if blackhole FDB entries are going to be needed by other implementations as well. I kind of thought that the Linux bridge would be more resilient to DoS than it actually is. Now I'm not sure if me and Andrew gave bad advice with the whole protection mechanisms put in place as UAPI for mv88e6xxx's quirks. > > The learn frames are "special" in the sense that they don't belong to > > the data path of the software bridge*, they are just hardware specific > > information which the driver must deal with, using a channel that > > happens to be Ethernet and not an IRQ/MDIO. > > I think we misunderstand each other because I don't understand why you > call them "special" nor "hardware specific information" :/ I call them special because there is no need to present these packets to application software. Understood and agreed that they are identical to the original packet which triggered the trap (plus some metadata which denotes the trap reason, presumably), although I don't think this really matters too much. > We don't inject to the software data path some hardware specific > frames, but rather the original Ethernet frames that triggered the > violation. The same thing happens with packets that encountered a > neighbour miss during routing or whose TTL was decremented to zero. > The hardware can't generate ARP or ICMP packets, so the original > packet is injected to the Rx path so that the kernel will generate the > necessary control packets in response. Can't speak for IP forwarding offload unfortunately, but it seems like you presented a different/unrelated situation here. CPU assisted learning is not slow path processing, because nothing needs to be done further with that packet except for extracting its MAC SA/VID, and learning it. The rest of the original packet is really not necessary. > > *in other words, a bridge with proper RX filtering should not even > > receive these frames, or would need special casing for BR_PORT_MAB to > > not drop them in the first place. > > > > I would incline towards an unified approach for CPU assisted learning, > > regardless of this (minor, IMO) difference between Marvell and other > > vendors. > > OK, understood. Assuming you don't like the above, I need to check if we > can do something similar to what mv88e6xxx is doing (because I don't > think mv88e6xxx can do anything else). No no, I like having an Ethernet channel (see the first reply to this email), I think it has benefits and I don't want to make Spectrum follow an inferior route just because that's the model. But on the other hand, nobody right now needs the mechanism that Hans put in place for setting BR_FDB_LOCKED in software, and notifying it back to the driver. Moreover, when Ethernet-based CPU assisted learning will come, this mechanism will not be the only possibility, and that should be a separate discussion. I still think that generic helpers to emit SWITCHDEV_FDB_ADD_TO_BRIDGE based on an skb are an equally valid approach, and would diverge significantly less from Marvell without imposing any real limitation. In the implementation proposed here, we have variation for the sake of variation, and we come up with hypothetical examples of how they might be useful. At least half this patch set is full of maybes, I can't really say I like that.
On Thu, Oct 20, 2022 at 06:23:37PM +0300, Ido Schimmel wrote: > 3. Miss. FDB entry not found. Here I was thinking to always tell the > packet to go to the software data path so that it will trigger the > creation of the "locked" entry if MAB is enabled. If MAB is not enabled, > it will simply be dropped by the bridge. We can't control it per port in > hardware, which is why the BR_PORT_MAB flag is not consulted. Ah, ok, this is the part I was missing, so you can't control an FDB miss to generate a learn frame only on some ports. But in principle, it still is the BR_PORT_MAB flag the one which requires these frames to be generated, not BR_PORT_LOCKED. You can have all ports LOCKED but not MAB, and no learn frames will be necessary to be sent to the CPU. Only EAPOL, which is link-local multicast, will reach software for further processing and unlock the port for a certain MAC DA.
On 2022-10-20 15:35, Vladimir Oltean wrote: > On Thu, Oct 20, 2022 at 04:24:16PM +0300, Ido Schimmel wrote: >> On Thu, Oct 20, 2022 at 04:02:24PM +0300, Vladimir Oltean wrote: >> > On Tue, Oct 18, 2022 at 06:56:12PM +0200, Hans J. Schultz wrote: >> > > @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, >> > > struct dsa_port *dp = dsa_slave_to_port(dev); >> > > bool host_addr = fdb_info->is_local; >> > > struct dsa_switch *ds = dp->ds; >> > > + u16 fdb_flags = 0; >> > > >> > > if (ctx && ctx != dp) >> > > return 0; >> > > @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, >> > > orig_dev->name, fdb_info->addr, fdb_info->vid, >> > > host_addr ? " as host address" : ""); >> > > >> > > + if (fdb_info->locked) >> > > + fdb_flags |= DSA_FDB_FLAG_LOCKED; >> > >> > This is the bridge->driver direction. In which of the changes up until >> > now/through which mechanism will the bridge emit a >> > SWITCHDEV_FDB_ADD_TO_DEVICE with fdb_info->locked = true? >> >> I believe it can happen in the following call chain: >> >> br_handle_frame_finish >> br_fdb_update // p->flags & BR_PORT_MAB >> fdb_notify >> br_switchdev_fdb_notify >> >> This can happen with Spectrum when a packet ingresses via a locked >> port >> and incurs an FDB miss in hardware. The packet will be trapped and >> injected to the Rx path where it should invoke the above call chain. > > Ah, so this is the case which in mv88e6xxx would generate an ATU > violation interrupt; in the Spectrum case it generates a special > packet. > Right now this packet isn't generated, right? > > I think we have the same thing in ocelot, a port can be configured to > send "learn frames" to the CPU. > > Should these packets be injected into the bridge RX path in the first > place? They reach the CPU because of an FDB miss, not because the CPU > was the intended destination. Just to add to it, now that there is a u16 for flags in the bridge->driver direction, making it easier to add such flags, I expect that for the mv88e6xxx driver there shall be a 'IS_DYNAMIC' flag also, as authorized hosts will have their authorized FDB entries added with dynamic entries... Now as the bridge will not be able to refresh such authorized FDB entries based on unicast incoming traffic on the locked port in the offloaded case, besides we don't want the CPU to do such in this case anyway, to keep the authorized line alive without having to reauthorize in like every 5 minutes, the driver needs to do the ageing (and refreshing) of the dynamic entry added from userspace. When the entry "ages" out, there is the HoldAt1 feature and Age Out Violations that should be used to tell userspace (plus bridge) that this authorization has been removed by the driver as the host has gone quiet. So all in all, there is the need of another flag from userspace->bridge->driver, telling that we want a dynamic ATU entry (with mv88e6xxx it will start at age 7).
On 2022-10-20 15:02, Vladimir Oltean wrote: >> --- a/net/dsa/port.c >> +++ b/net/dsa/port.c >> @@ -304,7 +304,7 @@ static int dsa_port_inherit_brport_flags(struct >> dsa_port *dp, >> struct netlink_ext_ack *extack) >> { >> const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | >> - BR_BCAST_FLOOD | BR_PORT_LOCKED; >> + BR_BCAST_FLOOD; >> struct net_device *brport_dev = dsa_port_to_bridge_port(dp); >> int flag, err; >> >> @@ -328,7 +328,7 @@ static void dsa_port_clear_brport_flags(struct >> dsa_port *dp) >> { >> const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | >> BR_BCAST_FLOOD; >> const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | >> - BR_BCAST_FLOOD | BR_PORT_LOCKED; >> + BR_BCAST_FLOOD | BR_PORT_LOCKED | BR_PORT_MAB; > > Why does the mask of cleared brport flags differ from the one of set > brport flags, and what/where is the explanation for this change? > I guess you mean, why it differs from the inherit flag mask list? If so it is explained in the update to v7 in 00/12.
On Thu, Oct 20, 2022 at 09:43:40PM +0200, netdev@kapio-technology.com wrote: > I guess you mean, why it differs from the inherit flag mask list? > > If so it is explained in the update to v7 in 00/12. The following is written there: | v7: Remove locked port and mab flags from DSA flags | inherit list as it messes with the learning | setting and those flags are not naturally meant | for enheriting, but should be set explicitly. Can you go one level deeper with the explanation? What messes with the learning setting? Why are those brport flags not naturally meant for inheriting? It's pretty hard to take your patch set seriously if you don't provide proper explanations.
On Thu, Oct 20, 2022 at 08:47:39PM +0200, netdev@kapio-technology.com wrote: > Just to add to it, now that there is a u16 for flags in the bridge->driver > direction, making it easier to add such flags, I expect that for the > mv88e6xxx driver there shall be a 'IS_DYNAMIC' flag also, as authorized > hosts will have their authorized FDB entries added with dynamic entries... With what is implemented in this patchset, the MAB daemon uses static FDB entries for authorizations, just like the selftests, right? That's the only thing that works. > Now as the bridge will not be able to refresh such authorized FDB entries > based on unicast incoming traffic on the locked port in the offloaded case, > besides we don't want the CPU to do such in this case anyway, ..because the software bridge refreshes the FDB entry based on the traffic it sees, and the hardware port refreshes the corresponding ATU entry based on the traffic *it* sees, and the 2 are not in sync because most of the traffic is autonomously forwarded, causing the FDB to be refreshed more often in hardware than in software.. > to keep the authorized line alive without having to reauthorize in > like every 5 minutes, the driver needs to do the ageing (and refreshing) > of the dynamic entry added from userspace. You're saying "now [...] to keep the authorized line alive [...], the driver needs to do the [...] refreshing of the dynamic [FDB] entry". Can you point me to the code where that is done now? Or perhaps I'm misunderstanding and it is a "future now"... > When the entry "ages" out, there is the HoldAt1 feature and Age Out > Violations that should be used to tell userspace (plus bridge) that > this authorization has been removed by the driver as the host has gone > quiet. So this is your proposal for how a dynamic FDB entry could be offloaded. Have you given any thought to how can we prevent the software FDB entry from ageing out first, and causing the hardware FDB entry to be removed too, through the ensuing switchdev notification? > So all in all, there is the need of another flag from > userspace->bridge->driver, telling that we want a dynamic ATU entry (with > mv88e6xxx it will start at age 7). Sorry for the elementary question, but what is gained from making the authorized FDB entries dynamic in the bridge? You don't have to reauthorize every 5 minutes even with the current implementation; you could make the FDB entries static. The ability for authorized stations to roam? This is why the authorizations are removed every 5 minutes, to see if anybody is still there? Who removes the authorizations in the implementation with the currently proposed patch set? The MAB daemon, right? Could you please present a high level overview of how you want things to look in the end and how far you are along that line? Maybe a set of user space + kernel repos where everything is implemented and works?
diff --git a/include/net/dsa.h b/include/net/dsa.h index ee369670e20e..e4b641b20713 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -821,6 +821,8 @@ static inline bool dsa_port_tree_same(const struct dsa_port *a, return a->ds->dst == b->ds->dst; } +#define DSA_FDB_FLAG_LOCKED (1 << 0) + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 6e65c7ffd6f3..c943e8934063 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -65,6 +65,7 @@ struct dsa_notifier_fdb_info { const struct dsa_port *dp; const unsigned char *addr; u16 vid; + u16 fdb_flags; struct dsa_db db; }; @@ -131,6 +132,7 @@ struct dsa_switchdev_event_work { */ unsigned char addr[ETH_ALEN]; u16 vid; + u16 fdb_flags; bool host_addr; }; @@ -241,9 +243,9 @@ int dsa_port_vlan_msti(struct dsa_port *dp, const struct switchdev_vlan_msti *msti); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid); + u16 vid, u16 fdb_flags); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid); + u16 vid, u16 fdb_flags); int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, diff --git a/net/dsa/port.c b/net/dsa/port.c index 208168276995..ff4f66f14d39 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -304,7 +304,7 @@ static int dsa_port_inherit_brport_flags(struct dsa_port *dp, struct netlink_ext_ack *extack) { const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD | BR_PORT_LOCKED; + BR_BCAST_FLOOD; struct net_device *brport_dev = dsa_port_to_bridge_port(dp); int flag, err; @@ -328,7 +328,7 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp) { const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD | BR_PORT_LOCKED; + BR_BCAST_FLOOD | BR_PORT_LOCKED | BR_PORT_MAB; int flag, err; for_each_set_bit(flag, &mask, 32) { @@ -956,12 +956,13 @@ int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu) } int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid) + u16 vid, u16 fdb_flags) { struct dsa_notifier_fdb_info info = { .dp = dp, .addr = addr, .vid = vid, + .fdb_flags = fdb_flags, .db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -979,12 +980,13 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, } int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid) + u16 vid, u16 fdb_flags) { struct dsa_notifier_fdb_info info = { .dp = dp, .addr = addr, .vid = vid, + .fdb_flags = fdb_flags, .db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1a59918d3b30..65f0c578ef44 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -3246,6 +3246,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) container_of(work, struct dsa_switchdev_event_work, work); const unsigned char *addr = switchdev_work->addr; struct net_device *dev = switchdev_work->dev; + u16 fdb_flags = switchdev_work->fdb_flags; u16 vid = switchdev_work->vid; struct dsa_switch *ds; struct dsa_port *dp; @@ -3261,7 +3262,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) else if (dp->lag) err = dsa_port_lag_fdb_add(dp, addr, vid); else - err = dsa_port_fdb_add(dp, addr, vid); + err = dsa_port_fdb_add(dp, addr, vid, fdb_flags); if (err) { dev_err(ds->dev, "port %d failed to add %pM vid %d to fdb: %d\n", @@ -3277,7 +3278,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) else if (dp->lag) err = dsa_port_lag_fdb_del(dp, addr, vid); else - err = dsa_port_fdb_del(dp, addr, vid); + err = dsa_port_fdb_del(dp, addr, vid, fdb_flags); if (err) { dev_err(ds->dev, "port %d failed to delete %pM vid %d from fdb: %d\n", @@ -3315,6 +3316,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); bool host_addr = fdb_info->is_local; struct dsa_switch *ds = dp->ds; + u16 fdb_flags = 0; if (ctx && ctx != dp) return 0; @@ -3361,6 +3363,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, orig_dev->name, fdb_info->addr, fdb_info->vid, host_addr ? " as host address" : ""); + if (fdb_info->locked) + fdb_flags |= DSA_FDB_FLAG_LOCKED; + INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); switchdev_work->event = event; switchdev_work->dev = dev; @@ -3369,6 +3374,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, ether_addr_copy(switchdev_work->addr, fdb_info->addr); switchdev_work->vid = fdb_info->vid; switchdev_work->host_addr = host_addr; + switchdev_work->fdb_flags = fdb_flags; dsa_schedule_work(&switchdev_work->work); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index ce56acdba203..dd355556892e 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -234,7 +234,7 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp, } static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid, struct dsa_db db) + u16 vid, u16 fdb_flags, struct dsa_db db) { struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; @@ -278,7 +278,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, } static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid, struct dsa_db db) + u16 vid, u16 fdb_flags, struct dsa_db db) { struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; @@ -404,8 +404,8 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, info->vid, info->db); } else { - err = dsa_port_do_fdb_add(dp, info->addr, - info->vid, info->db); + err = dsa_port_do_fdb_add(dp, info->addr, info->vid, + info->fdb_flags, info->db); } if (err) break; @@ -432,8 +432,8 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, info->vid, info->db); } else { - err = dsa_port_do_fdb_del(dp, info->addr, - info->vid, info->db); + err = dsa_port_do_fdb_del(dp, info->addr, info->vid, + info->fdb_flags, info->db); } if (err) break; @@ -452,7 +452,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db); + return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->fdb_flags, info->db); } static int dsa_switch_fdb_del(struct dsa_switch *ds, @@ -464,7 +464,7 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db); + return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->fdb_flags, info->db); } static int dsa_switch_lag_fdb_add(struct dsa_switch *ds,
Add a new u16 for fdb flags to propagate through the DSA layer towards the fdb add and del functions of the drivers. Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com> --- include/net/dsa.h | 2 ++ net/dsa/dsa_priv.h | 6 ++++-- net/dsa/port.c | 10 ++++++---- net/dsa/slave.c | 10 ++++++++-- net/dsa/switch.c | 16 ++++++++-------- 5 files changed, 28 insertions(+), 16 deletions(-)