diff mbox series

[net-next,5/8] net: lan966x: Add lag support for lan966x.

Message ID 20220626130451.1079933-6-horatiu.vultur@microchip.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: lan966x: Add lag support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Horatiu Vultur June 26, 2022, 1:04 p.m. UTC
Add link aggregation hardware offload support for lan966x

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
---
 .../net/ethernet/microchip/lan966x/Makefile   |   2 +-
 .../ethernet/microchip/lan966x/lan966x_lag.c  | 296 ++++++++++++++++++
 .../ethernet/microchip/lan966x/lan966x_main.h |  28 ++
 .../microchip/lan966x/lan966x_switchdev.c     |  78 ++++-
 4 files changed, 388 insertions(+), 16 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c

Comments

Vladimir Oltean June 26, 2022, 2:11 p.m. UTC | #1
Hi Horatiu,

Just casually browsing through the patches. A comment below.

On Sun, Jun 26, 2022 at 03:04:48PM +0200, Horatiu Vultur wrote:
> Add link aggregation hardware offload support for lan966x
> 
> Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
> ---
>  .../net/ethernet/microchip/lan966x/Makefile   |   2 +-
>  .../ethernet/microchip/lan966x/lan966x_lag.c  | 296 ++++++++++++++++++
>  .../ethernet/microchip/lan966x/lan966x_main.h |  28 ++
>  .../microchip/lan966x/lan966x_switchdev.c     |  78 ++++-
>  4 files changed, 388 insertions(+), 16 deletions(-)
>  create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> 
> diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
> index fd2e0ebb2427..0c22c86bdaa9 100644
> --- a/drivers/net/ethernet/microchip/lan966x/Makefile
> +++ b/drivers/net/ethernet/microchip/lan966x/Makefile
> @@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
>  lan966x-switch-objs  := lan966x_main.o lan966x_phylink.o lan966x_port.o \
>  			lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
>  			lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
> -			lan966x_ptp.o lan966x_fdma.o
> +			lan966x_ptp.o lan966x_fdma.o lan966x_lag.o
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> new file mode 100644
> index 000000000000..c721a05d44d2
> --- /dev/null
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> @@ -0,0 +1,296 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +
> +#include "lan966x_main.h"
> +
> +static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
> +{
> +	u32 visited = GENMASK(lan966x->num_phys_ports - 1, 0);
> +	int p, lag, i;
> +
> +	/* Reset destination and aggregation PGIDS */
> +	for (p = 0; p < lan966x->num_phys_ports; ++p)
> +		lan_wr(ANA_PGID_PGID_SET(BIT(p)),
> +		       lan966x, ANA_PGID(p));
> +
> +	for (p = PGID_AGGR; p < PGID_SRC; ++p)
> +		lan_wr(ANA_PGID_PGID_SET(visited),
> +		       lan966x, ANA_PGID(p));
> +
> +	/* The visited ports bitmask holds the list of ports offloading any
> +	 * bonding interface. Initially we mark all these ports as unvisited,
> +	 * then every time we visit a port in this bitmask, we know that it is
> +	 * the lowest numbered port, i.e. the one whose logical ID == physical
> +	 * port ID == LAG ID. So we mark as visited all further ports in the
> +	 * bitmask that are offloading the same bonding interface. This way,
> +	 * we set up the aggregation PGIDs only once per bonding interface.
> +	 */
> +	for (p = 0; p < lan966x->num_phys_ports; ++p) {
> +		struct lan966x_port *port = lan966x->ports[p];
> +
> +		if (!port || !port->bond)
> +			continue;
> +
> +		visited &= ~BIT(p);
> +	}
> +
> +	/* Now, set PGIDs for each active LAG */
> +	for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
> +		struct lan966x_port *port = lan966x->ports[lag];
> +		int num_active_ports = 0;
> +		struct net_device *bond;
> +		unsigned long bond_mask;
> +		u8 aggr_idx[16];
> +
> +		if (!port || !port->bond || (visited & BIT(lag)))
> +			continue;
> +
> +		bond = port->bond;
> +		bond_mask = lan966x_lag_get_mask(lan966x, bond, true);
> +
> +		for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
> +			lan_wr(ANA_PGID_PGID_SET(bond_mask),
> +			       lan966x, ANA_PGID(p));
> +			aggr_idx[num_active_ports++] = p;
> +		}

This incorrect logic seems to have been copied from ocelot from before
commit a14e6b69f393 ("net: mscc: ocelot: fix incorrect balancing with
down LAG ports").

The issue is that you calculate bond_mask with only_active_ports=true.
This means the for_each_set_bit() will not iterate through the inactive
LAG ports, and won't set the bond_mask as the PGID destination for those
ports.

That isn't what is desired; as explained in that commit, inactive LAG
ports should be removed via the aggregation PGIDs and not via the
destination PGIDs. Otherwise, an FDB entry targeted towards the
LAG (effectively towards the "primary" LAG port, whose logical port ID
gives the LAG ID) will not egress even the "secondary" LAG port if the
primary's link is down.

> +
> +		for (i = PGID_AGGR; i < PGID_SRC; ++i) {
> +			u32 ac;
> +
> +			ac = lan_rd(lan966x, ANA_PGID(i));
> +			ac &= ~bond_mask;
> +			/* Don't do division by zero if there was no active
> +			 * port. Just make all aggregation codes zero.
> +			 */
> +			if (num_active_ports)
> +				ac |= BIT(aggr_idx[i % num_active_ports]);
> +			lan_wr(ANA_PGID_PGID_SET(ac),
> +			       lan966x, ANA_PGID(i));
> +		}
> +
> +		/* Mark all ports in the same LAG as visited to avoid applying
> +		 * the same config again.
> +		 */
> +		for (p = lag; p < lan966x->num_phys_ports; p++) {
> +			struct lan966x_port *port = lan966x->ports[p];
> +
> +			if (!port)
> +				continue;
> +
> +			if (port->bond == bond)
> +				visited |= BIT(p);
> +		}
> +	}
> +}
Horatiu Vultur June 27, 2022, 6:46 a.m. UTC | #2
The 06/26/2022 17:11, Vladimir Oltean wrote:
> 
> Hi Horatiu,

Hi Vladimir,

> 
> Just casually browsing through the patches. A comment below.

> 
> On Sun, Jun 26, 2022 at 03:04:48PM +0200, Horatiu Vultur wrote:
> > Add link aggregation hardware offload support for lan966x
> >
> > Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
> > ---
> >  .../net/ethernet/microchip/lan966x/Makefile   |   2 +-
> >  .../ethernet/microchip/lan966x/lan966x_lag.c  | 296 ++++++++++++++++++
> >  .../ethernet/microchip/lan966x/lan966x_main.h |  28 ++
> >  .../microchip/lan966x/lan966x_switchdev.c     |  78 ++++-
> >  4 files changed, 388 insertions(+), 16 deletions(-)
> >  create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> >
> > diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
> > index fd2e0ebb2427..0c22c86bdaa9 100644
> > --- a/drivers/net/ethernet/microchip/lan966x/Makefile
> > +++ b/drivers/net/ethernet/microchip/lan966x/Makefile
> > @@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
> >  lan966x-switch-objs  := lan966x_main.o lan966x_phylink.o lan966x_port.o \
> >                       lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
> >                       lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
> > -                     lan966x_ptp.o lan966x_fdma.o
> > +                     lan966x_ptp.o lan966x_fdma.o lan966x_lag.o
> > diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> > new file mode 100644
> > index 000000000000..c721a05d44d2
> > --- /dev/null
> > +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> > @@ -0,0 +1,296 @@
> > +// SPDX-License-Identifier: GPL-2.0+
> > +
> > +#include "lan966x_main.h"
> > +
> > +static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
> > +{
> > +     u32 visited = GENMASK(lan966x->num_phys_ports - 1, 0);
> > +     int p, lag, i;
> > +
> > +     /* Reset destination and aggregation PGIDS */
> > +     for (p = 0; p < lan966x->num_phys_ports; ++p)
> > +             lan_wr(ANA_PGID_PGID_SET(BIT(p)),
> > +                    lan966x, ANA_PGID(p));
> > +
> > +     for (p = PGID_AGGR; p < PGID_SRC; ++p)
> > +             lan_wr(ANA_PGID_PGID_SET(visited),
> > +                    lan966x, ANA_PGID(p));
> > +
> > +     /* The visited ports bitmask holds the list of ports offloading any
> > +      * bonding interface. Initially we mark all these ports as unvisited,
> > +      * then every time we visit a port in this bitmask, we know that it is
> > +      * the lowest numbered port, i.e. the one whose logical ID == physical
> > +      * port ID == LAG ID. So we mark as visited all further ports in the
> > +      * bitmask that are offloading the same bonding interface. This way,
> > +      * we set up the aggregation PGIDs only once per bonding interface.
> > +      */
> > +     for (p = 0; p < lan966x->num_phys_ports; ++p) {
> > +             struct lan966x_port *port = lan966x->ports[p];
> > +
> > +             if (!port || !port->bond)
> > +                     continue;
> > +
> > +             visited &= ~BIT(p);
> > +     }
> > +
> > +     /* Now, set PGIDs for each active LAG */
> > +     for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
> > +             struct lan966x_port *port = lan966x->ports[lag];
> > +             int num_active_ports = 0;
> > +             struct net_device *bond;
> > +             unsigned long bond_mask;
> > +             u8 aggr_idx[16];
> > +
> > +             if (!port || !port->bond || (visited & BIT(lag)))
> > +                     continue;
> > +
> > +             bond = port->bond;
> > +             bond_mask = lan966x_lag_get_mask(lan966x, bond, true);
> > +
> > +             for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
> > +                     lan_wr(ANA_PGID_PGID_SET(bond_mask),
> > +                            lan966x, ANA_PGID(p));
> > +                     aggr_idx[num_active_ports++] = p;
> > +             }
> 
> This incorrect logic seems to have been copied from ocelot from before
> commit a14e6b69f393 ("net: mscc: ocelot: fix incorrect balancing with
> down LAG ports").
> 
> The issue is that you calculate bond_mask with only_active_ports=true.
> This means the for_each_set_bit() will not iterate through the inactive
> LAG ports, and won't set the bond_mask as the PGID destination for those
> ports.
> 
> That isn't what is desired; as explained in that commit, inactive LAG
> ports should be removed via the aggregation PGIDs and not via the
> destination PGIDs. Otherwise, an FDB entry targeted towards the
> LAG (effectively towards the "primary" LAG port, whose logical port ID
> gives the LAG ID) will not egress even the "secondary" LAG port if the
> primary's link is down.

Thanks for looking at this.
That is correct, ocelot was the source of inspiration. The issue that
you described in the mentioned commit is fixed in the last patch of this
series.
I will have a look at your commit and will try to integrated it. Thanks.

> 
> > +
> > +             for (i = PGID_AGGR; i < PGID_SRC; ++i) {
> > +                     u32 ac;
> > +
> > +                     ac = lan_rd(lan966x, ANA_PGID(i));
> > +                     ac &= ~bond_mask;
> > +                     /* Don't do division by zero if there was no active
> > +                      * port. Just make all aggregation codes zero.
> > +                      */
> > +                     if (num_active_ports)
> > +                             ac |= BIT(aggr_idx[i % num_active_ports]);
> > +                     lan_wr(ANA_PGID_PGID_SET(ac),
> > +                            lan966x, ANA_PGID(i));
> > +             }
> > +
> > +             /* Mark all ports in the same LAG as visited to avoid applying
> > +              * the same config again.
> > +              */
> > +             for (p = lag; p < lan966x->num_phys_ports; p++) {
> > +                     struct lan966x_port *port = lan966x->ports[p];
> > +
> > +                     if (!port)
> > +                             continue;
> > +
> > +                     if (port->bond == bond)
> > +                             visited |= BIT(p);
> > +             }
> > +     }
> > +}
Vladimir Oltean June 27, 2022, 9:40 a.m. UTC | #3
On Mon, Jun 27, 2022 at 08:46:12AM +0200, Horatiu Vultur wrote:
> > This incorrect logic seems to have been copied from ocelot from before
> > commit a14e6b69f393 ("net: mscc: ocelot: fix incorrect balancing with
> > down LAG ports").
> > 
> > The issue is that you calculate bond_mask with only_active_ports=true.
> > This means the for_each_set_bit() will not iterate through the inactive
> > LAG ports, and won't set the bond_mask as the PGID destination for those
> > ports.
> > 
> > That isn't what is desired; as explained in that commit, inactive LAG
> > ports should be removed via the aggregation PGIDs and not via the
> > destination PGIDs. Otherwise, an FDB entry targeted towards the
> > LAG (effectively towards the "primary" LAG port, whose logical port ID
> > gives the LAG ID) will not egress even the "secondary" LAG port if the
> > primary's link is down.
> 
> Thanks for looking at this.
> That is correct, ocelot was the source of inspiration. The issue that
> you described in the mentioned commit is fixed in the last patch of this
> series.
> I will have a look at your commit and will try to integrated it. Thanks.

I figured that would be the case, although I didn't really understand
the explanation from patch 8/8 (arguably, there it is said that the
switch tries to send on the down port, not that it won't send on the up
port, which is more relevant information). But in any case, it would be
good to introduce code that works from the beginning, rather than fix it
up in a follow-up patch. I believe that the commit I referenced is a
simplification either way, since it removes the "only_active_ports"
argument from the bond mask function.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index fd2e0ebb2427..0c22c86bdaa9 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -8,4 +8,4 @@  obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
 lan966x-switch-objs  := lan966x_main.o lan966x_phylink.o lan966x_port.o \
 			lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
 			lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
-			lan966x_ptp.o lan966x_fdma.o
+			lan966x_ptp.o lan966x_fdma.o lan966x_lag.o
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
new file mode 100644
index 000000000000..c721a05d44d2
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -0,0 +1,296 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "lan966x_main.h"
+
+static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
+{
+	u32 visited = GENMASK(lan966x->num_phys_ports - 1, 0);
+	int p, lag, i;
+
+	/* Reset destination and aggregation PGIDS */
+	for (p = 0; p < lan966x->num_phys_ports; ++p)
+		lan_wr(ANA_PGID_PGID_SET(BIT(p)),
+		       lan966x, ANA_PGID(p));
+
+	for (p = PGID_AGGR; p < PGID_SRC; ++p)
+		lan_wr(ANA_PGID_PGID_SET(visited),
+		       lan966x, ANA_PGID(p));
+
+	/* The visited ports bitmask holds the list of ports offloading any
+	 * bonding interface. Initially we mark all these ports as unvisited,
+	 * then every time we visit a port in this bitmask, we know that it is
+	 * the lowest numbered port, i.e. the one whose logical ID == physical
+	 * port ID == LAG ID. So we mark as visited all further ports in the
+	 * bitmask that are offloading the same bonding interface. This way,
+	 * we set up the aggregation PGIDs only once per bonding interface.
+	 */
+	for (p = 0; p < lan966x->num_phys_ports; ++p) {
+		struct lan966x_port *port = lan966x->ports[p];
+
+		if (!port || !port->bond)
+			continue;
+
+		visited &= ~BIT(p);
+	}
+
+	/* Now, set PGIDs for each active LAG */
+	for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
+		struct lan966x_port *port = lan966x->ports[lag];
+		int num_active_ports = 0;
+		struct net_device *bond;
+		unsigned long bond_mask;
+		u8 aggr_idx[16];
+
+		if (!port || !port->bond || (visited & BIT(lag)))
+			continue;
+
+		bond = port->bond;
+		bond_mask = lan966x_lag_get_mask(lan966x, bond, true);
+
+		for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
+			lan_wr(ANA_PGID_PGID_SET(bond_mask),
+			       lan966x, ANA_PGID(p));
+			aggr_idx[num_active_ports++] = p;
+		}
+
+		for (i = PGID_AGGR; i < PGID_SRC; ++i) {
+			u32 ac;
+
+			ac = lan_rd(lan966x, ANA_PGID(i));
+			ac &= ~bond_mask;
+			/* Don't do division by zero if there was no active
+			 * port. Just make all aggregation codes zero.
+			 */
+			if (num_active_ports)
+				ac |= BIT(aggr_idx[i % num_active_ports]);
+			lan_wr(ANA_PGID_PGID_SET(ac),
+			       lan966x, ANA_PGID(i));
+		}
+
+		/* Mark all ports in the same LAG as visited to avoid applying
+		 * the same config again.
+		 */
+		for (p = lag; p < lan966x->num_phys_ports; p++) {
+			struct lan966x_port *port = lan966x->ports[p];
+
+			if (!port)
+				continue;
+
+			if (port->bond == bond)
+				visited |= BIT(p);
+		}
+	}
+}
+
+static void lan966x_lag_set_port_ids(struct lan966x *lan966x)
+{
+	struct lan966x_port *port;
+	u32 bond_mask;
+	u32 lag_id;
+	int p;
+
+	for (p = 0; p < lan966x->num_phys_ports; ++p) {
+		port = lan966x->ports[p];
+		if (!port)
+			continue;
+
+		lag_id = port->chip_port;
+
+		bond_mask = lan966x_lag_get_mask(lan966x, port->bond, false);
+		if (bond_mask)
+			lag_id = __ffs(bond_mask);
+
+		lan_rmw(ANA_PORT_CFG_PORTID_VAL_SET(lag_id),
+			ANA_PORT_CFG_PORTID_VAL,
+			lan966x, ANA_PORT_CFG(port->chip_port));
+	}
+}
+
+int lan966x_lag_port_join(struct lan966x_port *port,
+			  struct net_device *brport_dev,
+			  struct net_device *bond,
+			  struct netlink_ext_ack *extack)
+{
+	struct lan966x *lan966x = port->lan966x;
+	struct net_device *dev = port->dev;
+
+	port->bond = bond;
+	lan966x_lag_set_port_ids(lan966x);
+	lan966x_update_fwd_mask(lan966x);
+	lan966x_lag_set_aggr_pgids(lan966x);
+
+	switchdev_bridge_port_offload(brport_dev, dev, port,
+				      &lan966x_switchdev_nb,
+				      &lan966x_switchdev_blocking_nb,
+				      false, extack);
+
+	return 0;
+}
+
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond)
+{
+	struct lan966x *lan966x = port->lan966x;
+
+	port->bond = NULL;
+	lan966x_lag_set_port_ids(lan966x);
+	lan966x_update_fwd_mask(lan966x);
+	lan966x_lag_set_aggr_pgids(lan966x);
+}
+
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+				    struct netdev_notifier_changeupper_info *info)
+{
+	struct lan966x_port *port = netdev_priv(dev);
+	struct lan966x *lan966x = port->lan966x;
+	struct netdev_lag_upper_info *lui;
+	struct netlink_ext_ack *extack;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+	lui = info->upper_info;
+	if (!lui)
+		return NOTIFY_DONE;
+
+	if (lui->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "LAG device using unsupported Tx type");
+		return notifier_from_errno(-EINVAL);
+	}
+
+	switch (lui->hash_type) {
+	case NETDEV_LAG_HASH_L2:
+		lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_SMAC_ENA_SET(1),
+		       lan966x, ANA_AGGR_CFG);
+		break;
+	case NETDEV_LAG_HASH_L34:
+		lan_wr(ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA_SET(1),
+		       lan966x, ANA_AGGR_CFG);
+		break;
+	case NETDEV_LAG_HASH_L23:
+		lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_SMAC_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+		       ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1),
+		       lan966x, ANA_AGGR_CFG);
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack,
+				   "LAG device using unsupported hash type");
+		return notifier_from_errno(-EINVAL);
+	}
+
+	return NOTIFY_OK;
+}
+
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+				      struct netdev_notifier_changelowerstate_info *info)
+{
+	struct netdev_lag_lower_state_info *lag = info->lower_state_info;
+	struct lan966x_port *port = netdev_priv(dev);
+	struct lan966x *lan966x = port->lan966x;
+	bool is_active;
+
+	if (!port->bond)
+		return NOTIFY_DONE;
+
+	is_active = lag->link_up && lag->tx_enabled;
+	if (port->lag_tx_active == is_active)
+		return NOTIFY_DONE;
+
+	port->lag_tx_active = is_active;
+	lan966x_lag_set_aggr_pgids(lan966x);
+
+	return NOTIFY_OK;
+}
+
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+				      struct netdev_notifier_changeupper_info *info)
+{
+	struct lan966x_port *port;
+	struct net_device *lower;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(dev, lower, iter) {
+		if (!lan966x_netdevice_check(lower))
+			continue;
+
+		port = netdev_priv(lower);
+		if (port->bond != dev)
+			continue;
+
+		err = lan966x_port_prechangeupper(lower, dev, info);
+		if (err)
+			return err;
+	}
+
+	return NOTIFY_DONE;
+}
+
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+				   struct netdev_notifier_changeupper_info *info)
+{
+	struct lan966x_port *port;
+	struct net_device *lower;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(dev, lower, iter) {
+		if (!lan966x_netdevice_check(lower))
+			continue;
+
+		port = netdev_priv(lower);
+		if (port->bond != dev)
+			continue;
+
+		err = lan966x_port_changeupper(lower, dev, info);
+		if (err)
+			return err;
+	}
+
+	return NOTIFY_DONE;
+}
+
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev)
+{
+	struct lan966x_port *port = netdev_priv(dev);
+	struct lan966x *lan966x = port->lan966x;
+	unsigned long bond_mask;
+
+	if (port->bond != lag)
+		return false;
+
+	bond_mask = lan966x_lag_get_mask(lan966x, lag, false);
+	if (bond_mask && port->chip_port == __ffs(bond_mask))
+		return true;
+
+	return false;
+}
+
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond,
+			 bool only_active_ports)
+{
+	struct lan966x_port *port;
+	u32 mask = 0;
+	int p;
+
+	if (!bond)
+		return mask;
+
+	for (p = 0; p < lan966x->num_phys_ports; p++) {
+		port = lan966x->ports[p];
+		if (!port)
+			continue;
+
+		if (port->bond == bond) {
+			if (only_active_ports && !port->lag_tx_active)
+				continue;
+
+			mask |= BIT(p);
+		}
+	}
+
+	return mask;
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 4701c20c8393..2c382cf8fe3a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -291,6 +291,9 @@  struct lan966x_port {
 	u8 ptp_cmd;
 	u16 ts_id;
 	struct sk_buff_head tx_skbs;
+
+	struct net_device *bond;
+	bool lag_tx_active;
 };
 
 extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
@@ -407,6 +410,31 @@  int lan966x_fdma_init(struct lan966x *lan966x);
 void lan966x_fdma_deinit(struct lan966x *lan966x);
 irqreturn_t lan966x_fdma_irq_handler(int irq, void *args);
 
+int lan966x_lag_port_join(struct lan966x_port *port,
+			  struct net_device *brport_dev,
+			  struct net_device *bond,
+			  struct netlink_ext_ack *extack);
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond);
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+				    struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+				      struct netdev_notifier_changelowerstate_info *info);
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+				      struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+				   struct netdev_notifier_changeupper_info *info);
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev);
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond,
+			 bool only_active_ports);
+
+int lan966x_port_changeupper(struct net_device *dev,
+			     struct net_device *brport_dev,
+			     struct netdev_notifier_changeupper_info *info);
+int lan966x_port_prechangeupper(struct net_device *dev,
+				struct net_device *brport_dev,
+				struct netdev_notifier_changeupper_info *info);
+void lan966x_update_fwd_mask(struct lan966x *lan966x);
+
 static inline void __iomem *lan_addr(void __iomem *base[],
 				     int id, int tinst, int tcnt,
 				     int gbase, int ginst,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 4bc626ce031a..9fa6868f79a3 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -130,7 +130,7 @@  static int lan966x_port_pre_bridge_flags(struct lan966x_port *port,
 	return 0;
 }
 
-static void lan966x_update_fwd_mask(struct lan966x *lan966x)
+void lan966x_update_fwd_mask(struct lan966x *lan966x)
 {
 	int i;
 
@@ -138,9 +138,15 @@  static void lan966x_update_fwd_mask(struct lan966x *lan966x)
 		struct lan966x_port *port = lan966x->ports[i];
 		unsigned long mask = 0;
 
-		if (port && lan966x->bridge_fwd_mask & BIT(i))
+		if (port && lan966x->bridge_fwd_mask & BIT(i)) {
 			mask = lan966x->bridge_fwd_mask & ~BIT(i);
 
+			if (port->bond)
+				mask &= ~lan966x_lag_get_mask(lan966x,
+							      port->bond,
+							      false);
+		}
+
 		mask |= BIT(CPU_PORT);
 
 		lan_wr(ANA_PGID_PGID_SET(mask),
@@ -239,6 +245,7 @@  static int lan966x_port_attr_set(struct net_device *dev, const void *ctx,
 }
 
 static int lan966x_port_bridge_join(struct lan966x_port *port,
+				    struct net_device *brport_dev,
 				    struct net_device *bridge,
 				    struct netlink_ext_ack *extack)
 {
@@ -256,7 +263,7 @@  static int lan966x_port_bridge_join(struct lan966x_port *port,
 		}
 	}
 
-	err = switchdev_bridge_port_offload(dev, dev, port,
+	err = switchdev_bridge_port_offload(brport_dev, dev, port,
 					    &lan966x_switchdev_nb,
 					    &lan966x_switchdev_blocking_nb,
 					    false, extack);
@@ -293,8 +300,9 @@  static void lan966x_port_bridge_leave(struct lan966x_port *port,
 	lan966x_vlan_port_apply(port);
 }
 
-static int lan966x_port_changeupper(struct net_device *dev,
-				    struct netdev_notifier_changeupper_info *info)
+int lan966x_port_changeupper(struct net_device *dev,
+			     struct net_device *brport_dev,
+			     struct netdev_notifier_changeupper_info *info)
 {
 	struct lan966x_port *port = netdev_priv(dev);
 	struct netlink_ext_ack *extack;
@@ -304,25 +312,42 @@  static int lan966x_port_changeupper(struct net_device *dev,
 
 	if (netif_is_bridge_master(info->upper_dev)) {
 		if (info->linking)
-			err = lan966x_port_bridge_join(port, info->upper_dev,
+			err = lan966x_port_bridge_join(port, brport_dev,
+						       info->upper_dev,
 						       extack);
 		else
 			lan966x_port_bridge_leave(port, info->upper_dev);
 	}
 
+	if (netif_is_lag_master(info->upper_dev)) {
+		if (info->linking)
+			err = lan966x_lag_port_join(port, info->upper_dev,
+						    info->upper_dev,
+						    extack);
+		else
+			lan966x_lag_port_leave(port, info->upper_dev);
+	}
+
 	return err;
 }
 
-static int lan966x_port_prechangeupper(struct net_device *dev,
-				       struct netdev_notifier_changeupper_info *info)
+int lan966x_port_prechangeupper(struct net_device *dev,
+				struct net_device *brport_dev,
+				struct netdev_notifier_changeupper_info *info)
 {
 	struct lan966x_port *port = netdev_priv(dev);
+	int err = NOTIFY_DONE;
 
 	if (netif_is_bridge_master(info->upper_dev) && !info->linking)
-		switchdev_bridge_port_unoffload(port->dev, port,
-						NULL, NULL);
+		switchdev_bridge_port_unoffload(brport_dev, port, NULL, NULL);
 
-	return NOTIFY_DONE;
+	if (netif_is_lag_master(info->upper_dev) && info->linking)
+		err = lan966x_lag_port_prechangeupper(dev, info);
+
+	if (netif_is_lag_master(info->upper_dev) && !info->linking)
+		switchdev_bridge_port_unoffload(brport_dev, port, NULL, NULL);
+
+	return err;
 }
 
 static int lan966x_foreign_bridging_check(struct net_device *upper,
@@ -400,21 +425,44 @@  static int lan966x_netdevice_port_event(struct net_device *dev,
 	int err = 0;
 
 	if (!lan966x_netdevice_check(dev)) {
-		if (event == NETDEV_CHANGEUPPER)
-			return lan966x_bridge_check(dev, ptr);
+		switch (event) {
+		case NETDEV_CHANGEUPPER:
+		case NETDEV_PRECHANGEUPPER:
+			err = lan966x_bridge_check(dev, ptr);
+			if (err)
+				return err;
+
+			if (netif_is_lag_master(dev)) {
+				if (event == NETDEV_CHANGEUPPER)
+					err = lan966x_lag_netdev_changeupper(dev,
+									     ptr);
+				else
+					err = lan966x_lag_netdev_prechangeupper(dev,
+										ptr);
+
+				return err;
+			}
+			break;
+		default:
+			return 0;
+		}
+
 		return 0;
 	}
 
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
-		err = lan966x_port_prechangeupper(dev, ptr);
+		err = lan966x_port_prechangeupper(dev, dev, ptr);
 		break;
 	case NETDEV_CHANGEUPPER:
 		err = lan966x_bridge_check(dev, ptr);
 		if (err)
 			return err;
 
-		err = lan966x_port_changeupper(dev, ptr);
+		err = lan966x_port_changeupper(dev, dev, ptr);
+		break;
+	case NETDEV_CHANGELOWERSTATE:
+		err = lan966x_lag_port_changelowerstate(dev, ptr);
 		break;
 	}