diff mbox series

net: dsa: mv88e6060: report max mtu 1536

Message ID 20220810082745.1466895-1-saproj@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: dsa: mv88e6060: report max mtu 1536 | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 6 maintainers not CCed: vivien.didelot@gmail.com davem@davemloft.net edumazet@google.com kuba@kernel.org pabeni@redhat.com andrew@lunn.ch
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 32 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Sergei Antonov Aug. 10, 2022, 8:27 a.m. UTC
This driver sets the MaxFrameSize bit to 1 during setup,
see GLOBAL_CONTROL_MAX_FRAME_1536 in mv88e6060_setup_global().
Thus MTU is always 1536.
Introduce mv88e6060_port_max_mtu() to report it back to system.

Signed-off-by: Sergei Antonov <saproj@gmail.com>
CC: Vladimir Oltean <olteanv@gmail.com>
CC: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/mv88e6060.c | 7 ++++++-
 drivers/net/dsa/mv88e6060.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

Comments

Vladimir Oltean Aug. 10, 2022, 10:08 a.m. UTC | #1
Hi Sergei,

On Wed, Aug 10, 2022 at 11:27:45AM +0300, Sergei Antonov wrote:
> This driver sets the MaxFrameSize bit to 1 during setup,
> see GLOBAL_CONTROL_MAX_FRAME_1536 in mv88e6060_setup_global().
> Thus MTU is always 1536.
> Introduce mv88e6060_port_max_mtu() to report it back to system.
> 
> Signed-off-by: Sergei Antonov <saproj@gmail.com>
> CC: Vladimir Oltean <olteanv@gmail.com>
> CC: Florian Fainelli <f.fainelli@gmail.com>
> ---
>  drivers/net/dsa/mv88e6060.c | 7 ++++++-
>  drivers/net/dsa/mv88e6060.h | 1 +
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
> index a4c6eb9a52d0..c53734379b96 100644
> --- a/drivers/net/dsa/mv88e6060.c
> +++ b/drivers/net/dsa/mv88e6060.c
> @@ -160,7 +160,6 @@ static int mv88e6060_setup_addr(struct mv88e6060_priv *priv)
>  	u16 val;
>  
>  	eth_random_addr(addr);
> -

Extraneous change.

>  	val = addr[0] << 8 | addr[1];
>  
>  	/* The multicast bit is always transmitted as a zero, so the switch uses
> @@ -212,6 +211,11 @@ static int mv88e6060_setup(struct dsa_switch *ds)
>  	return 0;
>  }
>  
> +static int mv88e6060_port_max_mtu(struct dsa_switch *ds, int port)
> +{
> +	return MV88E6060_MAX_MTU;
> +}

Does this solve any problem? It's ok for the hardware MTU to be higher
than advertised. The problem is when the hardware doesn't accept what
the stack thinks it should.

> +
>  static int mv88e6060_port_to_phy_addr(int port)
>  {
>  	if (port >= 0 && port < MV88E6060_PORTS)
> @@ -247,6 +251,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
>  static const struct dsa_switch_ops mv88e6060_switch_ops = {
>  	.get_tag_protocol = mv88e6060_get_tag_protocol,
>  	.setup		= mv88e6060_setup,
> +	.port_max_mtu	= mv88e6060_port_max_mtu,
>  	.phy_read	= mv88e6060_phy_read,
>  	.phy_write	= mv88e6060_phy_write,
>  };
> diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
> index 6c13c2421b64..382fe462fb2d 100644
> --- a/drivers/net/dsa/mv88e6060.h
> +++ b/drivers/net/dsa/mv88e6060.h
> @@ -11,6 +11,7 @@
>  #define __MV88E6060_H
>  
>  #define MV88E6060_PORTS	6
> +#define MV88E6060_MAX_MTU	1536
>  
>  #define REG_PORT(p)		(0x8 + (p))
>  #define PORT_STATUS		0x00
> -- 
> 2.32.0
> 

You're the first person to submit a patch on mv88e6060 that I see.
Is there a board with this switch available somewhere? Does the driver
still work?
Sergei Antonov Aug. 10, 2022, noon UTC | #2
On Wed, 10 Aug 2022 at 13:08, Vladimir Oltean <olteanv@gmail.com> wrote:
>
> Hi Sergei,
>
> On Wed, Aug 10, 2022 at 11:27:45AM +0300, Sergei Antonov wrote:
> > This driver sets the MaxFrameSize bit to 1 during setup,
> > see GLOBAL_CONTROL_MAX_FRAME_1536 in mv88e6060_setup_global().
> > Thus MTU is always 1536.
> > Introduce mv88e6060_port_max_mtu() to report it back to system.
> >
> > Signed-off-by: Sergei Antonov <saproj@gmail.com>
> > CC: Vladimir Oltean <olteanv@gmail.com>
> > CC: Florian Fainelli <f.fainelli@gmail.com>
> > ---
> >  drivers/net/dsa/mv88e6060.c | 7 ++++++-
> >  drivers/net/dsa/mv88e6060.h | 1 +
> >  2 files changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
> > index a4c6eb9a52d0..c53734379b96 100644
> > --- a/drivers/net/dsa/mv88e6060.c
> > +++ b/drivers/net/dsa/mv88e6060.c
> > @@ -160,7 +160,6 @@ static int mv88e6060_setup_addr(struct mv88e6060_priv *priv)
> >       u16 val;
> >
> >       eth_random_addr(addr);
> > -
>
> Extraneous change.

Thanks! My fault.

> >       val = addr[0] << 8 | addr[1];
> >
> >       /* The multicast bit is always transmitted as a zero, so the switch uses
> > @@ -212,6 +211,11 @@ static int mv88e6060_setup(struct dsa_switch *ds)
> >       return 0;
> >  }
> >
> > +static int mv88e6060_port_max_mtu(struct dsa_switch *ds, int port)
> > +{
> > +     return MV88E6060_MAX_MTU;
> > +}
>
> Does this solve any problem? It's ok for the hardware MTU to be higher
> than advertised. The problem is when the hardware doesn't accept what
> the stack thinks it should.

I need some time to reconstruct the problem. IIRC there was an attempt
to set MTU 1504 (1500 + a switch overhead), but can not reproduce it
at the moment.

> > +
> >  static int mv88e6060_port_to_phy_addr(int port)
> >  {
> >       if (port >= 0 && port < MV88E6060_PORTS)
> > @@ -247,6 +251,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
> >  static const struct dsa_switch_ops mv88e6060_switch_ops = {
> >       .get_tag_protocol = mv88e6060_get_tag_protocol,
> >       .setup          = mv88e6060_setup,
> > +     .port_max_mtu   = mv88e6060_port_max_mtu,
> >       .phy_read       = mv88e6060_phy_read,
> >       .phy_write      = mv88e6060_phy_write,
> >  };
> > diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
> > index 6c13c2421b64..382fe462fb2d 100644
> > --- a/drivers/net/dsa/mv88e6060.h
> > +++ b/drivers/net/dsa/mv88e6060.h
> > @@ -11,6 +11,7 @@
> >  #define __MV88E6060_H
> >
> >  #define MV88E6060_PORTS      6
> > +#define MV88E6060_MAX_MTU    1536
> >
> >  #define REG_PORT(p)          (0x8 + (p))
> >  #define PORT_STATUS          0x00
> > --
> > 2.32.0
> >
>
> You're the first person to submit a patch on mv88e6060 that I see.
> Is there a board with this switch available somewhere? Does the driver
> still work?

Very nice to get your feedback. Because, yes, I am working with a
device which has mv88e6060, it is called MOXA NPort 6610.

The driver works now. There was one problem which I had to workaround.
Inside my device only ports 2 and 5 are used, so I initially wrote in
.dts:
        switch@0 {
                compatible = "marvell,mv88e6060";
                reg = <16>;

                ports {
                        #address-cells = <1>;
                        #size-cells = <0>;

                        port@2 {
                                reg = <2>;
                                label = "lan2";
                        };

                        port@5 {
                                reg = <5>;
                                label = "cpu";
                                ethernet = <&mac1>;
                        };
                };
        };
and the driver crashed in mv88e6060_setup_port() on a null pointer.
Two workarounds are possible:
1. Describe ports 0, 1, 3, 4 in .dts too.
2. Insert this code at the beginning of mv88e6060_setup_port():
if(!dsa_is_cpu_port(priv->ds, p) && !dsa_to_port(priv->ds, p)->cpu_dp)
    return 0;
'cpu_dp' was the null pointer the driver crashed at.

One more observation. Generating and setting a random MAC in
mv88e6060_setup_addr() is not necessary - the switch works without it
(at least in my case).
Vladimir Oltean Aug. 10, 2022, 1:35 p.m. UTC | #3
On Wed, Aug 10, 2022 at 03:00:20PM +0300, Sergei Antonov wrote:
> > >       val = addr[0] << 8 | addr[1];
> > >
> > >       /* The multicast bit is always transmitted as a zero, so the switch uses
> > > @@ -212,6 +211,11 @@ static int mv88e6060_setup(struct dsa_switch *ds)
> > >       return 0;
> > >  }
> > >
> > > +static int mv88e6060_port_max_mtu(struct dsa_switch *ds, int port)
> > > +{
> > > +     return MV88E6060_MAX_MTU;
> > > +}
> >
> > Does this solve any problem? It's ok for the hardware MTU to be higher
> > than advertised. The problem is when the hardware doesn't accept what
> > the stack thinks it should.
> 
> I need some time to reconstruct the problem. IIRC there was an attempt
> to set MTU 1504 (1500 + a switch overhead), but can not reproduce it
> at the moment.

What kernel are you using? According to Documentation/process/maintainer-netdev.rst,
you should test the patches you submit against the master branch from one of
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
or
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
depending on whether it's a new feature or if it fixes a problem.

Currently, both net and net-next contain the same thing (we are in a
merge window so net-next will not progress until kernel 6.0-rc1 is cut),
which is that dsa_slave_change_mtu() will not do anything because of
this:

	if (!ds->ops->port_change_mtu)
		return -EOPNOTSUPP;

(which mv88e6060 does not implement)

So I am slightly doubtful that anyone attempts an MTU change for this
switch, as you say.

The DSA master (host port, not switch), on the other hand, is a
different story. Its MTU is updated to 1504 by dsa_master_setup().

> > You're the first person to submit a patch on mv88e6060 that I see.
> > Is there a board with this switch available somewhere? Does the driver
> > still work?
> 
> Very nice to get your feedback. Because, yes, I am working with a
> device which has mv88e6060, it is called MOXA NPort 6610.
> 
> The driver works now. There was one problem which I had to workaround.
> Inside my device only ports 2 and 5 are used, so I initially wrote in
> .dts:
>         switch@0 {
>                 compatible = "marvell,mv88e6060";
>                 reg = <16>;

reg = <16> for switch@0? Something is wrong, probably switch@0.

> 
>                 ports {
>                         #address-cells = <1>;
>                         #size-cells = <0>;
> 
>                         port@2 {
>                                 reg = <2>;
>                                 label = "lan2";
>                         };
> 
>                         port@5 {
>                                 reg = <5>;
>                                 label = "cpu";
>                                 ethernet = <&mac1>;
>                         };
>                 };
>         };
> and the driver crashed in mv88e6060_setup_port() on a null pointer.
> Two workarounds are possible:
> 1. Describe ports 0, 1, 3, 4 in .dts too.

No, it should work with just port@2 and port@5 defined.

> 2. Insert this code at the beginning of mv88e6060_setup_port():
> if(!dsa_is_cpu_port(priv->ds, p) && !dsa_to_port(priv->ds, p)->cpu_dp)
>     return 0;
> 'cpu_dp' was the null pointer the driver crashed at.

You mean here:

			(dsa_is_cpu_port(priv->ds, p) ?
			 dsa_user_ports(priv->ds) :
			 BIT(dsa_to_port(priv->ds, p)->cpu_dp->index)));

Yes, this is a limitation that has been made worse by blind code
conversions (nobody seems to have the hardware or to know someone who
does; I've been tempted to delete the driver a few times or at least to
move it to staging, because of the unrealistically long delays until
someone chirps that something is broken for it, even when it obviously is).
The driver assumes that if the port isn't a CPU port, it's a user port.
That's clearly false.

You can probably put this at the beginning of mv88e6060_setup_port():

	if (dsa_is_unused_port(priv->ds, p))
		return 0;

The bug seems to have been introduced by commit 0abfd494deef ("net: dsa:
use dedicated CPU port"), because, although before we'd be uselessly
programming the port VLAN for a disabled port, now in doing so, we
dereference a NULL pointer.

FWIW, in case there is ever a need to backport, the vintage-correct fix
would be to use something like this:

	if (!dsa_port_is_valid(priv->ds->ports[p]))
		return 0;

but in that case the process is:
- send patch against current "net" tree
- wait until patch is queued up for "linux-stable" and backported as far
  as possible
- email will be sent that patch failed to apply to the still-maintained
  LTS branches as far as the Fixes: tag required (this is why it is
  important to populate the Fixes: tag correctly)
- reply to that email with a manually backported patch, just for that
  stable tree (linux-4.14.y etc)

> 
> One more observation. Generating and setting a random MAC in
> mv88e6060_setup_addr() is not necessary - the switch works without it
> (at least in my case).

The GLOBAL_MAC address that the switch uses there will be used as MAC SA
in PAUSE frames (802.3 flow control). Not clear if you were aware of
that fact when saying that the switch "works without it". In other words,
if you make a change in that area, I expect that flow control is what
you test, and not, say, ping.

It's true that some other switches use a MAC SA of 00:00:00:00:00:00 for
PAUSE frames (ocelot_init_port) and this hasn't caused a problem for them.
I don't know if the 6060 supports this mode. If it does, it's worth a shot.
Sergei Antonov Aug. 10, 2022, 3:56 p.m. UTC | #4
On Wed, 10 Aug 2022 at 16:35, Vladimir Oltean <olteanv@gmail.com> wrote:
>
> On Wed, Aug 10, 2022 at 03:00:20PM +0300, Sergei Antonov wrote:
> > > >       val = addr[0] << 8 | addr[1];
> > > >
> > > >       /* The multicast bit is always transmitted as a zero, so the switch uses
> > > > @@ -212,6 +211,11 @@ static int mv88e6060_setup(struct dsa_switch *ds)
> > > >       return 0;
> > > >  }
> > > >
> > > > +static int mv88e6060_port_max_mtu(struct dsa_switch *ds, int port)
> > > > +{
> > > > +     return MV88E6060_MAX_MTU;
> > > > +}
> > >
> > > Does this solve any problem? It's ok for the hardware MTU to be higher
> > > than advertised. The problem is when the hardware doesn't accept what
> > > the stack thinks it should.
> >
> > I need some time to reconstruct the problem. IIRC there was an attempt
> > to set MTU 1504 (1500 + a switch overhead), but can not reproduce it
> > at the moment.
>
> What kernel are you using? According to Documentation/process/maintainer-netdev.rst,
> you should test the patches you submit against the master branch from one of
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
> or
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
> depending on whether it's a new feature or if it fixes a problem.
>
> Currently, both net and net-next contain the same thing (we are in a
> merge window so net-next will not progress until kernel 6.0-rc1 is cut),
> which is that dsa_slave_change_mtu() will not do anything because of
> this:
>
>         if (!ds->ops->port_change_mtu)
>                 return -EOPNOTSUPP;
>
> (which mv88e6060 does not implement)
>
> So I am slightly doubtful that anyone attempts an MTU change for this
> switch, as you say.
>
> The DSA master (host port, not switch), on the other hand, is a
> different story. Its MTU is updated to 1504 by dsa_master_setup().
>
> > > You're the first person to submit a patch on mv88e6060 that I see.
> > > Is there a board with this switch available somewhere? Does the driver
> > > still work?
> >
> > Very nice to get your feedback. Because, yes, I am working with a
> > device which has mv88e6060, it is called MOXA NPort 6610.
> >
> > The driver works now. There was one problem which I had to workaround.
> > Inside my device only ports 2 and 5 are used, so I initially wrote in
> > .dts:
> >         switch@0 {
> >                 compatible = "marvell,mv88e6060";
> >                 reg = <16>;
>
> reg = <16> for switch@0? Something is wrong, probably switch@0.

Thanks for noticing it.
In my case the device addresses are:
  PHY Registers - 0x10-0x14
  Switch Core Registers - 0x18-0x1D
  Switch Global Registers - 0x1F
I renamed switch@0 to switch@10 and made reg hexadecimal for clarity:
"reg = <0x10>". It works, see below for more information on testing.
Should I leave it like so?

> > 2. Insert this code at the beginning of mv88e6060_setup_port():
> > if(!dsa_is_cpu_port(priv->ds, p) && !dsa_to_port(priv->ds, p)->cpu_dp)
> >     return 0;
> > 'cpu_dp' was the null pointer the driver crashed at.
>
> You mean here:
>
>                         (dsa_is_cpu_port(priv->ds, p) ?
>                          dsa_user_ports(priv->ds) :
>                          BIT(dsa_to_port(priv->ds, p)->cpu_dp->index)));

Yes.

> Yes, this is a limitation that has been made worse by blind code
> conversions (nobody seems to have the hardware or to know someone who
> does; I've been tempted to delete the driver a few times or at least to
> move it to staging, because of the unrealistically long delays until
> someone chirps that something is broken for it, even when it obviously is).
> The driver assumes that if the port isn't a CPU port, it's a user port.
> That's clearly false.
>
> You can probably put this at the beginning of mv88e6060_setup_port():
>
>         if (dsa_is_unused_port(priv->ds, p))
>                 return 0;
>
> The bug seems to have been introduced by commit 0abfd494deef ("net: dsa:
> use dedicated CPU port"), because, although before we'd be uselessly
> programming the port VLAN for a disabled port, now in doing so, we
> dereference a NULL pointer.

The suggested fix with dsa_is_unused_port() works. I tested it on the
'netdev/net.git' repo, see below. Should I submit it as a patch
(Fixes: 0abfd494deef)?

So I tested "dsa_is_unused_port()" and "switch@10" fixes with
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
What I did after system boot-up:

~ # dmesg | grep mv88
[    7.187296] mv88e6060 92000090.mdio--1-mii:10: switch Marvell
88E6060 (B0) detected
[    8.325712] mv88e6060 92000090.mdio--1-mii:10: switch Marvell
88E6060 (B0) detected
[    9.190299] mv88e6060 92000090.mdio--1-mii:10 lan2 (uninitialized):
PHY [dsa-0.0:02] driver [Generic PHY] (irq=POLL)

~ # ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST> mtu 1504 qdisc noop qlen 1000
    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
3: lan2@eth0: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop qlen 1000
    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

~ # ip link set dev eth0 address 00:90:e8:00:10:03 up

~ # ip a add 192.168.127.254/24 dev lan2

~ # ip link set dev lan2 address 00:90:e8:00:10:03 up
[   56.383801] DSA: failed to set STP state 3 (-95)
[   56.385491] mv88e6060 92000090.mdio--1-mii:10 lan2: configuring for
phy/gmii link mode
[   58.694319] mv88e6060 92000090.mdio--1-mii:10 lan2: Link is Up -
100Mbps/Full - flow control off
[   58.699244] IPv6: ADDRCONF(NETDEV_CHANGE): lan2: link becomes ready

~ # ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1504 qdisc pfifo_fast qlen 1000
    link/ether 00:90:e8:00:10:03 brd ff:ff:ff:ff:ff:ff
    inet6 fe80::290:e8ff:fe00:1003/64 scope link
       valid_lft forever preferred_lft forever
3: lan2@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue qlen 1000
    link/ether 00:90:e8:00:10:03 brd ff:ff:ff:ff:ff:ff
    inet 192.168.127.254/24 scope global lan2
       valid_lft forever preferred_lft forever
    inet6 fe80::290:e8ff:fe00:1003/64 scope link
       valid_lft forever preferred_lft forever

Ping, ssh, scp work.

Is it correct for eth0 and lan2@eth0 to have the same MAC? I could not
make it work with different MACs.

> FWIW, in case there is ever a need to backport, the vintage-correct fix
> would be to use something like this:
>
>         if (!dsa_port_is_valid(priv->ds->ports[p]))
>                 return 0;
>
> but in that case the process is:
> - send patch against current "net" tree
> - wait until patch is queued up for "linux-stable" and backported as far
>   as possible
> - email will be sent that patch failed to apply to the still-maintained
>   LTS branches as far as the Fixes: tag required (this is why it is
>   important to populate the Fixes: tag correctly)
> - reply to that email with a manually backported patch, just for that
>   stable tree (linux-4.14.y etc)
>
> >
> > One more observation. Generating and setting a random MAC in
> > mv88e6060_setup_addr() is not necessary - the switch works without it
> > (at least in my case).
>
> The GLOBAL_MAC address that the switch uses there will be used as MAC SA
> in PAUSE frames (802.3 flow control). Not clear if you were aware of
> that fact when saying that the switch "works without it". In other words,
> if you make a change in that area, I expect that flow control is what
> you test, and not, say, ping.
>
> It's true that some other switches use a MAC SA of 00:00:00:00:00:00 for
> PAUSE frames (ocelot_init_port) and this hasn't caused a problem for them.
> I don't know if the 6060 supports this mode. If it does, it's worth a shot.

I don't know how to test flow control. Ping, ssh, scp work even with
mv88e6060_setup_addr() code removed. Of course, if MAC SA plays some
role in other scenarios, let it be :).
Vladimir Oltean Aug. 10, 2022, 7:38 p.m. UTC | #5
On Wed, Aug 10, 2022 at 06:56:25PM +0300, Sergei Antonov wrote:
> > reg = <16> for switch@0? Something is wrong, probably switch@0.
> 
> Thanks for noticing it.
> In my case the device addresses are:
>   PHY Registers - 0x10-0x14
>   Switch Core Registers - 0x18-0x1D
>   Switch Global Registers - 0x1F
> I renamed switch@0 to switch@10 and made reg hexadecimal for clarity:
> "reg = <0x10>". It works, see below for more information on testing.
> Should I leave it like so?

Should be fine like that. I think Marvell switches tend to have this
habit of occupying multiple PHY addresses, and our DT bindings want to
see the first one.

> > The bug seems to have been introduced by commit 0abfd494deef ("net: dsa:
> > use dedicated CPU port"), because, although before we'd be uselessly
> > programming the port VLAN for a disabled port, now in doing so, we
> > dereference a NULL pointer.
> 
> The suggested fix with dsa_is_unused_port() works. I tested it on the
> 'netdev/net.git' repo, see below. Should I submit it as a patch
> (Fixes: 0abfd494deef)?

Yes. See the section that talks about "git log -1 --pretty=fixes" in
process/submitting-patches.rst for how the Fixes tag should actually
look like.

I thought about whether dsa_is_unused_port() is sufficient, since
theoretically dsa_is_dsa_port() is also a possibility which isn't
covered by the check. But I rechecked and it appears that the Marvell
6060 doesn't support cascade ports, so we should be fine with just that.

> So I tested "dsa_is_unused_port()" and "switch@10" fixes with
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
> What I did after system boot-up:
> 
> ~ # dmesg | grep mv88
> [    7.187296] mv88e6060 92000090.mdio--1-mii:10: switch Marvell 88E6060 (B0) detected
> [    8.325712] mv88e6060 92000090.mdio--1-mii:10: switch Marvell 88E6060 (B0) detected
> [    9.190299] mv88e6060 92000090.mdio--1-mii:10 lan2 (uninitialized): PHY [dsa-0.0:02] driver [Generic PHY] (irq=POLL)
> 
> ~ # ip a
> 1: lo: <LOOPBACK> mtu 65536 qdisc noop qlen 1000
>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST> mtu 1504 qdisc noop qlen 1000
>     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

The DSA master is super odd for starting with an all-zero MAC address.
What driver handles this part? Normally, drivers are expected to work
with a MAC address provided by the firmware (of_get_mac_address or
other, perhaps proprietary, means) and fall back to eth_random_addr()
if that is missing.

> 3: lan2@eth0: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop qlen 1000
>     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

Here DSA inherits the MAC address of the master. It does this by default
in dsa_slave_create() -> eth_hw_addr_inherit(). If the OF node for the
DSA port has its own MAC address, that will have priority over the MAC
address of the master.

> ~ # ip link set dev eth0 address 00:90:e8:00:10:03 up

This shouldn't be necessary, neither assigning a MAC address nor putting
the master up, see Documentation/networking/dsa/configuration.rst, the
master comes up automatically.

> ~ # ip a add 192.168.127.254/24 dev lan2
> 
> ~ # ip link set dev lan2 address 00:90:e8:00:10:03 up
> [   56.383801] DSA: failed to set STP state 3 (-95)

errno 95 is EOPNOTSUPP, we shouldn't warn here, I'll submit a patch for
that.

> [   56.385491] mv88e6060 92000090.mdio--1-mii:10 lan2: configuring for phy/gmii link mode
> [   58.694319] mv88e6060 92000090.mdio--1-mii:10 lan2: Link is Up - 100Mbps/Full - flow control off

The link was negotiated without flow control, so you can't test flow
control under these conditions. This depends upon what the internal PHY
of the mv88e6060 is advertising, and what the link partner is advertising.
What is advertised is a subset of what is supported (and resolved by
linkmode_resolve_pause).

I'm a bit uncertain as to what happens when the 6060 driver doesn't
validate the phylink supported mask at all (it doesn't populate the
mac_capabilities structure and it doesn't implement ds->ops->phylink_validate)
but I think what happens is that whatever the PHY supports isn't further
reduced in any way by the MAC side of things.

> [   58.699244] IPv6: ADDRCONF(NETDEV_CHANGE): lan2: link becomes ready
> 
> ~ # ip a
> 1: lo: <LOOPBACK> mtu 65536 qdisc noop qlen 1000
>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1504 qdisc pfifo_fast qlen 1000
>     link/ether 00:90:e8:00:10:03 brd ff:ff:ff:ff:ff:ff
>     inet6 fe80::290:e8ff:fe00:1003/64 scope link
>        valid_lft forever preferred_lft forever
> 3: lan2@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue qlen 1000
>     link/ether 00:90:e8:00:10:03 brd ff:ff:ff:ff:ff:ff
>     inet 192.168.127.254/24 scope global lan2
>        valid_lft forever preferred_lft forever
>     inet6 fe80::290:e8ff:fe00:1003/64 scope link
>        valid_lft forever preferred_lft forever
> 
> Ping, ssh, scp work.
> 
> Is it correct for eth0 and lan2@eth0 to have the same MAC?

It is not wrong, it's a configuration that many deployed DSA systems use.

> I could not make it work with different MACs.

That is a problem, and I believe it's a problem with the DSA master driver.
See, the reason it should work is this. Switch ports don't really have a
MAC address, since they forward everything and not really terminate anything.
The MAC address of a switch port is a software construct which means
that software L3 termination interfaces (of which we have one per port)
should accept packets with some known MAC DA, and drop the rest, and
everything should be fine.

There are multiple kinds of DSA tags, but 6060 uses a trailer, and this
will not shift the 'real' MAC DA of packets compared to where the DSA
master expects to see them. So if the MAC address of the DSA master is A,
the MAC address of lan2 is B, and you ping lan2 from the outside world,
the DSA master will see packets with a MAC DA of B.

So the DSA master sees packets with a MAC DA different from its own
dev->dev_addr (A) and thinks it's within its right to drop them. Except
that it isn't, because we do the following to prevent it:

(1) in case the DSA master supports IFF_UNICAST_FLT, we call dev_uc_add()
    from dsa_slave_set_mac_address() and from dsa_slave_open(), and this
    informs it of our address B.
(2) in case it doesn't support IFF_UNICAST_FLT, we just call
    dsa_master_set_promiscuity() and that should keep it promiscuous and
    it should accept packets regardless of MAC DA (that's the definition).

So you should run some tcpdump and ethtool -S on the DSA master and see
whether it receives any packets or it drops them. It's possible that
tcpdump makes packets be accepted, since it puts the interface in
promiscuous mode.

> I don't know how to test flow control. Ping, ssh, scp work even with
> mv88e6060_setup_addr() code removed. Of course, if MAC SA plays some
> role in other scenarios, let it be :).

I think it's best to leave alone things you don't really care about.
The address in mv88e6060_setup_addr() should have nothing to do with
what you really seem to want to know, just with flow control.
Sergei Antonov Aug. 11, 2022, 8:23 a.m. UTC | #6
On Wed, 10 Aug 2022 at 22:38, Vladimir Oltean <olteanv@gmail.com> wrote:
> > > The bug seems to have been introduced by commit 0abfd494deef ("net: dsa:
> > > use dedicated CPU port"), because, although before we'd be uselessly
> > > programming the port VLAN for a disabled port, now in doing so, we
> > > dereference a NULL pointer.
> >
> > The suggested fix with dsa_is_unused_port() works. I tested it on the
> > 'netdev/net.git' repo, see below. Should I submit it as a patch
> > (Fixes: 0abfd494deef)?
>
> Yes. See the section that talks about "git log -1 --pretty=fixes" in
> process/submitting-patches.rst for how the Fixes tag should actually
> look like.
>
> I thought about whether dsa_is_unused_port() is sufficient, since
> theoretically dsa_is_dsa_port() is also a possibility which isn't
> covered by the check. But I rechecked and it appears that the Marvell
> 6060 doesn't support cascade ports, so we should be fine with just that.

Great. I submitted a patch.

> > So I tested "dsa_is_unused_port()" and "switch@10" fixes with
> > https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
> > What I did after system boot-up:
> >
> > ~ # dmesg | grep mv88
> > [    7.187296] mv88e6060 92000090.mdio--1-mii:10: switch Marvell 88E6060 (B0) detected
> > [    8.325712] mv88e6060 92000090.mdio--1-mii:10: switch Marvell 88E6060 (B0) detected
> > [    9.190299] mv88e6060 92000090.mdio--1-mii:10 lan2 (uninitialized): PHY [dsa-0.0:02] driver [Generic PHY] (irq=POLL)
> >
> > ~ # ip a
> > 1: lo: <LOOPBACK> mtu 65536 qdisc noop qlen 1000
> >     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> > 2: eth0: <BROADCAST,MULTICAST> mtu 1504 qdisc noop qlen 1000
> >     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
>
> The DSA master is super odd for starting with an all-zero MAC address.
> What driver handles this part? Normally, drivers are expected to work
> with a MAC address provided by the firmware (of_get_mac_address or
> other, perhaps proprietary, means) and fall back to eth_random_addr()
> if that is missing.

eth0 is handled by the CONFIG_ARM_MOXART_ETHER driver. By the way, I
had to change some code in it to make it work, and I am going to
submit a patch or two later.
The driver does not know its MAC address initially. On my hardware it
is stored in a flash memory chip, so I assign it using "ip link set
..." either manually or from an /etc/init.d script. A solution with
early MAC assignment in the moxart_mac_probe() function is doable. Do
you think I should implement it?

> > 3: lan2@eth0: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop qlen 1000
> >     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
>
> Here DSA inherits the MAC address of the master. It does this by default
> in dsa_slave_create() -> eth_hw_addr_inherit(). If the OF node for the
> DSA port has its own MAC address, that will have priority over the MAC
> address of the master.

I quickly tried setting a MAC address in the moxart_mac_probe()
function and DSA did inherit it. Looks like the way to go.

> > ~ # ip link set dev eth0 address 00:90:e8:00:10:03 up
>
> This shouldn't be necessary, neither assigning a MAC address nor putting
> the master up, see Documentation/networking/dsa/configuration.rst, the
> master comes up automatically.

Yes, it indeed does. Thanks.

> > ~ # ip a add 192.168.127.254/24 dev lan2
> >
> > ~ # ip link set dev lan2 address 00:90:e8:00:10:03 up
> > [   56.383801] DSA: failed to set STP state 3 (-95)
>
> errno 95 is EOPNOTSUPP, we shouldn't warn here, I'll submit a patch for
> that.

Great, I'll review it.

> > Is it correct for eth0 and lan2@eth0 to have the same MAC?
>
> It is not wrong, it's a configuration that many deployed DSA systems use.

Is it also not wrong with several lanN@eth0 interfaces? I'm asking it
because I will probably need to support hardware with more than one
port on the 6060.

> > I could not make it work with different MACs.
>
> That is a problem, and I believe it's a problem with the DSA master driver.
> See, the reason it should work is this. Switch ports don't really have a
> MAC address, since they forward everything and not really terminate anything.
> The MAC address of a switch port is a software construct which means
> that software L3 termination interfaces (of which we have one per port)
> should accept packets with some known MAC DA, and drop the rest, and
> everything should be fine.
>
> There are multiple kinds of DSA tags, but 6060 uses a trailer, and this
> will not shift the 'real' MAC DA of packets compared to where the DSA
> master expects to see them. So if the MAC address of the DSA master is A,
> the MAC address of lan2 is B, and you ping lan2 from the outside world,
> the DSA master will see packets with a MAC DA of B.
>
> So the DSA master sees packets with a MAC DA different from its own
> dev->dev_addr (A) and thinks it's within its right to drop them. Except
> that it isn't, because we do the following to prevent it:
>
> (1) in case the DSA master supports IFF_UNICAST_FLT, we call dev_uc_add()
>     from dsa_slave_set_mac_address() and from dsa_slave_open(), and this
>     informs it of our address B.
> (2) in case it doesn't support IFF_UNICAST_FLT, we just call
>     dsa_master_set_promiscuity() and that should keep it promiscuous and
>     it should accept packets regardless of MAC DA (that's the definition).
>
> So you should run some tcpdump and ethtool -S on the DSA master and see
> whether it receives any packets or it drops them. It's possible that
> tcpdump makes packets be accepted, since it puts the interface in
> promiscuous mode.

When I tried to make it work with different MAC addresses, I used
Wireshark and saw that ARP packets did not reach the interface unless
they were broadcast. I might have been a configuration issue rather
than driver issue. Thanks for the explanation! But I will happily
stick to the common MAC address solution if it is not wrong.
Andrew Lunn Aug. 11, 2022, 2:12 p.m. UTC | #7
> > > 2: eth0: <BROADCAST,MULTICAST> mtu 1504 qdisc noop qlen 1000
> > >     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
> >
> > The DSA master is super odd for starting with an all-zero MAC address.
> > What driver handles this part? Normally, drivers are expected to work
> > with a MAC address provided by the firmware (of_get_mac_address or
> > other, perhaps proprietary, means) and fall back to eth_random_addr()
> > if that is missing.
> 
> eth0 is handled by the CONFIG_ARM_MOXART_ETHER driver. By the way, I
> had to change some code in it to make it work, and I am going to
> submit a patch or two later.
> The driver does not know its MAC address initially. On my hardware it
> is stored in a flash memory chip, so I assign it using "ip link set
> ..." either manually or from an /etc/init.d script. A solution with
> early MAC assignment in the moxart_mac_probe() function is doable. Do
> you think I should implement it?

I would suggest a few patches:

1) Use eth_hw_addr_random() to assign a random MAC address during probe.
2) Remove is_valid_ether_addr() from moxart_mac_open()
3) Add a call to platform_get_ethdev_address() during probe
4) Remove is_valid_ether_addr() from moxart_set_mac_address(). The core does this

platform_get_ethdev_address() will call of_get_mac_addr_nvmem() which
might be able to get your MAC address out of flash, without user space
being involved.

      Andrew
Vladimir Oltean Aug. 16, 2022, 8:42 p.m. UTC | #8
On Thu, Aug 11, 2022 at 11:23:02AM +0300, Sergei Antonov wrote:
> > > Is it correct for eth0 and lan2@eth0 to have the same MAC?
> >
> > It is not wrong, it's a configuration that many deployed DSA systems use.
> 
> Is it also not wrong with several lanN@eth0 interfaces? I'm asking it
> because I will probably need to support hardware with more than one
> port on the 6060.

Having multiple DSA interfaces share the same MAC address is also not
entirely wrong, no, and is quite common. But here you'll have to do some
more reading.

The thinking goes, since each DSA netdev constitutes its own L3
termination interface, then they are either connected to different L2
networks, or they are connected together (back to back, in an external
cable loopback). The first case doesn't violate the rule that an L2
network should have a single MAC address per VLAN, and the back-to-back
case should work too. While the other would imply sending packets with a
MAC SA equal to the MAC DA. That's not illegal AFAIK either - as long as
no switch sees the packets*, at least.

There's also the possibility for ports to be bridged together, and in
that case, the L3 termination interface stops being the DSA netdev and
starts being the bridge itself, so the ports' MAC addresses stop
mattering too.

There are limitations though, for example this won't work:

                br0
               /   \
              /     \
 lan0 ---- lan1    lan2 ----- lan3

When all of lan0 - lan3 share the same MAC address, and lan0 wants to
ping lan3, br0 will say "oh, this packet is for me!" because the bridge
marks the MAC addresses of all its bridge ports in the FDB as BR_FDB_LOCAL
(local termination, no forwarding).

The last topology I presented is quite common in the kselftest framework,
and the reason why tools/testing/selftests/drivers/net/dsa/forwarding.config
sets STABLE_MAC_ADDRS=yes; so that each port is given a unique address.

*if you're thinking "well, DSA is a switch, too", then not quite. The
ports, when operating as standalone like mv88e6060 does, should have all
bridge layer functions disabled, like for example address learning.

> > So you should run some tcpdump and ethtool -S on the DSA master and see
> > whether it receives any packets or it drops them. It's possible that
> > tcpdump makes packets be accepted, since it puts the interface in
> > promiscuous mode.
> 
> When I tried to make it work with different MAC addresses, I used
> Wireshark and saw that ARP packets did not reach the interface unless
> they were broadcast. I might have been a configuration issue rather
> than driver issue. Thanks for the explanation! But I will happily
> stick to the common MAC address solution if it is not wrong.

After you said that the driver is moxart, I took a quick look at its few
lines of code and I think I know what the problem is.

Basically this driver has a "slow race" between:

static void moxart_mac_set_rx_mode(struct net_device *ndev)
{
	struct moxart_mac_priv_t *priv = netdev_priv(ndev);

	spin_lock_irq(&priv->txlock);

	(ndev->flags & IFF_PROMISC) ? (priv->reg_maccr |= RCV_ALL) : <- this
				      (priv->reg_maccr &= ~RCV_ALL);

	(ndev->flags & IFF_ALLMULTI) ? (priv->reg_maccr |= RX_MULTIPKT) :
				       (priv->reg_maccr &= ~RX_MULTIPKT);

	if ((ndev->flags & IFF_MULTICAST) && netdev_mc_count(ndev)) {
		priv->reg_maccr |= HT_MULTI_EN;
		moxart_mac_setmulticast(ndev);
	} else {
		priv->reg_maccr &= ~HT_MULTI_EN;
	}

	writel(priv->reg_maccr, priv->base + REG_MAC_CTRL);

	spin_unlock_irq(&priv->txlock);
}

and

static void moxart_mac_reset(struct net_device *ndev)
{
	struct moxart_mac_priv_t *priv = netdev_priv(ndev);

	writel(SW_RST, priv->base + REG_MAC_CTRL);
	while (readl(priv->base + REG_MAC_CTRL) & SW_RST)
		mdelay(10);

	writel(0, priv->base + REG_INTERRUPT_MASK);

	priv->reg_maccr = RX_BROADPKT | FULLDUP | CRC_APD | RX_FTL; <- this
}

(called from moxart_mac_open)

Basically moxart_mac_reset() overwrites the RCV_ALL bit from priv->reg_maccr,
not taking into consideration that an interface may be put in
promiscuous mode even while it's not down. See commit d2615bf45069
("net: core: Always propagate flag changes to interfaces") from November
2013 as proof that it can (and btw, DSA does exactly this). The moxart
driver dates from August 2013, so probably this wasn't an issue for a
few months, and then started being one.
Sergei Antonov Aug. 17, 2022, 5:09 p.m. UTC | #9
On Thu, 11 Aug 2022 at 17:12, Andrew Lunn <andrew@lunn.ch> wrote:
> > The driver does not know its MAC address initially. On my hardware it
> > is stored in a flash memory chip, so I assign it using "ip link set
> > ..." either manually or from an /etc/init.d script. A solution with
> > early MAC assignment in the moxart_mac_probe() function is doable. Do
> > you think I should implement it?
>
> I would suggest a few patches:
>
> 1) Use eth_hw_addr_random() to assign a random MAC address during probe.
> 2) Remove is_valid_ether_addr() from moxart_mac_open()
> 3) Add a call to platform_get_ethdev_address() during probe
> 4) Remove is_valid_ether_addr() from moxart_set_mac_address(). The core does this
>
> platform_get_ethdev_address() will call of_get_mac_addr_nvmem() which
> might be able to get your MAC address out of flash, without user space
> being involved.

Great suggestions! So I am submitting a patch named
"net: moxa: MAC address reading, generating, validity checking"
diff mbox series

Patch

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index a4c6eb9a52d0..c53734379b96 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -160,7 +160,6 @@  static int mv88e6060_setup_addr(struct mv88e6060_priv *priv)
 	u16 val;
 
 	eth_random_addr(addr);
-
 	val = addr[0] << 8 | addr[1];
 
 	/* The multicast bit is always transmitted as a zero, so the switch uses
@@ -212,6 +211,11 @@  static int mv88e6060_setup(struct dsa_switch *ds)
 	return 0;
 }
 
+static int mv88e6060_port_max_mtu(struct dsa_switch *ds, int port)
+{
+	return MV88E6060_MAX_MTU;
+}
+
 static int mv88e6060_port_to_phy_addr(int port)
 {
 	if (port >= 0 && port < MV88E6060_PORTS)
@@ -247,6 +251,7 @@  mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
 static const struct dsa_switch_ops mv88e6060_switch_ops = {
 	.get_tag_protocol = mv88e6060_get_tag_protocol,
 	.setup		= mv88e6060_setup,
+	.port_max_mtu	= mv88e6060_port_max_mtu,
 	.phy_read	= mv88e6060_phy_read,
 	.phy_write	= mv88e6060_phy_write,
 };
diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
index 6c13c2421b64..382fe462fb2d 100644
--- a/drivers/net/dsa/mv88e6060.h
+++ b/drivers/net/dsa/mv88e6060.h
@@ -11,6 +11,7 @@ 
 #define __MV88E6060_H
 
 #define MV88E6060_PORTS	6
+#define MV88E6060_MAX_MTU	1536
 
 #define REG_PORT(p)		(0x8 + (p))
 #define PORT_STATUS		0x00