Message ID | 20241109015633.82638-3-Tristram.Ha@microchip.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: dsa: microchip: Add SGMII port support to KSZ9477 switch | expand |
On Fri, Nov 08, 2024 at 05:56:33PM -0800, Tristram.Ha@microchip.com wrote: > From: Tristram Ha <tristram.ha@microchip.com> > > The SGMII module of KSZ9477 switch can be setup in 3 ways: 0 for direct > connect, 1 for 1000BaseT/1000BaseX SFP, and 2 for 10/100/1000BaseT SFP. This naming is rather odd. First off, i would drop 'SFP'. It does not have to be an SFP on the other end, it could be another switch for example. 1 is PHY_INTERFACE_MODE_1000BASEX and 2 is PHY_INTERFACE_MODE_SGMII. > SFP is typically used so the default is 1. The driver can detect > 10/100/1000BaseT SFP and change the mode to 2. phylink will tell you want mode to use. I would ignore what the hardware detects, so this driver is just the same as every other driver, making it easier to maintain. Andrew
> +static void port_sgmii_r(struct ksz_device *dev, uint port, u16 devid, u16 reg, > + u16 *buf, u16 len) > +{ > + u32 data; > + > + port_sgmii_s(dev, port, devid, reg, len); > + while (len) { > + ksz_pread32(dev, port, REG_PORT_SGMII_DATA__4, &data); > + *buf++ = (u16)data; > + len--; > + } > +} > + > +static void port_sgmii_w(struct ksz_device *dev, uint port, u16 devid, u16 reg, > + u16 *buf, u16 len) > +{ > + u32 data; > + > + port_sgmii_s(dev, port, devid, reg, len); > + while (len) { > + data = *buf++; > + ksz_pwrite32(dev, port, REG_PORT_SGMII_DATA__4, data); > + len--; > + } > +} This kind of looks like a C45 only MDIO bus. #define MMD_DEVICE_ID_VENDOR_MII 0x1F #define SR_MII MMD_DEVICE_ID_VENDOR_MII This is identical to MDIO_MMD_VEND2. #define SR_MII_RESET BIT(15) #define SR_MII_LOOPBACK BIT(14) #define SR_MII_SPEED_100MBIT BIT(13) #define SR_MII_AUTO_NEG_ENABLE BIT(12) #define SR_MII_POWER_DOWN BIT(11) #define SR_MII_AUTO_NEG_RESTART BIT(9) #define SR_MII_FULL_DUPLEX BIT(8) #define SR_MII_SPEED_1000MBIT BIT(6) A standard BMCR. #define MMD_SR_MII_STATUS 0x0001 #define MMD_SR_MII_ID_1 0x0002 #define MMD_SR_MII_ID_2 0x0003 #define MMD_SR_MII_AUTO_NEGOTIATION 0x0004 Same as: #define MII_BMSR 0x01 /* Basic mode status register */ #define MII_PHYSID1 0x02 /* PHYS ID 1 */ #define MII_PHYSID2 0x03 /* PHYS ID 2 */ #define MII_ADVERTISE 0x04 /* Advertisement control reg */ So i think your first patch should be to replace all these with the standard macros. That will then help make it clearer how much is generic, could the existing helpers be used, probably with a wrapper to make your C22 device mapped to C45 MDIO_MMD_VEND2 look like a C22 device? Andrew
On Sat, 9 Nov 2024 at 03:56, <Tristram.Ha@microchip.com> wrote: > diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c > index f73833e24622..8163342d778a 100644 > --- a/drivers/net/dsa/microchip/ksz_common.c > +++ b/drivers/net/dsa/microchip/ksz_common.c > @@ -354,10 +354,30 @@ static void ksz9477_phylink_mac_link_up(struct phylink_config *config, > int speed, int duplex, bool tx_pause, > bool rx_pause); > > +static struct phylink_pcs * > +ksz_phylink_mac_select_pcs(struct phylink_config *config, > + phy_interface_t interface) > +{ > + struct dsa_port *dp = dsa_phylink_to_port(config); > + struct ksz_device *dev = dp->ds->priv; > + struct ksz_port *p = &dev->ports[dp->index]; > + > + if (!p->sgmii) > + return ERR_PTR(-EOPNOTSUPP); Since commit 7530ea26c810 ("net: phylink: remove "using_mac_select_pcs""), returning ERR_PTR(-EOPNOTSUPP) here would actually be fatal. This error code no longer carries any special meaning. It would be a good idea to Cc Russell King for phylink changes. > + switch (interface) { > + case PHY_INTERFACE_MODE_SGMII: > + case PHY_INTERFACE_MODE_1000BASEX: > + return &p->pcs_priv->pcs; > + default: > + return NULL; > + } > +} > + > static const struct phylink_mac_ops ksz9477_phylink_mac_ops = { > .mac_config = ksz_phylink_mac_config, > .mac_link_down = ksz_phylink_mac_link_down, > .mac_link_up = ksz9477_phylink_mac_link_up, > + .mac_select_pcs = ksz_phylink_mac_select_pcs, > };
> Subject: Re: [PATCH net-next 2/2] net: dsa: microchip: Add SGMII port support to > KSZ9477 switch > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content > is safe > > On Sat, 9 Nov 2024 at 03:56, <Tristram.Ha@microchip.com> wrote: > > diff --git a/drivers/net/dsa/microchip/ksz_common.c > b/drivers/net/dsa/microchip/ksz_common.c > > index f73833e24622..8163342d778a 100644 > > --- a/drivers/net/dsa/microchip/ksz_common.c > > +++ b/drivers/net/dsa/microchip/ksz_common.c > > @@ -354,10 +354,30 @@ static void ksz9477_phylink_mac_link_up(struct > phylink_config *config, > > int speed, int duplex, bool tx_pause, > > bool rx_pause); > > > > +static struct phylink_pcs * > > +ksz_phylink_mac_select_pcs(struct phylink_config *config, > > + phy_interface_t interface) > > +{ > > + struct dsa_port *dp = dsa_phylink_to_port(config); > > + struct ksz_device *dev = dp->ds->priv; > > + struct ksz_port *p = &dev->ports[dp->index]; > > + > > + if (!p->sgmii) > > + return ERR_PTR(-EOPNOTSUPP); > > Since commit 7530ea26c810 ("net: phylink: remove "using_mac_select_pcs""), > returning ERR_PTR(-EOPNOTSUPP) here would actually be fatal. This error > code no longer carries any special meaning. > > It would be a good idea to Cc Russell King for phylink changes. Thanks. Will update the code.
> On Fri, Nov 08, 2024 at 05:56:33PM -0800, Tristram.Ha@microchip.com wrote: > > From: Tristram Ha <tristram.ha@microchip.com> > > > > The SGMII module of KSZ9477 switch can be setup in 3 ways: 0 for direct > > connect, 1 for 1000BaseT/1000BaseX SFP, and 2 for 10/100/1000BaseT SFP. > > This naming is rather odd. First off, i would drop 'SFP'. It does not > have to be an SFP on the other end, it could be another switch for > example. 1 is PHY_INTERFACE_MODE_1000BASEX and 2 is > PHY_INTERFACE_MODE_SGMII. > > > SFP is typically used so the default is 1. The driver can detect > > 10/100/1000BaseT SFP and change the mode to 2. > > phylink will tell you want mode to use. I would ignore what the > hardware detects, so this driver is just the same as every other > driver, making it easier to maintain. There are some issues I found that will need your advises. The phylink SFP code categorizes SFP using fiber cable as PHY_INTERFACE_MODE_1000BASEX and SFP using a regular RJ45 connector as PHY_INTERFACE_MODE_SGMII, which has a PHY that can be accessed through I2C connection with a PHY driver. Now when SGMII SFP is used the phylink cannot be created because it fails the validation in phylink_sfp_config_phy(). The reason is the phydev has empty supported and advertising data fields as it is just created. Even if the provided PCS driver can change the supported field to pass the initial validation it cannot change the advertising field as it is readonly to the driver, and this fails the second validation with phylink_sfp_select_interface(). This problem is so obvious I thought I must be doing something wrong. This problem occurs in Linux 6.1 so it is not a new problem and nobody encountered it? This problem does not happen with SFP using fiber cable as a different function is used to initialize the phylink. As the SFP code already retrieves the basic support information from the SFP EEPROM and stores it in sfp_support it can be copied to supported and advertising to pass the validation. I mentioned the SGMII module operates differently for two types of SFP: SGMII and 1000BASEX. The 1000Base-T SFP operates the same as 1000Base-SX fiber SFP, and the driver would like it to be assigned PHY_INTERFACE_MODE_1000BASEX, but it is always assigned PHY_INTERFACE_MODE_SGMII in sfp_select_interface because 1000baseT_Full is compared before 1000baseX_Full. Now I am not sure if those SFPs I tested have correct EEPROM. Some no-brand ones return 0xff value when the PHY driver reads the link status from them and so that driver cannot tell when the link is down. Other than that those SFPs operate correctly in forwarding traffic. It seems there is no way to assign an interupt to that PHY and so polling is always used. The SFP using fiber cable does not have the above issues but has its own issue. The SFP cage can detect the cable is being plugged in as the light is detected. The PCS driver is then consulted to confirm the link. However as the cable is not completely plugged in the driver can report the link is not up. After two calls are done the port can be left into unconnected state forever even after the cable is plugged in. The driver can detect there is something different about the link value and can try to read it later to get a firm reading. This just means the driver needs to poll anyway. But if interrupt is used and the driver uses it to report link this issue is moot. I just feel the SFP code offered in the phylink model does not help to operate SFP well in the KSZ9477 switch, and it does not need that code to provide basic SGMII port connection.
> > +static void port_sgmii_r(struct ksz_device *dev, uint port, u16 devid, u16 reg, > > + u16 *buf, u16 len) > > +{ > > + u32 data; > > + > > + port_sgmii_s(dev, port, devid, reg, len); > > + while (len) { > > + ksz_pread32(dev, port, REG_PORT_SGMII_DATA__4, &data); > > + *buf++ = (u16)data; > > + len--; > > + } > > +} > > + > > +static void port_sgmii_w(struct ksz_device *dev, uint port, u16 devid, u16 reg, > > + u16 *buf, u16 len) > > +{ > > + u32 data; > > + > > + port_sgmii_s(dev, port, devid, reg, len); > > + while (len) { > > + data = *buf++; > > + ksz_pwrite32(dev, port, REG_PORT_SGMII_DATA__4, data); > > + len--; > > + } > > +} > > This kind of looks like a C45 only MDIO bus. > > #define MMD_DEVICE_ID_VENDOR_MII 0x1F > > #define SR_MII MMD_DEVICE_ID_VENDOR_MII > > This is identical to MDIO_MMD_VEND2. > > #define SR_MII_RESET BIT(15) > #define SR_MII_LOOPBACK BIT(14) > #define SR_MII_SPEED_100MBIT BIT(13) > #define SR_MII_AUTO_NEG_ENABLE BIT(12) > #define SR_MII_POWER_DOWN BIT(11) > #define SR_MII_AUTO_NEG_RESTART BIT(9) > #define SR_MII_FULL_DUPLEX BIT(8) > #define SR_MII_SPEED_1000MBIT BIT(6) > > A standard BMCR. > > #define MMD_SR_MII_STATUS 0x0001 > #define MMD_SR_MII_ID_1 0x0002 > #define MMD_SR_MII_ID_2 0x0003 > #define MMD_SR_MII_AUTO_NEGOTIATION 0x0004 > > Same as: > > #define MII_BMSR 0x01 /* Basic mode status register */ > #define MII_PHYSID1 0x02 /* PHYS ID 1 */ > #define MII_PHYSID2 0x03 /* PHYS ID 2 */ > #define MII_ADVERTISE 0x04 /* Advertisement control reg */ > > So i think your first patch should be to replace all these with the > standard macros. > > That will then help make it clearer how much is generic, could the > existing helpers be used, probably with a wrapper to make your C22 > device mapped to C45 MDIO_MMD_VEND2 look like a C22 device? I just realize there are already 1000BASEX definitions in mii.h that can be used. I will try to use generic names as much as possible.
On Tue, Nov 12, 2024 at 02:55:29AM +0000, Tristram.Ha@microchip.com wrote: > > On Fri, Nov 08, 2024 at 05:56:33PM -0800, Tristram.Ha@microchip.com wrote: > > > From: Tristram Ha <tristram.ha@microchip.com> > > > > > > The SGMII module of KSZ9477 switch can be setup in 3 ways: 0 for direct > > > connect, 1 for 1000BaseT/1000BaseX SFP, and 2 for 10/100/1000BaseT SFP. > > > > This naming is rather odd. First off, i would drop 'SFP'. It does not > > have to be an SFP on the other end, it could be another switch for > > example. 1 is PHY_INTERFACE_MODE_1000BASEX and 2 is > > PHY_INTERFACE_MODE_SGMII. > > > > > SFP is typically used so the default is 1. The driver can detect > > > 10/100/1000BaseT SFP and change the mode to 2. > > > > phylink will tell you want mode to use. I would ignore what the > > hardware detects, so this driver is just the same as every other > > driver, making it easier to maintain. > > There are some issues I found that will need your advises. > > The phylink SFP code categorizes SFP using fiber cable as > PHY_INTERFACE_MODE_1000BASEX and SFP using a regular RJ45 connector as > PHY_INTERFACE_MODE_SGMII, which has a PHY that can be accessed through > I2C connection with a PHY driver. Not quite correct, i think. If MDIO over I2C does not work, it will still decide on 1000BaseX vs SGMII from the SFP eeprom contents. There are some SFPs where the PHY is not accessible, and we have to live with however it is configured. > Now when SGMII SFP is used the phylink > cannot be created because it fails the validation in > phylink_sfp_config_phy(). Please stop using 'SGMII SFP'. It should just be SGMII. The MAC should not care what is on the other end, it could be a PHY, and SFP, or a switch, all using Cisco SGMII. > The reason is the phydev has empty supported > and advertising data fields as it is just created. Do you mean the phydev for the PHY in the SFP? Or do you have a second phydev here? I'm confused. > I mentioned the SGMII module operates differently for two types of SFP: > SGMII and 1000BASEX. The 1000Base-T SFP operates the same as 1000Base-SX > fiber SFP, and the driver would like it to be assigned > PHY_INTERFACE_MODE_1000BASEX, but it is always assigned > PHY_INTERFACE_MODE_SGMII in sfp_select_interface because 1000baseT_Full > is compared before 1000baseX_Full. > > Now I am not sure if those SFPs I tested have correct EEPROM. Some > no-brand ones return 0xff value when the PHY driver reads the link status > from them and so that driver cannot tell when the link is down. Other > than that those SFPs operate correctly in forwarding traffic. There is no standardisation of how you access the PHY in an SFP. So each manufacture can do their own thing. However, there are a small number of PHYs actually used inside SFPs, and we have support for those common ones. > It seems there is no way to assign an interupt to that PHY and so polling > is always used. Correct, the interface between the SFP and the SFP cage does not have an interrupt pin the PHY can use. > The SFP using fiber cable does not have the above issues but has its own > issue. The SFP cage can detect the cable is being plugged in as the > light is detected. The PCS driver is then consulted to confirm the link. > However as the cable is not completely plugged in the driver can report > the link is not up. After two calls are done the port can be left into > unconnected state forever even after the cable is plugged in. The driver > can detect there is something different about the link value and can try > to read it later to get a firm reading. This just means the driver needs > to poll anyway. But if interrupt is used and the driver uses it to > report link this issue is moot. Have you set pcs.poll? phylink will then poll the PCS every second. You can report PCS status any time. > I just feel the SFP code offered in the phylink model does not help to > operate SFP well in the KSZ9477 switch, and it does not need that code to > provide basic SGMII port connection. We need to understand what is different about the KSZ9477 switch that phylink does not work for it, yet phylink does work for mv88e6xxx, sja1105, rzn1, qca, ocelot, and other MAC drivers. Andrew
> On Tue, Nov 12, 2024 at 02:55:29AM +0000, Tristram.Ha@microchip.com wrote: > > > On Fri, Nov 08, 2024 at 05:56:33PM -0800, Tristram.Ha@microchip.com wrote: > > > > From: Tristram Ha <tristram.ha@microchip.com> > > > > > > > > The SGMII module of KSZ9477 switch can be setup in 3 ways: 0 for direct > > > > connect, 1 for 1000BaseT/1000BaseX SFP, and 2 for 10/100/1000BaseT SFP. > > > > > > This naming is rather odd. First off, i would drop 'SFP'. It does not > > > have to be an SFP on the other end, it could be another switch for > > > example. 1 is PHY_INTERFACE_MODE_1000BASEX and 2 is > > > PHY_INTERFACE_MODE_SGMII. > > > > > > > SFP is typically used so the default is 1. The driver can detect > > > > 10/100/1000BaseT SFP and change the mode to 2. > > > > > > phylink will tell you want mode to use. I would ignore what the > > > hardware detects, so this driver is just the same as every other > > > driver, making it easier to maintain. > > > > There are some issues I found that will need your advises. > > > > The phylink SFP code categorizes SFP using fiber cable as > > PHY_INTERFACE_MODE_1000BASEX and SFP using a regular RJ45 connector as > > PHY_INTERFACE_MODE_SGMII, which has a PHY that can be accessed through > > I2C connection with a PHY driver. > > Not quite correct, i think. If MDIO over I2C does not work, it will > still decide on 1000BaseX vs SGMII from the SFP eeprom contents. There > are some SFPs where the PHY is not accessible, and we have to live > with however it is configured. > > > Now when SGMII SFP is used the phylink > > cannot be created because it fails the validation in > > phylink_sfp_config_phy(). > > Please stop using 'SGMII SFP'. It should just be SGMII. The MAC should > not care what is on the other end, it could be a PHY, and SFP, or a > switch, all using Cisco SGMII. > > > The reason is the phydev has empty supported > > and advertising data fields as it is just created. > > Do you mean the phydev for the PHY in the SFP? Or do you have a second > phydev here? I'm confused. I am a little confused. There may be regular PHY using SGMII with MDIO access just like a RGMII PHY, but we are dealing specifically SFP here. The KSZ9477 switch board has a SFP cage where different SFP can be plugged in. The SFP driver has to be enabled in kernel configuration under Hardware Monitoring. The driver then can read the EEPROM of the SFP and access its PHY if available and provide support to the phylink driver. When the SFP EEPROM says it does not support 1000Base-T then the SFP bus code does not consider the SFP has a PHY and skips creating a MDIO bus for it and phylink_sfp_config_optical() is called to create the phylink. When the SFP says it supports 1000Base-T sfp_add_phy() is called by the SFP state machine and phylink_sfp_connect_phy() and phylink_sfp_config_phy() are run. It is in the last function that the validation fails as the just created phy device does not initialize its supported and advertising fields yet. The phy device has the opportunity later to fill them up if the phylink creation goes through, but that never happens. A fix is to fill those fields with sfp_support like this: @@ -3228,6 +3228,11 @@ static int phylink_sfp_config_phy(struct struct phylink_link_state config; int ret; + /* The newly created PHY device has empty settings. */ + if (linkmode_empty(phy->supported)) { + linkmode_copy(phy->supported, pl->sfp_support); + linkmode_copy(phy->advertising, pl->sfp_support); + } linkmode_copy(support, phy->supported); memset(&config, 0, sizeof(config)); The provided PCS driver from the DSA driver has an opportunity to change support with its validation check, but that does not look right as generally those checks remove certain bits from the link mode, but this requires completely copying new ones. And this still does not work as the advertising field passed to the PCS driver has a const modifier. > > I mentioned the SGMII module operates differently for two types of SFP: > > SGMII and 1000BASEX. The 1000Base-T SFP operates the same as 1000Base-SX > > fiber SFP, and the driver would like it to be assigned > > PHY_INTERFACE_MODE_1000BASEX, but it is always assigned > > PHY_INTERFACE_MODE_SGMII in sfp_select_interface because 1000baseT_Full > > is compared before 1000baseX_Full. > > > > Now I am not sure if those SFPs I tested have correct EEPROM. Some > > no-brand ones return 0xff value when the PHY driver reads the link status > > from them and so that driver cannot tell when the link is down. Other > > than that those SFPs operate correctly in forwarding traffic. > > There is no standardisation of how you access the PHY in an SFP. So > each manufacture can do their own thing. However, there are a small > number of PHYs actually used inside SFPs, and we have support for > those common ones. Now back to the discussion of the different modes used by the SGMII module. I think a better term like SerDes can be used to help understanding the operation, although I still cannot narrow down the precise definitions from looking at the internet. SGMII mode is said to support 10/100/1000Mbit. This is the default setting, so plugging such SFP allows the port to communicate without any register programming. The other mode is SerDes, which is fixed at 1000Mbit. This is typically used by SFP using fiber optics. This requires changing a register to make the port works. It seems those 1000Base-T SFPs all run in SerDes mode, at least from all SFPs I tried. The issue is then phylink assigns SGMII phy mode to such SFP as its EEPROM just says 1000Base-T support and not 1000BASEX phy mode so that the DSA driver can program the register correspondingly. Because of that the driver still needs to rely on its own detection to find out which mode to use. > Have you set pcs.poll? phylink will then poll the PCS every > second. You can report PCS status any time. I know about PCS polling. The SFP cage driver can provide link_up and link_down indications to the phylink driver. I thought this feature can be used without activating polling and when interrupt is not used. But that link_up indication can result with the port not connected as I mentioned. Anyway the SFP driver has its own state machine doing polling, so it is not like resources are not used. One more issue is if a SFP is not plugged in eventually the SFP driver says "please wait, module slow to respond." It may look like an error to regular users. The next error message definitely looks like it: "failed to read EEPROM: -EREMOTEIO." Do you know if anything can be done like defining some GPIO pins like setting up the tx_disable pin?
On Wed, Nov 13, 2024 at 02:12:36AM +0000, Tristram.Ha@microchip.com wrote: > When the SFP says it supports 1000Base-T sfp_add_phy() is called by the > SFP state machine and phylink_sfp_connect_phy() and > phylink_sfp_config_phy() are run. It is in the last function that the > validation fails as the just created phy device does not initialize its > supported and advertising fields yet. The phy device has the > opportunity later to fill them up if the phylink creation goes through, > but that never happens. > > A fix is to fill those fields with sfp_support like this: > > @@ -3228,6 +3228,11 @@ static int phylink_sfp_config_phy(struct > struct phylink_link_state config; > int ret; > > + /* The newly created PHY device has empty settings. */ > + if (linkmode_empty(phy->supported)) { > + linkmode_copy(phy->supported, pl->sfp_support); > + linkmode_copy(phy->advertising, pl->sfp_support); > + } > linkmode_copy(support, phy->supported); > > memset(&config, 0, sizeof(config)); > > The provided PCS driver from the DSA driver has an opportunity to change > support with its validation check, but that does not look right as > generally those checks remove certain bits from the link mode, but this > requires completely copying new ones. And this still does not work as > the advertising field passed to the PCS driver has a const modifier. I think I know what's happening, it's unfortunate it pushed you towards wrong conclusions. The "fix" you posted is wrong, and no, the PCS driver should not expand the supported mask, just restrict it as you said. The phydev->supported mask normally comes from the phy_probe() logic: /* Start out supporting everything. Eventually, * a controller will attach, and may modify one * or both of these values */ if (phydrv->features) { linkmode_copy(phydev->supported, phydrv->features); genphy_c45_read_eee_abilities(phydev); } else if (phydrv->get_features) err = phydrv->get_features(phydev); else if (phydev->is_c45) err = genphy_c45_pma_read_abilities(phydev); else err = genphy_read_abilities(phydev); The SFP bus code depends strictly on sfp_sm_probe_phy() -> phy_device_register() actually loading a precise device driver for the PHY synchronously via phy_bus_match(). There is another lazy loading mechanism later in phy_attach_direct(), for the Generic PHY driver: /* Assume that if there is no driver, that it doesn't * exist, and we should use the genphy driver. */ but that is too late for this code path, because as you say, phylink_sfp_config_phy() is coded up to only call phylink_attach_phy() if phylink_validate() succeeds. But phylink_validate() will only see a valid phydev->supported mask with the Generic PHY driver if we let that driver attach in phylink_attach_phy() in the first place. Personally, I think SFP modules with embedded PHYs strictly require the matching driver to be available to the kernel, due to that odd way in which the Generic PHY driver is loaded, but I will let the PHY library experts share their opinion as well. You would be better off improving the error message, see what PHY ID you get, then find and load the driver for it: diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 7dbcbf0a4ee2..8be473a7d262 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1817,9 +1817,12 @@ static int sfp_sm_probe_phy(struct sfp *sfp, int addr, bool is_c45) err = sfp_add_phy(sfp->sfp_bus, phy); if (err) { + dev_err(sfp->dev, + "sfp_add_phy() for PHY %s (ID 0x%.8lx) failed: %pe, maybe PHY driver not loaded?\n", + phydev_name(phy), (unsigned long)phy->phy_id, + ERR_PTR(err)); phy_device_remove(phy); phy_device_free(phy); - dev_err(sfp->dev, "sfp_add_phy failed: %pe\n", ERR_PTR(err)); return err; } Chances are it's one of CONFIG_MARVELL_PHY or CONFIG_AQUANTIA_PHY.
> When the SFP EEPROM says it does not support 1000Base-T then the SFP bus > code does not consider the SFP has a PHY and skips creating a MDIO bus > for it and phylink_sfp_config_optical() is called to create the phylink. There are many SFPs out there with broken EEPROM contents. Do the SFPs you have say they are not 1000Base-T but actually are? If so, they are broken, and need a quirk adding. Russell King keeps a database of SFP EEPROM contents. Send him the output of `ethtool -m eth42 raw on hex on` > Now back to the discussion of the different modes used by the SGMII > module. I think a better term like SerDes can be used to help > understanding the operation, although I still cannot narrow down the > precise definitions from looking at the internet. SGMII mode is > said to support 10/100/1000Mbit. This is the default setting, so > plugging such SFP allows the port to communicate without any register > programming. The other mode is SerDes, which is fixed at 1000Mbit. This > is typically used by SFP using fiber optics. This requires changing a > register to make the port works. It seems those 1000Base-T SFPs all run > in SerDes mode, at least from all SFPs I tried. There is a comment in the code: /* Probe a SFP for a PHY device if the module supports copper - the PHY * normally sits at I2C bus address 0x56, and may either be a clause 22 * or clause 45 PHY. * * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with * negotiation enabled, but some may be in 1000base-X - which is for the * PHY driver to determine. So the default is SGMII for copper SFPs, but there are a few oddballs using 1000BaseX. The Marvell PHY driver should figure this out, and the phylink will tell you want mode to use. > The issue is then phylink assigns SGMII phy mode to such SFP as its > EEPROM just says 1000Base-T support and not 1000BASEX phy mode so that > the DSA driver can program the register correspondingly. Because of that > the driver still needs to rely on its own detection to find out which > mode to use. > > > Have you set pcs.poll? phylink will then poll the PCS every > > second. You can report PCS status any time. > > I know about PCS polling. The SFP cage driver can provide link_up and > link_down indications to the phylink driver. The SPF cage provides LOS, Loss of Signal. This basically means there is light coming into the SFP, but not much more. It is not a trustworthy signal on its own. Phylink combines this with the PCS status, does the PCS also have link. You need the combination. > One more issue is if a SFP is not plugged in eventually the SFP driver > says "please wait, module slow to respond." Something is wrong with your GPIOs. Phylink thinks there is a module inserted, when in fact there is not. Add #define DEBUG 1 to the very top of sfp.c, so you can see the state transitions. I guess there is something wrong with the MODDEF0 GPIO. Andrew
> On Wed, Nov 13, 2024 at 02:12:36AM +0000, Tristram.Ha@microchip.com wrote: > > When the SFP says it supports 1000Base-T sfp_add_phy() is called by the > > SFP state machine and phylink_sfp_connect_phy() and > > phylink_sfp_config_phy() are run. It is in the last function that the > > validation fails as the just created phy device does not initialize its > > supported and advertising fields yet. The phy device has the > > opportunity later to fill them up if the phylink creation goes through, > > but that never happens. > > > > A fix is to fill those fields with sfp_support like this: > > > > @@ -3228,6 +3228,11 @@ static int phylink_sfp_config_phy(struct > > struct phylink_link_state config; > > int ret; > > > > + /* The newly created PHY device has empty settings. */ > > + if (linkmode_empty(phy->supported)) { > > + linkmode_copy(phy->supported, pl->sfp_support); > > + linkmode_copy(phy->advertising, pl->sfp_support); > > + } > > linkmode_copy(support, phy->supported); > > > > memset(&config, 0, sizeof(config)); > > > > The provided PCS driver from the DSA driver has an opportunity to change > > support with its validation check, but that does not look right as > > generally those checks remove certain bits from the link mode, but this > > requires completely copying new ones. And this still does not work as > > the advertising field passed to the PCS driver has a const modifier. > > I think I know what's happening, it's unfortunate it pushed you towards > wrong conclusions. > > The "fix" you posted is wrong, and no, the PCS driver should not expand > the supported mask, just restrict it as you said. The phydev->supported > mask normally comes from the phy_probe() logic: > > /* Start out supporting everything. Eventually, > * a controller will attach, and may modify one > * or both of these values > */ > if (phydrv->features) { > linkmode_copy(phydev->supported, phydrv->features); > genphy_c45_read_eee_abilities(phydev); > } > else if (phydrv->get_features) > err = phydrv->get_features(phydev); > else if (phydev->is_c45) > err = genphy_c45_pma_read_abilities(phydev); > else > err = genphy_read_abilities(phydev); > > The SFP bus code depends strictly on sfp_sm_probe_phy() -> phy_device_register() > actually loading a precise device driver for the PHY synchronously via > phy_bus_match(). There is another lazy loading mechanism later in > phy_attach_direct(), for the Generic PHY driver: > > /* Assume that if there is no driver, that it doesn't > * exist, and we should use the genphy driver. > */ > > but that is too late for this code path, because as you say, > phylink_sfp_config_phy() is coded up to only call phylink_attach_phy() > if phylink_validate() succeeds. But phylink_validate() will only see a > valid phydev->supported mask with the Generic PHY driver if we let that > driver attach in phylink_attach_phy() in the first place. > > Personally, I think SFP modules with embedded PHYs strictly require the > matching driver to be available to the kernel, due to that odd way in > which the Generic PHY driver is loaded, but I will let the PHY library > experts share their opinion as well. > > You would be better off improving the error message, see what PHY ID you > get, then find and load the driver for it: > > diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c > index 7dbcbf0a4ee2..8be473a7d262 100644 > --- a/drivers/net/phy/sfp.c > +++ b/drivers/net/phy/sfp.c > @@ -1817,9 +1817,12 @@ static int sfp_sm_probe_phy(struct sfp *sfp, int addr, bool > is_c45) > > err = sfp_add_phy(sfp->sfp_bus, phy); > if (err) { > + dev_err(sfp->dev, > + "sfp_add_phy() for PHY %s (ID 0x%.8lx) failed: %pe, maybe PHY driver > not loaded?\n", > + phydev_name(phy), (unsigned long)phy->phy_id, > + ERR_PTR(err)); > phy_device_remove(phy); > phy_device_free(phy); > - dev_err(sfp->dev, "sfp_add_phy failed: %pe\n", ERR_PTR(err)); > return err; > } > > > Chances are it's one of CONFIG_MARVELL_PHY or CONFIG_AQUANTIA_PHY. Indeed adding the Marvell PHY driver fixed the problem. There is nothing special about the Marvell PHY driver. It is just phy_probe() is called during PHY device creation just as you said. It may not be right to use sfp_support, but all three (sfp_support, supported, advertising) have about the same value at that point after the PHY driver is invoked: 0x62ff and 0x60f0. I mentioned before that some SFPs have faulty implementation where part of the returned PHY register value is 0xff. For example, PHY register 0 is 0x11ff, PHY register 1 is 0x79ff, and PHY register 2 is 0x01ff. The Marvell PHY id is 0x01410cc0, and I saw there is a special PHY id 0x01ff0cc0 defined for Finisar to accommodate this situation. Were those SFPs made by Finisar originally? Some of those PHY registers are correct, so I do not know if those wrong registers are intentional or not, but the link status register always has 0xff value and that cannot be right.
> > When the SFP EEPROM says it does not support 1000Base-T then the SFP bus > > code does not consider the SFP has a PHY and skips creating a MDIO bus > > for it and phylink_sfp_config_optical() is called to create the phylink. > > There are many SFPs out there with broken EEPROM contents. Do the SFPs > you have say they are not 1000Base-T but actually are? If so, they are > broken, and need a quirk adding. > > Russell King keeps a database of SFP EEPROM contents. Send him the > output of `ethtool -m eth42 raw on hex on` > > > Now back to the discussion of the different modes used by the SGMII > > module. I think a better term like SerDes can be used to help > > understanding the operation, although I still cannot narrow down the > > precise definitions from looking at the internet. SGMII mode is > > said to support 10/100/1000Mbit. This is the default setting, so > > plugging such SFP allows the port to communicate without any register > > programming. The other mode is SerDes, which is fixed at 1000Mbit. This > > is typically used by SFP using fiber optics. This requires changing a > > register to make the port works. It seems those 1000Base-T SFPs all run > > in SerDes mode, at least from all SFPs I tried. > > There is a comment in the code: > > /* Probe a SFP for a PHY device if the module supports copper - the PHY > * normally sits at I2C bus address 0x56, and may either be a clause 22 > * or clause 45 PHY. > * > * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with > * negotiation enabled, but some may be in 1000base-X - which is for the > * PHY driver to determine. > > So the default is SGMII for copper SFPs, but there are a few oddballs > using 1000BaseX. The Marvell PHY driver should figure this out, and > the phylink will tell you want mode to use. Adding Marvell PHY driver fixes the main issue I raised. But the PHY support still shows only SGMII interface inside the PHY driver, and the ethtool module-info command shows 1000BASE-T. The only good brand-name SFPs I have are all 10/100/1000 type. I am going to get other brands to see if they get better or always use SerDes mode. That leaves the one situation where the SGMII port is connected directly to a MAC or each other. A customer once tried to do that and the SGMII register write was changed to support that, but I do not know if that project became a real product. Microchip does have a board connecting the SGMII ports of two chips together, but that does not run Linux, and there is no special configuration to make the ports work. As there is no SFP or PHY to tell phylink which interface to use I think the only solution is to declare fixed-link or some other phy modes in the device tree? I currently use fixed-link to handle this situation. There is another new issue though. The SGMII port in another chip can use 2.5G. The driver uses fixed PHY to get the MAC running. But the fixed PHY driver can only support speed up to 1000. There is no issue to adding higher speeds to that driver, but I think that is not advised?
> That leaves the one situation where the SGMII port is connected directly > to a MAC or each other. A customer once tried to do that and the SGMII > register write was changed to support that, but I do not know if that > project became a real product. This is often done to cascade switches. In this setup, going down to 100Mbps or 10Mbps makes no sense, so 1000BaseX is used, not SGMII. Today, fixed-link is used in this situation, combined with setting phy-mode to 1000basex. Russell King has said in the past that phylink could probably support this without fixed-link. > The SGMII port in another chip can use 2.5G. The driver uses fixed PHY > to get the MAC running. But the fixed PHY driver can only support speed > up to 1000. There is no issue to adding higher speeds to that driver, but > I think that is not advised? Well, 2.5G is obviously not SGMII. It is 2500BaseX. There are also many broken 2500BaseX implementations out there, which are SGMII cores overclocked, and the signalling disabled, because SGMII signalling at 2.5G makes no sense, you want 2500BaseX signalling. phylink fixed link also is not limited to 1G, it can do any speed. Andrew
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 0ba658a72d8f..1a53980b6e80 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 switch driver main logic * - * Copyright (C) 2017-2019 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. */ #include <linux/kernel.h> @@ -12,6 +12,8 @@ #include <linux/phy.h> #include <linux/if_bridge.h> #include <linux/if_vlan.h> +#include <linux/irqdomain.h> +#include <linux/phylink.h> #include <net/dsa.h> #include <net/switchdev.h> @@ -161,6 +163,382 @@ static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev) 10, 1000); } +static void port_sgmii_s(struct ksz_device *dev, uint port, u16 devid, u16 reg, + u16 len) +{ + u32 data; + + data = devid & PORT_SGMII_DEVICE_ID_M; + data <<= PORT_SGMII_DEVICE_ID_S; + data |= reg; + if (len > 1) + data |= PORT_SGMII_AUTO_INCR; + ksz_pwrite32(dev, port, REG_PORT_SGMII_ADDR__4, data); +} + +static void port_sgmii_r(struct ksz_device *dev, uint port, u16 devid, u16 reg, + u16 *buf, u16 len) +{ + u32 data; + + port_sgmii_s(dev, port, devid, reg, len); + while (len) { + ksz_pread32(dev, port, REG_PORT_SGMII_DATA__4, &data); + *buf++ = (u16)data; + len--; + } +} + +static void port_sgmii_w(struct ksz_device *dev, uint port, u16 devid, u16 reg, + u16 *buf, u16 len) +{ + u32 data; + + port_sgmii_s(dev, port, devid, reg, len); + while (len) { + data = *buf++; + ksz_pwrite32(dev, port, REG_PORT_SGMII_DATA__4, data); + len--; + } +} + +static int port_sgmii_detect(struct ksz_device *dev, uint p) +{ + struct ksz_port *port = &dev->ports[p]; + int ret = 0; + + if (dev->sgmii_mode) { + u16 buf[6]; + int i = 0; + + do { + port_sgmii_r(dev, p, SR_MII, 0, buf, 6); + i++; + } while (!buf[5] && i < 10); + if (buf[5] & SR_MII_REMOTE_ACK) { + if (buf[5] & (SR_MII_REMOTE_HALF_DUPLEX | + SR_MII_REMOTE_FULL_DUPLEX)) + port->fiber = 1; + else if (dev->sgmii_mode == 1) + dev->sgmii_mode = 2; + ret = 1; + } else if (dev->sgmii_mode == 1) { + port->fiber = 1; + ret = 1; + } + } else { + /* Need to be told to run in direct mode. */ + port->fiber = 1; + ret = 1; + } + return ret; +} + +static void port_sgmii_reset(struct ksz_device *dev, uint p) +{ + u16 ctrl; + + port_sgmii_r(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + ctrl |= SR_MII_RESET; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); +} + +static void port_sgmii_setup(struct ksz_device *dev, uint p, bool pcs, + bool master, bool autoneg, bool intr, int speed, + int duplex) +{ + u16 ctrl; + u16 cfg; + u16 adv; + + cfg = 0; + if (pcs) + cfg |= SR_MII_PCS_SGMII << SR_MII_PCS_MODE_S; + if (master) { + cfg |= SR_MII_TX_CFG_PHY_MASTER; + cfg |= SR_MII_SGMII_LINK_UP; + } + if (intr) + cfg |= SR_MII_AUTO_NEG_COMPLETE_INTR; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_AUTO_NEG_CTRL, &cfg, 1); + port_sgmii_r(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + if (master || !autoneg) { + switch (speed) { + case SPEED_100: + ctrl |= SR_MII_SPEED_100MBIT; + break; + case SPEED_1000: + ctrl |= SR_MII_SPEED_1000MBIT; + break; + } + } + if (!autoneg) { + ctrl &= ~SR_MII_AUTO_NEG_ENABLE; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + return; + } else if (!(ctrl & SR_MII_AUTO_NEG_ENABLE)) { + ctrl |= SR_MII_AUTO_NEG_ENABLE; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + } + + /* Need to write to advertise register to send correct signal. */ + /* Default value is 0x0020. */ + adv = SR_MII_AUTO_NEG_ASYM_PAUSE_RX << SR_MII_AUTO_NEG_PAUSE_S; + if (duplex == DUPLEX_FULL) + adv |= SR_MII_AUTO_NEG_FULL_DUPLEX; + else + adv |= SR_MII_AUTO_NEG_HALF_DUPLEX; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_AUTO_NEGOTIATION, &adv, 1); + if (master && autoneg) { + ctrl |= SR_MII_AUTO_NEG_RESTART; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + } +} + +#define PORT_LINK_UP BIT(0) +#define PORT_LINK_CHANGE BIT(1) +#define PORT_LINK_INVALID BIT(2) + +static int sgmii_port_get_speed(struct ksz_device *dev, uint p) +{ + struct ksz_port *info = &dev->ports[p]; + int ret = 0; + u16 status; + u16 speed; + u16 data; + u8 link; + + port_sgmii_r(dev, p, SR_MII, MMD_SR_MII_STATUS, &status, 1); + port_sgmii_r(dev, p, SR_MII, MMD_SR_MII_STATUS, &status, 1); + port_sgmii_r(dev, p, SR_MII, MMD_SR_MII_AUTO_NEG_STATUS, &data, 1); + + /* Typical register values in different modes. + * 10/100/1000: 1f0001 = 01ad 1f0005 = 4000 1f8002 = 0008 + * 1f0001 = 01bd 1f0005 = d000 1f8002 = 001a + * 1000: 1f0001 = 018d 1f0005 = 0000 1f8002 = 0000 + * 1f0001 = 01ad 1f0005 = 40a0 1f8002 = 0001 + * 1f0001 = 01ad 1f0005 = 41a0 1f8002 = 0001 + * fiber: 1f0001 = 0189 1f0005 = 0000 1f8002 = 0000 + * 1f0001 = 01ad 1f0005 = 41a0 1f8002 = 0001 + */ + + if (data & SR_MII_AUTO_NEG_COMPLETE_INTR) { + data &= ~SR_MII_AUTO_NEG_COMPLETE_INTR; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_AUTO_NEG_STATUS, &data, + 1); + } + + /* Running in fiber mode. */ + if (info->fiber && !data) { + u16 link_up = PORT_LINK_STATUS; + + if (dev->sgmii_mode == 1) + link_up |= PORT_AUTO_NEG_ACKNOWLEDGE; + if ((status & link_up) == link_up) + data = SR_MII_STAT_LINK_UP | + (SR_MII_STAT_1000_MBPS << SR_MII_STAT_S) | + SR_MII_STAT_FULL_DUPLEX; + } + if (data & SR_MII_STAT_LINK_UP) { + ret = PORT_LINK_UP; + } else if (info->interface == PHY_INTERFACE_MODE_1000BASEX && + status == 0x018d) { + info->sgmii_link = 0xff; + return PORT_LINK_INVALID; + } + + link = (data & ~SR_MII_AUTO_NEG_COMPLETE_INTR); + if (info->sgmii_link == link) + return ret; + + if (data & SR_MII_STAT_LINK_UP) { + u16 ctrl; + + /* Need to update control register with same link setting. */ + ctrl = SR_MII_AUTO_NEG_ENABLE; + speed = (data >> SR_MII_STAT_S) & SR_MII_STAT_M; + if (speed == SR_MII_STAT_1000_MBPS) + ctrl |= SR_MII_SPEED_1000MBIT; + else if (speed == SR_MII_STAT_100_MBPS) + ctrl |= SR_MII_SPEED_100MBIT; + if (data & SR_MII_STAT_FULL_DUPLEX) + ctrl |= SR_MII_FULL_DUPLEX; + port_sgmii_w(dev, p, SR_MII, MMD_SR_MII_CTRL, &ctrl, 1); + + speed = (data >> SR_MII_STAT_S) & SR_MII_STAT_M; + info->phydev.speed = SPEED_10; + if (speed == SR_MII_STAT_1000_MBPS) + info->phydev.speed = SPEED_1000; + else if (speed == SR_MII_STAT_100_MBPS) + info->phydev.speed = SPEED_100; + + info->phydev.duplex = 0; + if (data & SR_MII_STAT_FULL_DUPLEX) + info->phydev.duplex = 1; + } + ret |= PORT_LINK_CHANGE; + info->sgmii_link = link; + info->phydev.link = (ret & PORT_LINK_UP); + return ret; +} + +static bool sgmii_need_polling(struct ksz_device *dev, struct ksz_port *p) +{ + /* SGMII mode 2 has link up and link down interrupts. */ + if (dev->sgmii_mode == 2 && p->sgmii_has_intr) + return false; + + /* SGMII mode 1 has link up interrupt but not link down interrupt. */ + if (dev->sgmii_mode == 1 && p->sgmii_has_intr && !p->phydev.link) + return false; + + /* SGMII mode 0 for direct connect has no link change. */ + if (dev->sgmii_mode == 0) + return false; + return true; +} + +static void sgmii_check_work(struct work_struct *work) +{ + struct ksz_device *dev = container_of(work, struct ksz_device, + sgmii_check.work); + u8 port = dev->info->sgmii_port - 1; + struct ksz_port *p = &dev->ports[port]; + int ret; + + ret = sgmii_port_get_speed(dev, port); + if (ret & PORT_LINK_CHANGE) { + struct phy_device *phydev; + + /* When simulating PHY. */ + p->phydev.interrupts = PHY_INTERRUPT_ENABLED; + phydev = mdiobus_get_phy(dev->ds->user_mii_bus, port); + if (phydev) + phy_trigger_machine(phydev); + + /* When using SFP code. */ + dsa_port_phylink_mac_change(dev->ds, port, p->phydev.link); + } + if (sgmii_need_polling(dev, p)) + schedule_delayed_work(&dev->sgmii_check, + msecs_to_jiffies(500)); +} + +static irqreturn_t ksz9477_sgmii_irq_thread_fn(int irq, void *dev_id) +{ + struct ksz_pcs *priv = dev_id; + struct ksz_device *dev = priv->dev; + u8 port = priv->port; + u16 data16 = 0; + int ret; + + port_sgmii_w(dev, port, SR_MII, MMD_SR_MII_AUTO_NEG_STATUS, &data16, 1); + ret = sgmii_port_get_speed(dev, port); + if (ret & PORT_LINK_CHANGE) { + struct ksz_port *p = &dev->ports[port]; + struct phy_device *phydev; + + /* When simulating PHY. */ + p->phydev.interrupts = PHY_INTERRUPT_ENABLED; + phydev = mdiobus_get_phy(dev->ds->user_mii_bus, port); + if (phydev) + phy_trigger_machine(phydev); + + /* When using SFP code. */ + dsa_port_phylink_mac_change(dev->ds, port, p->phydev.link); + + /* No interrupt for link down. */ + if (sgmii_need_polling(dev, p)) + schedule_delayed_work(&dev->sgmii_check, + msecs_to_jiffies(500)); + } + return IRQ_HANDLED; +} + +static void sgmii_initial_setup(struct ksz_device *dev, int port) +{ + struct ksz_port *p = &dev->ports[port]; + /* Assume SGMII mode is 2. */ + bool autoneg = true; + bool master = false; + bool intr = false; + bool pcs = true; + int irq, ret; + + /* Only setup SGMII port once. */ + if (!p->sgmii || p->sgmii_setup) + return; + + irq = irq_find_mapping(p->pirq.domain, PORT_SGMII_INT_LOC); + if (irq > 0) { + ret = request_threaded_irq(irq, NULL, + ksz9477_sgmii_irq_thread_fn, + IRQF_ONESHOT, "SGMII", + p->pcs_priv); + if (!ret) { + intr = true; + p->sgmii_has_intr = 1; + } + } + + /* Make invalid so the correct value is set. */ + p->sgmii_link = 0xff; + + INIT_DELAYED_WORK(&dev->sgmii_check, sgmii_check_work); + if (dev->sgmii_mode == 0) { + master = true; + autoneg = false; + } else if (dev->sgmii_mode == 1) { + pcs = false; + master = true; + } + port_sgmii_setup(dev, port, pcs, master, autoneg, intr, SPEED_1000, + DUPLEX_FULL); + + p->sgmii_setup = 1; + sgmii_port_get_speed(dev, port); + + /* Need to check link down if using fiber SFP. */ + if (sgmii_need_polling(dev, p)) + schedule_delayed_work(&dev->sgmii_check, + msecs_to_jiffies(500)); +} + +void ksz9477_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct ksz_pcs *priv = container_of(pcs, struct ksz_pcs, pcs); + struct ksz_device *dev = priv->dev; + struct ksz_port *p = &dev->ports[priv->port]; + int ret; + + ret = sgmii_port_get_speed(dev, priv->port); + if (!(ret & PORT_LINK_UP)) + state->link = false; + state->duplex = p->phydev.duplex; + state->speed = p->phydev.speed; + if (state->interface == PHY_INTERFACE_MODE_SGMII) { + state->an_complete = state->link; + } else if (state->interface == PHY_INTERFACE_MODE_1000BASEX) { + if (ret == PORT_LINK_INVALID) + schedule_delayed_work(&dev->sgmii_check, + msecs_to_jiffies(200)); + } +} + +void ksz9477_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex) +{ + struct ksz_pcs *priv = container_of(pcs, struct ksz_pcs, pcs); + struct ksz_device *dev = priv->dev; + struct ksz_port *p = &dev->ports[priv->port]; + + /* No interrupt for link down. */ + if (sgmii_need_polling(dev, p)) + schedule_delayed_work(&dev->sgmii_check, + msecs_to_jiffies(500)); +} + int ksz9477_reset_switch(struct ksz_device *dev) { u8 data8; @@ -345,7 +723,7 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) * A fixed PHY can be setup in the device tree, but this function is * still called for that port during initialization. * For RGMII PHY there is no way to access it so the fixed PHY should - * be used. For SGMII PHY the supporting code will be added later. + * be used. SGMII PHY is simulated as a regular PHY. */ if (!dev->info->internal_phy[addr]) { struct ksz_port *p = &dev->ports[addr]; @@ -355,7 +733,10 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) val = 0x1140; break; case MII_BMSR: - val = 0x796d; + if (p->phydev.link) + val = 0x796d; + else + val = 0x7949; break; case MII_PHYSID1: val = 0x0022; @@ -368,6 +749,10 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) break; case MII_LPA: val = 0xc5e1; + if (p->phydev.speed == SPEED_10) + val &= ~0x0180; + if (p->phydev.duplex == 0) + val &= ~0x0140; break; case MII_CTRL1000: val = 0x0700; @@ -378,6 +763,24 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) else val = 0; break; + case MII_ESTATUS: + val = 0x3000; + break; + + /* This register holds the PHY interrupt status. */ + case MII_TPISTATUS: + val = (LINK_DOWN_INT | LINK_UP_INT) << 8; + if (p->phydev.interrupts == PHY_INTERRUPT_ENABLED) { + if (p->phydev.link) + val |= LINK_UP_INT; + else + val |= LINK_DOWN_INT; + } + p->phydev.interrupts = 0; + break; + default: + val = 0; + break; } } else { ret = ksz_pread16(dev, addr, 0x100 + (reg << 1), &val); @@ -973,11 +1376,27 @@ static phy_interface_t ksz9477_get_interface(struct ksz_device *dev, int port) void ksz9477_get_caps(struct ksz_device *dev, int port, struct phylink_config *config) { + struct ksz_port *p = &dev->ports[port]; + config->mac_capabilities = MAC_10 | MAC_100 | MAC_ASYM_PAUSE | MAC_SYM_PAUSE; if (dev->info->gbit_capable[port]) config->mac_capabilities |= MAC_1000FD; + + if (p->sgmii) { + struct phy_device *phydev; + + phydev = mdiobus_get_phy(dev->ds->user_mii_bus, port); + + /* Change this port interface to SGMII. */ + if (phydev) + phydev->interface = PHY_INTERFACE_MODE_SGMII; + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + } } int ksz9477_set_ageing_time(struct ksz_device *dev, unsigned int msecs) @@ -1054,6 +1473,8 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL, !dev->info->internal_phy[port]); + sgmii_initial_setup(dev, port); + if (cpu_port) member = dsa_user_ports(ds); else @@ -1132,6 +1553,10 @@ void ksz9477_config_cpu_port(struct dsa_switch *ds) continue; ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED); } + + /* Switch reset does not reset SGMII module. */ + if (dev->info->sgmii_port > 0) + port_sgmii_reset(dev, dev->info->sgmii_port - 1); } int ksz9477_enable_stp_addr(struct ksz_device *dev) @@ -1201,6 +1626,23 @@ int ksz9477_setup(struct dsa_switch *ds) /* enable global MIB counter freeze function */ ksz_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, true); + if (dev->info->sgmii_port > 0) { + struct ksz_port *p = &dev->ports[dev->info->sgmii_port - 1]; + struct ksz_pcs *pcs_priv; + + p->sgmii = port_sgmii_detect(dev, dev->info->sgmii_port - 1); + if (p->sgmii) { + pcs_priv = devm_kzalloc(dev->dev, + sizeof(struct ksz_pcs), + GFP_KERNEL); + if (!pcs_priv) + return -ENOMEM; + p->pcs_priv = pcs_priv; + pcs_priv->dev = dev; + pcs_priv->port = dev->info->sgmii_port - 1; + } + } + /* Make sure PME (WoL) is not enabled. If requested, it will * be enabled by ksz_wol_pre_shutdown(). Otherwise, some PMICs * do not like PME events changes before shutdown. @@ -1319,6 +1761,8 @@ int ksz9477_switch_init(struct ksz_device *dev) void ksz9477_switch_exit(struct ksz_device *dev) { + if (delayed_work_pending(&dev->sgmii_check)) + cancel_delayed_work_sync(&dev->sgmii_check); ksz9477_reset_switch(dev); } diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h index d2166b0d881e..24a42d5ee023 100644 --- a/drivers/net/dsa/microchip/ksz9477.h +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 series Header file * - * Copyright (C) 2017-2022 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. */ #ifndef __KSZ9477_H @@ -97,4 +97,9 @@ void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port, u16 ethtype, u8 *src_mac, u8 *dst_mac, unsigned long cookie, u32 prio); +void ksz9477_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state); +void ksz9477_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex); + #endif diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 04235c22bf40..0c15c0b16b42 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -805,6 +805,7 @@ #define REG_PORT_INT_STATUS 0x001B #define REG_PORT_INT_MASK 0x001F +#define PORT_SGMII_INT_LOC 3 #define PORT_SGMII_INT BIT(3) #define PORT_PTP_INT BIT(2) #define PORT_PHY_INT BIT(1) @@ -1025,6 +1026,11 @@ #define SR_MII_AUTO_NEG_FULL_DUPLEX BIT(5) #define MMD_SR_MII_REMOTE_CAPABILITY 0x0005 + +#define SR_MII_REMOTE_ACK BIT(14) +#define SR_MII_REMOTE_HALF_DUPLEX BIT(6) +#define SR_MII_REMOTE_FULL_DUPLEX BIT(5) + #define MMD_SR_MII_AUTO_NEG_EXP 0x0006 #define MMD_SR_MII_AUTO_NEG_EXT 0x000F diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f73833e24622..8163342d778a 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -354,10 +354,30 @@ static void ksz9477_phylink_mac_link_up(struct phylink_config *config, int speed, int duplex, bool tx_pause, bool rx_pause); +static struct phylink_pcs * +ksz_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct ksz_device *dev = dp->ds->priv; + struct ksz_port *p = &dev->ports[dp->index]; + + if (!p->sgmii) + return ERR_PTR(-EOPNOTSUPP); + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + return &p->pcs_priv->pcs; + default: + return NULL; + } +} + static const struct phylink_mac_ops ksz9477_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_select_pcs = ksz_phylink_mac_select_pcs, }; static const struct ksz_dev_ops ksz9477_dev_ops = { @@ -395,6 +415,8 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { .reset = ksz9477_reset_switch, .init = ksz9477_switch_init, .exit = ksz9477_switch_exit, + .pcs_get_state = ksz9477_pcs_get_state, + .pcs_link_up = ksz9477_pcs_link_up, }; static const struct phylink_mac_ops lan937x_phylink_mac_ops = { @@ -1033,8 +1055,7 @@ static const struct regmap_range ksz9477_valid_regs[] = { regmap_reg_range(0x701b, 0x701b), regmap_reg_range(0x701f, 0x7020), regmap_reg_range(0x7030, 0x7030), - regmap_reg_range(0x7200, 0x7203), - regmap_reg_range(0x7206, 0x7207), + regmap_reg_range(0x7200, 0x7207), regmap_reg_range(0x7300, 0x7301), regmap_reg_range(0x7400, 0x7401), regmap_reg_range(0x7403, 0x7403), @@ -1554,6 +1575,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .internal_phy = {true, true, true, true, true, false, false}, .gbit_capable = {true, true, true, true, true, true, true}, + .sgmii_port = 7, .wr_table = &ksz9477_register_set, .rd_table = &ksz9477_register_set, }, @@ -1972,6 +1994,45 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, dev->dev_ops->get_caps(dev, port, config); } +static void ksz_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct ksz_pcs *priv = container_of(pcs, struct ksz_pcs, pcs); + struct ksz_device *dev = priv->dev; + + if (dev->dev_ops->pcs_get_state) + dev->dev_ops->pcs_get_state(pcs, state); +} + +static int ksz_pcs_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + return 0; +} + +static void ksz_pcs_an_restart(struct phylink_pcs *pcs) +{ +} + +static void ksz_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex) +{ + struct ksz_pcs *priv = container_of(pcs, struct ksz_pcs, pcs); + struct ksz_device *dev = priv->dev; + + if (dev->dev_ops->pcs_link_up) + dev->dev_ops->pcs_link_up(pcs, mode, interface, speed, duplex); +} + +static const struct phylink_pcs_ops ksz_pcs_ops = { + .pcs_get_state = ksz_pcs_get_state, + .pcs_config = ksz_pcs_config, + .pcs_an_restart = ksz_pcs_an_restart, + .pcs_link_up = ksz_pcs_link_up, +}; + void ksz_r_mib_stats64(struct ksz_device *dev, int port) { struct ethtool_pause_stats *pstats; @@ -2021,7 +2082,7 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) spin_unlock(&mib->stats64_lock); - if (dev->info->phy_errata_9477) { + if (dev->info->phy_errata_9477 && dev->info->sgmii_port != port + 1) { ret = ksz9477_errata_monitor(dev, port, raw->tx_late_col); if (ret) dev_err(dev->dev, "Failed to monitor transmission halt\n"); @@ -2531,6 +2592,12 @@ static int ksz_setup(struct dsa_switch *ds) return ret; } + if (dev->info->sgmii_port > 0) { + p = &dev->ports[dev->info->sgmii_port - 1]; + if (p->pcs_priv) + p->pcs_priv->pcs.ops = &ksz_pcs_ops; + } + /* Start with learning disabled on standalone user ports, and enabled * on the CPU port. In lack of other finer mechanisms, learning on the * CPU port will avoid flooding bridge local addresses on the network @@ -3355,6 +3422,17 @@ static void ksz_phylink_mac_config(struct phylink_config *config, if (dev->info->internal_phy[port]) return; + /* No need to configure XMII control register when using SGMII. */ + if (state->interface == PHY_INTERFACE_MODE_SGMII || + state->interface == PHY_INTERFACE_MODE_1000BASEX) { + struct ksz_port *p = &dev->ports[port]; + + p->interface = state->interface; + return; + } + if (dev->info->sgmii_port == port + 1) + return; + if (phylink_autoneg_inband(mode)) { dev_err(dev->dev, "In-band AN not supported!\n"); return; @@ -4783,6 +4861,9 @@ int ksz_switch_register(struct ksz_device *dev) dev->ports[i].num = i; } + /* Default SGMII mode is detecting which type of SFP is used. */ + dev->sgmii_mode = 1; + /* set the real number of ports */ dev->ds->num_ports = dev->info->port_cnt; @@ -4813,6 +4894,13 @@ int ksz_switch_register(struct ksz_device *dev) of_get_phy_mode(port, &dev->ports[port_num].interface); + /* SGMII port can be used without using SFP. */ + if (dev->info->sgmii_port == port_num + 1) { + if (of_phy_is_fixed_link(port) && + !dev->ports[port_num].interface) + dev->sgmii_mode = 0; + } + ksz_parse_rgmii_delay(dev, port_num, port); } of_node_put(ports); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index bec846e20682..12b043bb766a 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -86,6 +86,7 @@ struct ksz_chip_data { bool supports_rgmii[KSZ_MAX_NUM_PORTS]; bool internal_phy[KSZ_MAX_NUM_PORTS]; bool gbit_capable[KSZ_MAX_NUM_PORTS]; + u8 sgmii_port; const struct regmap_access_table *wr_table; const struct regmap_access_table *rd_table; }; @@ -114,6 +115,13 @@ struct ksz_switch_macaddr { refcount_t refcount; }; +struct ksz_pcs { + struct phylink_pcs pcs; + struct ksz_device *dev; + int irq; + u8 port; +}; + struct ksz_port { bool remove_tag; /* Remove Tag flag set, for ksz8795 only */ bool learning; @@ -125,6 +133,10 @@ struct ksz_port { u32 force:1; u32 read:1; /* read MIB counters in background */ u32 freeze:1; /* MIB counter freeze is enabled */ + u32 sgmii:1; /* port is SGMII */ + u32 sgmii_has_intr:1; + u32 sgmii_link:8; + u32 sgmii_setup:1; struct ksz_port_mib mib; phy_interface_t interface; @@ -134,6 +146,7 @@ struct ksz_port { void *acl_priv; struct ksz_irq pirq; u8 num; + struct ksz_pcs *pcs_priv; #if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_PTP) struct hwtstamp_config tstamp_config; bool hwts_tx_en; @@ -156,6 +169,7 @@ struct ksz_device { struct mutex alu_mutex; /* ALU access */ struct mutex vlan_mutex; /* vlan access */ const struct ksz_dev_ops *dev_ops; + const struct phylink_pcs_ops *pcs_ops; struct device *dev; struct regmap *regmap[__KSZ_NUM_REGMAPS]; @@ -181,6 +195,8 @@ struct ksz_device { struct ksz_port *ports; struct delayed_work mib_read; unsigned long mib_read_interval; + struct delayed_work sgmii_check; + u8 sgmii_mode; u16 mirror_rx; u16 mirror_tx; u16 port_mask; @@ -379,6 +395,11 @@ struct ksz_dev_ops { int (*reset)(struct ksz_device *dev); int (*init)(struct ksz_device *dev); void (*exit)(struct ksz_device *dev); + + void (*pcs_get_state)(struct phylink_pcs *pcs, + struct phylink_link_state *state); + void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex); }; struct ksz_device *ksz_switch_alloc(struct device *base, void *priv);