diff mbox series

[net-next,v3,5/5] net: phy: add driver for built-in 2.5G ethernet PHY on MT7988

Message ID 20240520113456.21675-6-SkyLake.Huang@mediatek.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: phy: mediatek: Introduce mtk-phy-lib and add 2.5Gphy support | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 913 this patch: 913
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang fail Errors and warnings before: 934 this patch: 934
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 918 this patch: 918
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

SkyLake Huang (黃啟澤) May 20, 2024, 11:34 a.m. UTC
From: "SkyLake.Huang" <skylake.huang@mediatek.com>

v1:
Add support for internal 2.5Gphy on MT7988. This driver will load
necessary firmware, add appropriate time delay and figure out LED.
Also, certain control registers will be set to fix link-up issues.

v2:
1. Move md32_en_cfg_base & pmb_addr detection in probe function.
2. Do not read PMB & MD32_EN_CFG base addresses from dts. We won't
change that from board to board. Leave them in driver code. Also,
release those addresses after firmware is triggered.
3. Remove half duplex code which leads to ambiguity. Those are for
testing & developing previously.
4. Use correct BMCR definitions.
5. Correct config_aneg / get_features / read_status functions.
6. Change mt7988_2p5ge prefix to mt798x_2p5ge in case that our next
platform uses this 2.5Gphy driver.

v3:
1. Add range check for firmware.
2. Fix c45_ids.mmds_present in probe function.
3. Still use genphy_update_link() in read_status because
genphy_c45_read_link() can't correct detect link on this phy.

Signed-off-by: SkyLake.Huang <skylake.huang@mediatek.com>
---
 MAINTAINERS                          |   1 +
 drivers/net/phy/mediatek/Kconfig     |  11 +
 drivers/net/phy/mediatek/Makefile    |   1 +
 drivers/net/phy/mediatek/mtk-2p5ge.c | 398 +++++++++++++++++++++++++++
 4 files changed, 411 insertions(+)
 create mode 100644 drivers/net/phy/mediatek/mtk-2p5ge.c

Comments

Andrew Lunn May 20, 2024, 1:33 p.m. UTC | #1
> +static int mt798x_2p5ge_phy_config_init(struct phy_device *phydev)
> +{
> +	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> +	struct device *dev = &phydev->mdio.dev;
> +	const struct firmware *fw;
> +	struct pinctrl *pinctrl;
> +	int ret, i;
> +	u16 reg;
> +
> +	if (!priv->fw_loaded) {
> +		if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> +			dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
> +			return -EINVAL;
> +		}
> +

https://www.kernel.org/doc/html/latest/process/coding-style.html

  6) Functions

  Functions should be short and sweet, and do just one thing. They
  should fit on one or two screenfuls of text (the ISO/ANSI screen
  size is 80x24, as we all know), and do one thing and do that well.

This is a big function, which does multiple things. Maybe pull the
downloading of firmware into a helper.

> +		ret = request_firmware(&fw, MT7988_2P5GE_PMB, dev);
> +		if (ret) {
> +			dev_err(dev, "failed to load firmware: %s, ret: %d\n",
> +				MT7988_2P5GE_PMB, ret);
> +			return ret;
> +		}
> +
> +		if (fw->size != MT7988_2P5GE_PMB_SIZE) {
> +			dev_err(dev, "Firmware size 0x%zx != 0x%x\n",
> +				fw->size, MT7988_2P5GE_PMB_SIZE);
> +			return -EINVAL;
> +		}
> +
> +		reg = readw(priv->md32_en_cfg_base);
> +		if (reg & MD32_EN) {
> +			phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
> +			usleep_range(10000, 11000);
> +		}
> +		phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
> +
> +		/* Write magic number to safely stall MCU */
> +		phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800e, 0x1100);
> +		phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800f, 0x00df);
> +
> +		for (i = 0; i < MT7988_2P5GE_PMB_SIZE - 1; i += 4)
> +			writel(*((uint32_t *)(fw->data + i)), priv->pmb_addr + i);
> +		release_firmware(fw);
> +
> +		writew(reg & ~MD32_EN, priv->md32_en_cfg_base);
> +		writew(reg | MD32_EN, priv->md32_en_cfg_base);
> +		phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
> +		/* We need a delay here to stabilize initialization of MCU */
> +		usleep_range(7000, 8000);
> +		dev_info(dev, "Firmware loading/trigger ok.\n");
> +
> +		priv->fw_loaded = true;

So there is no way to know if this has already happened? Maybe the
bootloader downloaded the firmware so it could TFTP boot? Linux will
download the firmware again, which is a waste of time.

> +		iounmap(priv->md32_en_cfg_base);
> +		iounmap(priv->pmb_addr);
> +	}
> +
> +	/* Setup LED */
> +	phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
> +			 MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
> +			 MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
> +			 MTK_PHY_LED_ON_LINK2500);
> +	phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
> +			 MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
> +
> +	pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-led");

Calls to devm_pinctrl_get_select() is pretty unusual in drivers:

https://elixir.bootlin.com/linux/latest/C/ident/devm_pinctrl_get_select

Why is this needed? Generally, the DT file should describe the needed
pinmux setting, without needed anything additionally.

> +static int mt798x_2p5ge_phy_get_features(struct phy_device *phydev)
> +{
> +	int ret;
> +
> +	ret = genphy_c45_pma_read_abilities(phydev);
> +	if (ret)
> +		return ret;
> +
> +	/* We don't support HDX at MAC layer on mt7988. So mask phy's HDX capabilities here. */

So you make it clear, the MAC does not support half duplex. The MAC
should then remove it, not the PHY.

> +	linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, phydev->supported);
> +
> +	return 0;
> +}
> +
> +static int mt798x_2p5ge_phy_read_status(struct phy_device *phydev)
> +{
> +	int ret;
> +
> +	/* When MDIO_STAT1_LSTATUS is raised genphy_c45_read_link(), this phy actually
> +	 * hasn't finished AN. So use CL22's link update function instead.
> +	 */
> +	ret = genphy_update_link(phydev);
> +	if (ret)
> +		return ret;
> +
> +	phydev->speed = SPEED_UNKNOWN;
> +	phydev->duplex = DUPLEX_UNKNOWN;
> +	phydev->pause = 0;
> +	phydev->asym_pause = 0;
> +
> +	/* We'll read link speed through vendor specific registers down below. So remove
> +	 * phy_resolve_aneg_linkmode (AN on) & genphy_c45_read_pma (AN off).
> +	 */
> +	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
> +		ret = genphy_c45_read_lpa(phydev);
> +		if (ret < 0)
> +			return ret;
> +
> +		/* Clause 45 doesn't define 1000BaseT support. Read the link partner's 1G
> +		 * advertisement via Clause 22
> +		 */
> +		ret = phy_read(phydev, MII_STAT1000);
> +		if (ret < 0)
> +			return ret;
> +		mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret);
> +	} else if (phydev->autoneg == AUTONEG_DISABLE) {
> +		return -EOPNOTSUPP;
> +	}

It is a bit late doing this now. The user requested this a long time
ago, and it will be hard to understand why it now returns EOPNOTSUPP.
You should check for AUTONEG_DISABLE in config_aneg() and return the
error there.

      Andrew
Andrew Lunn May 20, 2024, 1:35 p.m. UTC | #2
> +static int mt798x_2p5ge_phy_config_init(struct phy_device *phydev)
> +{
> +	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> +	struct device *dev = &phydev->mdio.dev;
> +	const struct firmware *fw;
> +	struct pinctrl *pinctrl;
> +	int ret, i;
> +	u16 reg;
> +
> +	if (!priv->fw_loaded) {
> +		if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> +			dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
> +			return -EINVAL;
> +		}

...

> +static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
> +{
> +	struct mtk_i2p5ge_phy_priv *priv;
> +
> +	priv = devm_kzalloc(&phydev->mdio.dev,
> +			    sizeof(struct mtk_i2p5ge_phy_priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	switch (phydev->drv->phy_id) {
> +	case MTK_2P5GPHY_ID_MT7988:
> +		priv->pmb_addr = ioremap(MT7988_2P5GE_PMB_BASE, MT7988_2P5GE_PMB_LEN);
> +		if (!priv->pmb_addr)
> +			return -ENOMEM;
> +		priv->md32_en_cfg_base = ioremap(MT7988_2P5GE_MD32_EN_CFG_BASE,
> +						 MT7988_2P5GE_MD32_EN_CFG_LEN);
> +		if (!priv->md32_en_cfg_base)
> +			return -ENOMEM;
> +
> +		/* The original hardware only sets MDIO_DEVS_PMAPMD */
> +		phydev->c45_ids.mmds_present |= (MDIO_DEVS_PCS | MDIO_DEVS_AN |
> +						 MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}

How can priv->md32_en_cfg_base or priv->pmb_addr not be set in
mt798x_2p5ge_phy_config_init()

	Andrew
SkyLake Huang (黃啟澤) May 21, 2024, 8:17 a.m. UTC | #3
On Mon, 2024-05-20 at 15:35 +0200, Andrew Lunn wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  > +static int mt798x_2p5ge_phy_config_init(struct phy_device
> *phydev)
> > +{
> > +struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> > +struct device *dev = &phydev->mdio.dev;
> > +const struct firmware *fw;
> > +struct pinctrl *pinctrl;
> > +int ret, i;
> > +u16 reg;
> > +
> > +if (!priv->fw_loaded) {
> > +if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> > +dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
> > +return -EINVAL;
> > +}
> 
> ...
> 
> > +static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
> > +{
> > +struct mtk_i2p5ge_phy_priv *priv;
> > +
> > +priv = devm_kzalloc(&phydev->mdio.dev,
> > +    sizeof(struct mtk_i2p5ge_phy_priv), GFP_KERNEL);
> > +if (!priv)
> > +return -ENOMEM;
> > +
> > +switch (phydev->drv->phy_id) {
> > +case MTK_2P5GPHY_ID_MT7988:
> > +priv->pmb_addr = ioremap(MT7988_2P5GE_PMB_BASE,
> MT7988_2P5GE_PMB_LEN);
> > +if (!priv->pmb_addr)
> > +return -ENOMEM;
> > +priv->md32_en_cfg_base = ioremap(MT7988_2P5GE_MD32_EN_CFG_BASE,
> > + MT7988_2P5GE_MD32_EN_CFG_LEN);
> > +if (!priv->md32_en_cfg_base)
> > +return -ENOMEM;
> > +
> > +/* The original hardware only sets MDIO_DEVS_PMAPMD */
> > +phydev->c45_ids.mmds_present |= (MDIO_DEVS_PCS | MDIO_DEVS_AN |
> > + MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2);
> > +break;
> > +default:
> > +return -EINVAL;
> > +}
> 
> How can priv->md32_en_cfg_base or priv->pmb_addr not be set in
> mt798x_2p5ge_phy_config_init()
> 
> Andrew
Use command "$ifconfig eth1 down" and then "$ifconfig eth1 up",
mt798x_2p5ge_phy_config_init() will be called again and again. priv-
>md32_en_cfg_base & priv->pmb_addr are released after first firmware
loading. So just check these two values again for safety once priv-
>fw_loaded is overrided unexpectedly.
SkyLake Huang (黃啟澤) May 21, 2024, 9:07 a.m. UTC | #4
On Mon, 2024-05-20 at 15:33 +0200, Andrew Lunn wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  > +static int mt798x_2p5ge_phy_config_init(struct phy_device
> *phydev)
> > +{
> > +struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> > +struct device *dev = &phydev->mdio.dev;
> > +const struct firmware *fw;
> > +struct pinctrl *pinctrl;
> > +int ret, i;
> > +u16 reg;
> > +
> > +if (!priv->fw_loaded) {
> > +if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> > +dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
> > +return -EINVAL;
> > +}
> > +
> 
> https://www.kernel.org/doc/html/latest/process/coding-style.html
> 
>   6) Functions
> 
>   Functions should be short and sweet, and do just one thing. They
>   should fit on one or two screenfuls of text (the ISO/ANSI screen
>   size is 80x24, as we all know), and do one thing and do that well.
> 
> This is a big function, which does multiple things. Maybe pull the
> downloading of firmware into a helper.
> 
Agree. I'll move this part to another function.

> > +ret = request_firmware(&fw, MT7988_2P5GE_PMB, dev);
> > +if (ret) {
> > +dev_err(dev, "failed to load firmware: %s, ret: %d\n",
> > +MT7988_2P5GE_PMB, ret);
> > +return ret;
> > +}
> > +
> > +if (fw->size != MT7988_2P5GE_PMB_SIZE) {
> > +dev_err(dev, "Firmware size 0x%zx != 0x%x\n",
> > +fw->size, MT7988_2P5GE_PMB_SIZE);
> > +return -EINVAL;
> > +}
> > +
> > +reg = readw(priv->md32_en_cfg_base);
> > +if (reg & MD32_EN) {
> > +phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
> > +usleep_range(10000, 11000);
> > +}
> > +phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
> > +
> > +/* Write magic number to safely stall MCU */
> > +phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800e, 0x1100);
> > +phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800f, 0x00df);
> > +
> > +for (i = 0; i < MT7988_2P5GE_PMB_SIZE - 1; i += 4)
> > +writel(*((uint32_t *)(fw->data + i)), priv->pmb_addr + i);
> > +release_firmware(fw);
> > +
> > +writew(reg & ~MD32_EN, priv->md32_en_cfg_base);
> > +writew(reg | MD32_EN, priv->md32_en_cfg_base);
> > +phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
> > +/* We need a delay here to stabilize initialization of MCU */
> > +usleep_range(7000, 8000);
> > +dev_info(dev, "Firmware loading/trigger ok.\n");
> > +
> > +priv->fw_loaded = true;
> 
> So there is no way to know if this has already happened? Maybe the
> bootloader downloaded the firmware so it could TFTP boot? Linux will
> download the firmware again, which is a waste of time.
> 
Normal MTK's internal 2.5Gphy firmware loading procedure will be like:
1. request firmware
2. write firmware to correspoding memory address via sbus2pbus(faster
than MDIO)
3. Trigger MCU to run firmware by setting MD32_EN bit.

    So logically, we can determine if firmware is loaded by
bootloader(Uboot) or not by reading MD32_EN bit (step 3 above).
However, if step2 is bypassed, MD32_EN bit can still be set if CL22
reset (BMCR_RESET) is triggered by user. In this case, internal
2.5Gphy's MCU is running nothing.
    That is to say, for safety, we need to load firmware again right
atfer booting into Linux kernel. Actually, this takes just about 11ms.

> > +iounmap(priv->md32_en_cfg_base);
> > +iounmap(priv->pmb_addr);
> > +}
> > +
> > +/* Setup LED */
> > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
> > + MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
> > + MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
> > + MTK_PHY_LED_ON_LINK2500);
> > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
> > + MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
> > +
> > +pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-
> led");
> 
> Calls to devm_pinctrl_get_select() is pretty unusual in drivers:
> 
> 
https://elixir.bootlin.com/linux/latest/C/ident/devm_pinctrl_get_select
> 
> Why is this needed? Generally, the DT file should describe the needed
> pinmux setting, without needed anything additionally.
> 
This is needed because we need to switch to i2p5gbe-led pinmux group
after we set correct polarity. Or LED may blink unexpectedly.

> > +static int mt798x_2p5ge_phy_get_features(struct phy_device
> *phydev)
> > +{
> > +int ret;
> > +
> > +ret = genphy_c45_pma_read_abilities(phydev);
> > +if (ret)
> > +return ret;
> > +
> > +/* We don't support HDX at MAC layer on mt7988. So mask phy's HDX
> capabilities here. */
> 
> So you make it clear, the MAC does not support half duplex. The MAC
> should then remove it, not the PHY.
> 
> > +linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, phydev-
> >supported);
> > +
> > +return 0;
> > +}
> > +
> > +static int mt798x_2p5ge_phy_read_status(struct phy_device *phydev)
> > +{
> > +int ret;
> > +
> > +/* When MDIO_STAT1_LSTATUS is raised genphy_c45_read_link(), this
> phy actually
> > + * hasn't finished AN. So use CL22's link update function instead.
> > + */
> > +ret = genphy_update_link(phydev);
> > +if (ret)
> > +return ret;
> > +
> > +phydev->speed = SPEED_UNKNOWN;
> > +phydev->duplex = DUPLEX_UNKNOWN;
> > +phydev->pause = 0;
> > +phydev->asym_pause = 0;
> > +
> > +/* We'll read link speed through vendor specific registers down
> below. So remove
> > + * phy_resolve_aneg_linkmode (AN on) & genphy_c45_read_pma (AN
> off).
> > + */
> > +if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) 
> {
> > +ret = genphy_c45_read_lpa(phydev);
> > +if (ret < 0)
> > +return ret;
> > +
> > +/* Clause 45 doesn't define 1000BaseT support. Read the link
> partner's 1G
> > + * advertisement via Clause 22
> > + */
> > +ret = phy_read(phydev, MII_STAT1000);
> > +if (ret < 0)
> > +return ret;
> > +mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret);
> > +} else if (phydev->autoneg == AUTONEG_DISABLE) {
> > +return -EOPNOTSUPP;
> > +}
> 
> It is a bit late doing this now. The user requested this a long time
> ago, and it will be hard to understand why it now returns EOPNOTSUPP.
> You should check for AUTONEG_DISABLE in config_aneg() and return the
> error there.
> 
>       Andrew
Maybe I shouldn't return EOPNOTSUPP in config_aneg directly?
In this way, _phy_state_machine will be broken if I trigger "$ ethtool
-s ethtool eth1 autoneg off"

[  520.781368] ------------[ cut here ]------------
root@OpenWrt:/# [  520.785978] _phy_start_aneg+0x0/0xa4: returned: -95
[  520.792263] WARNING: CPU: 3 PID: 423 at drivers/net/phy/phy.c:1254 _phy_state_machine+0x78/0x258
[  520.801039] Modules linked in:
[  520.804087] CPU: 3 PID: 423 Comm: kworker/u16:4 Tainted: G        W          6.8.0-rc7-next-20240306-bpi-r3+ #102
[  520.814335] Hardware name: MediaTek MT7988A Reference Board (DT)
[  520.820330] Workqueue: events_power_efficient phy_state_machine
[  520.826240] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[  520.833190] pc : _phy_state_machine+0x78/0x258
[  520.837623] lr : _phy_state_machine+0x78/0x258
[  520.842056] sp : ffff800084b7bd30
[  520.845360] x29: ffff800084b7bd30 x28: 0000000000000000 x27: 0000000000000000
[  520.852487] x26: ffff000000c56900 x25: ffff000000c56980 x24: ffff000000012ac0
[  520.859613] x23: ffff00000001d005 x22: ffff000000fdf000 x21: 0000000000000001
[  520.866738] x20: 0000000000000004 x19: ffff000003a90000 x18: ffffffffffffffff
[  520.873864] x17: 0000000000000000 x16: 0000000000000000 x15: ffff800104b7b977
[  520.880988] x14: 0000000000000000 x13: 0000000000000183 x12: 00000000ffffffea
[  520.888114] x11: 0000000000000001 x10: 0000000000000001 x9 : ffff8000837222f0
[  520.895239] x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000000001
[  520.902365] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[  520.909490] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000004120000
[  520.916615] Call trace:
[  520.919052]  _phy_state_machine+0x78/0x258
[  520.923139]  phy_state_machine+0x2c/0x80
[  520.927051]  process_one_work+0x178/0x31c
[  520.931054]  worker_thread+0x280/0x494
[  520.934795]  kthread+0xe4/0xe8
[  520.937841]  ret_from_fork+0x10/0x20
[  520.941408] ---[ end trace 0000000000000000 ]---

Now I prefer to give a warning like this, in
mt798x_2p5ge_phy_config_aneg():
if (phydev->autoneg == AUTONEG_DISABLE) {
	dev_warn(&phydev->mdio.dev, "Once AN off is set, this phy can't
link.\n");
	return genphy_c45_pma_setup_forced(phydev);
}
Andrew Lunn May 21, 2024, 5:20 p.m. UTC | #5
>     That is to say, for safety, we need to load firmware again right
> atfer booting into Linux kernel. Actually, this takes just about 11ms.

Since this is only 11ms, its not a big deal. If this was going over
MDIO it would be much slower and then it does become significant.

> > > +/* Setup LED */
> > > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
> > > + MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
> > > + MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
> > > + MTK_PHY_LED_ON_LINK2500);
> > > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
> > > + MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
> > > +
> > > +pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-
> > led");
> > 
> > Calls to devm_pinctrl_get_select() is pretty unusual in drivers:
> > 
> > 
> https://elixir.bootlin.com/linux/latest/C/ident/devm_pinctrl_get_select
> > 
> > Why is this needed? Generally, the DT file should describe the needed
> > pinmux setting, without needed anything additionally.
> > 
> This is needed because we need to switch to i2p5gbe-led pinmux group
> after we set correct polarity. Or LED may blink unexpectedly.

Since this is unusual, you should add a comment. Also, does the device
tree binding explain this? I expect most DT authors are used to
listing all the needed pins in the default pinmux node, and so will do
that, unless there is a comment in the binding advising against it.

> > It is a bit late doing this now. The user requested this a long time
> > ago, and it will be hard to understand why it now returns EOPNOTSUPP.
> > You should check for AUTONEG_DISABLE in config_aneg() and return the
> > error there.
> > 
> >       Andrew
> Maybe I shouldn't return EOPNOTSUPP in config_aneg directly?
> In this way, _phy_state_machine will be broken if I trigger "$ ethtool
> -s ethtool eth1 autoneg off"
> 
> [  520.781368] ------------[ cut here ]------------
> root@OpenWrt:/# [  520.785978] _phy_start_aneg+0x0/0xa4: returned: -95
> [  520.792263] WARNING: CPU: 3 PID: 423 at drivers/net/phy/phy.c:1254 _phy_state_machine+0x78/0x258
> [  520.801039] Modules linked in:
> [  520.804087] CPU: 3 PID: 423 Comm: kworker/u16:4 Tainted: G        W          6.8.0-rc7-next-20240306-bpi-r3+ #102
> [  520.814335] Hardware name: MediaTek MT7988A Reference Board (DT)
> [  520.820330] Workqueue: events_power_efficient phy_state_machine
> [  520.826240] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [  520.833190] pc : _phy_state_machine+0x78/0x258
> [  520.837623] lr : _phy_state_machine+0x78/0x258
> [  520.842056] sp : ffff800084b7bd30
> [  520.845360] x29: ffff800084b7bd30 x28: 0000000000000000 x27: 0000000000000000
> [  520.852487] x26: ffff000000c56900 x25: ffff000000c56980 x24: ffff000000012ac0
> [  520.859613] x23: ffff00000001d005 x22: ffff000000fdf000 x21: 0000000000000001
> [  520.866738] x20: 0000000000000004 x19: ffff000003a90000 x18: ffffffffffffffff
> [  520.873864] x17: 0000000000000000 x16: 0000000000000000 x15: ffff800104b7b977
> [  520.880988] x14: 0000000000000000 x13: 0000000000000183 x12: 00000000ffffffea
> [  520.888114] x11: 0000000000000001 x10: 0000000000000001 x9 : ffff8000837222f0
> [  520.895239] x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000000001
> [  520.902365] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
> [  520.909490] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000004120000
> [  520.916615] Call trace:
> [  520.919052]  _phy_state_machine+0x78/0x258
> [  520.923139]  phy_state_machine+0x2c/0x80
> [  520.927051]  process_one_work+0x178/0x31c
> [  520.931054]  worker_thread+0x280/0x494
> [  520.934795]  kthread+0xe4/0xe8
> [  520.937841]  ret_from_fork+0x10/0x20
> [  520.941408] ---[ end trace 0000000000000000 ]---
> 
> Now I prefer to give a warning like this, in
> mt798x_2p5ge_phy_config_aneg():
> if (phydev->autoneg == AUTONEG_DISABLE) {
> 	dev_warn(&phydev->mdio.dev, "Once AN off is set, this phy can't
> link.\n");
> 	return genphy_c45_pma_setup_forced(phydev);
> }

That is ugly.

Ideally we should fix phylib to support a PHY which cannot do fixed
link. I suggest you first look at phy_ethtool_ksettings_set() and see
what it does if passed cmd->base.autoneg == True, but
ETHTOOL_LINK_MODE_Autoneg_BIT is not set in supported, because the PHY
does not support autoneg. Is that handled? Does it return EOPNOTSUPP?
Understanding this might help you implement the opposite.

The opposite is however not easy. There is no linkmode bit indicating
a PHY can do forced settings. The BMSR has a bit indicating the PHY is
capable of auto-neg, which is used to set
ETHTOOL_LINK_MODE_Autoneg_BIT. However there is no bit to indicate the
PHY supports forced configuration. The standard just assumes all PHYs
which are standard conforming can do forced settings. And i think this
is the first PHY we have come across which is broken like this.

So maybe we cannot fix this in phylib. In that case, the MAC drivers
ksetting_set() should check if the user is attempting to disable
autoneg, and return -EOPNOTSUPP. And i would keep the stack trace
above, which will help developers understand they need the MAC to help
out work around the broken PHY. You can probably also simplify the PHY
driver, take out any code which tries to handle forced settings.

	Andrew
Andrew Lunn May 21, 2024, 5:28 p.m. UTC | #6
On Tue, May 21, 2024 at 08:17:32AM +0000, SkyLake Huang (黃啟澤) wrote:
> On Mon, 2024-05-20 at 15:35 +0200, Andrew Lunn wrote:
> >  	 
> > External email : Please do not click links or open attachments until
> > you have verified the sender or the content.
> >  > +static int mt798x_2p5ge_phy_config_init(struct phy_device
> > *phydev)
> > > +{
> > > +struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> > > +struct device *dev = &phydev->mdio.dev;
> > > +const struct firmware *fw;
> > > +struct pinctrl *pinctrl;
> > > +int ret, i;
> > > +u16 reg;
> > > +
> > > +if (!priv->fw_loaded) {
> > > +if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> > > +dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
> > > +return -EINVAL;
> > > +}
> > 
> > ...
> > 
> > > +static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
> > > +{
> > > +struct mtk_i2p5ge_phy_priv *priv;
> > > +
> > > +priv = devm_kzalloc(&phydev->mdio.dev,
> > > +    sizeof(struct mtk_i2p5ge_phy_priv), GFP_KERNEL);
> > > +if (!priv)
> > > +return -ENOMEM;
> > > +
> > > +switch (phydev->drv->phy_id) {
> > > +case MTK_2P5GPHY_ID_MT7988:
> > > +priv->pmb_addr = ioremap(MT7988_2P5GE_PMB_BASE,
> > MT7988_2P5GE_PMB_LEN);
> > > +if (!priv->pmb_addr)
> > > +return -ENOMEM;
> > > +priv->md32_en_cfg_base = ioremap(MT7988_2P5GE_MD32_EN_CFG_BASE,
> > > + MT7988_2P5GE_MD32_EN_CFG_LEN);
> > > +if (!priv->md32_en_cfg_base)
> > > +return -ENOMEM;
> > > +
> > > +/* The original hardware only sets MDIO_DEVS_PMAPMD */
> > > +phydev->c45_ids.mmds_present |= (MDIO_DEVS_PCS | MDIO_DEVS_AN |
> > > + MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2);
> > > +break;
> > > +default:
> > > +return -EINVAL;
> > > +}
> > 
> > How can priv->md32_en_cfg_base or priv->pmb_addr not be set in
> > mt798x_2p5ge_phy_config_init()
> > 
> > Andrew
> Use command "$ifconfig eth1 down" and then "$ifconfig eth1 up",
> mt798x_2p5ge_phy_config_init() will be called again and again. priv-
> >md32_en_cfg_base & priv->pmb_addr are released after first firmware
> loading. So just check these two values again for safety once priv-
> >fw_loaded is overrided unexpectedly.

So the code is unsymmetrical. The memory is mapped in
mt798x_2p5ge_phy_probe() but unmapped in
mt798x_2p5ge_phy_config_init(). It would be better style to unmap it
in mt798x_2p5ge_phy_remove(). Alternatively, just map it when
downloading firmware, and unmap it straight afterwards.

Also, we generally discourage defensive programming. It is much better
to actually understand the code and know something is not possible.

	Andrew
SkyLake Huang (黃啟澤) May 24, 2024, 4:38 a.m. UTC | #7
On Tue, 2024-05-21 at 19:20 +0200, Andrew Lunn wrote:
> > > > +/* Setup LED */
> > > > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
> > > > + MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
> > > > + MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
> > > > + MTK_PHY_LED_ON_LINK2500);
> > > > +phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
> > > > + MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
> > > > +
> > > > +pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-
> > > led");
> > > 
> > > Calls to devm_pinctrl_get_select() is pretty unusual in drivers:
> > > 
> > > 
> > 
> https://elixir.bootlin.com/linux/latest/C/ident/devm_pinctrl_get_select
> > > 
> > > Why is this needed? Generally, the DT file should describe the
> needed
> > > pinmux setting, without needed anything additionally.
> > > 
> > This is needed because we need to switch to i2p5gbe-led pinmux
> group
> > after we set correct polarity. Or LED may blink unexpectedly.
> 
> Since this is unusual, you should add a comment. Also, does the
> device
> tree binding explain this? I expect most DT authors are used to
> listing all the needed pins in the default pinmux node, and so will
> do
> that, unless there is a comment in the binding advising against it.
> 
Then I'll add comments and remove "returning error if picntrl switching
fails" like this:
/* Switch pinctrl after setting polarity to avoid bogus blinking */
pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-led");
if (IS_ERR(pinctrl)) {
	dev_err(&phydev->mdio.dev, "Fail to set LED pins!\n");
}

Actually current drivers/net/phy/mediatek-ge-soc.c has the same
mechanism, which utilizing gbe-led pinmux group in
mt7988_phy_fix_leds_polarities():

/* Only now setup pinctrl to avoid bogus blinking */
pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "gbe-led");
if (IS_ERR(pinctrl))
	dev_err(&phydev->mdio.bus->dev, "Failed to setup PHY LED
pinctrl\n");

Actually those pinmux groups exist in drivers/pinctrl/mediatek/pinctrl-
mt7988 of current openWRT tree:

https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob;f=target/linux/mediatek/files-6.1/drivers/pinctrl/mediatek/pinctrl-mt7988.c;h=9f9291124522c916c4e92f1598e0be44c3badad5;hb=f16dc4b42fb265affb2298e815a7ce0a13d60da6

I believe this will be upstreamed later.

> > > It is a bit late doing this now. The user requested this a long
> time
> > > ago, and it will be hard to understand why it now returns
> EOPNOTSUPP.
> > > You should check for AUTONEG_DISABLE in config_aneg() and return
> the
> > > error there.
> > > 
> > >       Andrew
> > Maybe I shouldn't return EOPNOTSUPP in config_aneg directly?
> > In this way, _phy_state_machine will be broken if I trigger "$
> ethtool
> > -s ethtool eth1 autoneg off"
> > 
> > [  520.781368] ------------[ cut here ]------------
> > root@OpenWrt:/# [  520.785978] _phy_start_aneg+0x0/0xa4: returned:
> -95
> > [  520.792263] WARNING: CPU: 3 PID: 423 at
> drivers/net/phy/phy.c:1254 _phy_state_machine+0x78/0x258
> > [  520.801039] Modules linked in:
> > [  520.804087] CPU: 3 PID: 423 Comm: kworker/u16:4 Tainted:
> G        W          6.8.0-rc7-next-20240306-bpi-r3+ #102
> > [  520.814335] Hardware name: MediaTek MT7988A Reference Board (DT)
> > [  520.820330] Workqueue: events_power_efficient phy_state_machine
> > [  520.826240] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT
> -SSBS BTYPE=--)
> > [  520.833190] pc : _phy_state_machine+0x78/0x258
> > [  520.837623] lr : _phy_state_machine+0x78/0x258
> > [  520.842056] sp : ffff800084b7bd30
> > [  520.845360] x29: ffff800084b7bd30 x28: 0000000000000000 x27:
> 0000000000000000
> > [  520.852487] x26: ffff000000c56900 x25: ffff000000c56980 x24:
> ffff000000012ac0
> > [  520.859613] x23: ffff00000001d005 x22: ffff000000fdf000 x21:
> 0000000000000001
> > [  520.866738] x20: 0000000000000004 x19: ffff000003a90000 x18:
> ffffffffffffffff
> > [  520.873864] x17: 0000000000000000 x16: 0000000000000000 x15:
> ffff800104b7b977
> > [  520.880988] x14: 0000000000000000 x13: 0000000000000183 x12:
> 00000000ffffffea
> > [  520.888114] x11: 0000000000000001 x10: 0000000000000001 x9 :
> ffff8000837222f0
> > [  520.895239] x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 :
> 0000000000000001
> > [  520.902365] x5 : 0000000000000000 x4 : 0000000000000000 x3 :
> 0000000000000000
> > [  520.909490] x2 : 0000000000000000 x1 : 0000000000000000 x0 :
> ffff000004120000
> > [  520.916615] Call trace:
> > [  520.919052]  _phy_state_machine+0x78/0x258
> > [  520.923139]  phy_state_machine+0x2c/0x80
> > [  520.927051]  process_one_work+0x178/0x31c
> > [  520.931054]  worker_thread+0x280/0x494
> > [  520.934795]  kthread+0xe4/0xe8
> > [  520.937841]  ret_from_fork+0x10/0x20
> > [  520.941408] ---[ end trace 0000000000000000 ]---
> > 
> > Now I prefer to give a warning like this, in
> > mt798x_2p5ge_phy_config_aneg():
> > if (phydev->autoneg == AUTONEG_DISABLE) {
> > dev_warn(&phydev->mdio.dev, "Once AN off is set, this phy can't
> > link.\n");
> > return genphy_c45_pma_setup_forced(phydev);
> > }
> 
> That is ugly.
> 
> Ideally we should fix phylib to support a PHY which cannot do fixed
> link. I suggest you first look at phy_ethtool_ksettings_set() and see
> what it does if passed cmd->base.autoneg == True, but
> ETHTOOL_LINK_MODE_Autoneg_BIT is not set in supported, because the
> PHY
> does not support autoneg. Is that handled? Does it return EOPNOTSUPP?
> Understanding this might help you implement the opposite.
> 
> The opposite is however not easy. There is no linkmode bit indicating
> a PHY can do forced settings. The BMSR has a bit indicating the PHY
> is
> capable of auto-neg, which is used to set
> ETHTOOL_LINK_MODE_Autoneg_BIT. However there is no bit to indicate
> the
> PHY supports forced configuration. The standard just assumes all PHYs
> which are standard conforming can do forced settings. And i think
> this
> is the first PHY we have come across which is broken like this.
> 
Thanks for your clear explanation.

> So maybe we cannot fix this in phylib. In that case, the MAC drivers
> ksetting_set() should check if the user is attempting to disable
> autoneg, and return -EOPNOTSUPP. And i would keep the stack trace
> above, which will help developers understand they need the MAC to
> help
> out work around the broken PHY. You can probably also simplify the
> PHY
> driver, take out any code which tries to handle forced settings.
> 
> Andrew
> 
According to this, I think maybe we may need to patch
drivers/net/ethernet/mediatek/mtk_eth_soc.c later. I'll fix
mt798x_2p5ge_phy_config_aneg() like this first:

if (phydev->autoneg == AUTONEG_DISABLE)
	return -EOPNOTSUPP;
SkyLake Huang (黃啟澤) May 24, 2024, 4:44 a.m. UTC | #8
On Tue, 2024-05-21 at 19:28 +0200, Andrew Lunn wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  On Tue, May 21, 2024 at 08:17:32AM +0000, SkyLake Huang (黃啟澤) wrote:
> > On Mon, 2024-05-20 at 15:35 +0200, Andrew Lunn wrote:
> > >   
> > > External email : Please do not click links or open attachments
> until
> > > you have verified the sender or the content.
> > >  > +static int mt798x_2p5ge_phy_config_init(struct phy_device
> > > *phydev)
> > > > +{
> > > > +struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
> > > > +struct device *dev = &phydev->mdio.dev;
> > > > +const struct firmware *fw;
> > > > +struct pinctrl *pinctrl;
> > > > +int ret, i;
> > > > +u16 reg;
> > > > +
> > > > +if (!priv->fw_loaded) {
> > > > +if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
> > > > +dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't
> valid\n");
> > > > +return -EINVAL;
> > > > +}
> > > 
> > > ...
> > > 
> > > > +static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
> > > > +{
> > > > +struct mtk_i2p5ge_phy_priv *priv;
> > > > +
> > > > +priv = devm_kzalloc(&phydev->mdio.dev,
> > > > +    sizeof(struct mtk_i2p5ge_phy_priv), GFP_KERNEL);
> > > > +if (!priv)
> > > > +return -ENOMEM;
> > > > +
> > > > +switch (phydev->drv->phy_id) {
> > > > +case MTK_2P5GPHY_ID_MT7988:
> > > > +priv->pmb_addr = ioremap(MT7988_2P5GE_PMB_BASE,
> > > MT7988_2P5GE_PMB_LEN);
> > > > +if (!priv->pmb_addr)
> > > > +return -ENOMEM;
> > > > +priv->md32_en_cfg_base =
> ioremap(MT7988_2P5GE_MD32_EN_CFG_BASE,
> > > > + MT7988_2P5GE_MD32_EN_CFG_LEN);
> > > > +if (!priv->md32_en_cfg_base)
> > > > +return -ENOMEM;
> > > > +
> > > > +/* The original hardware only sets MDIO_DEVS_PMAPMD */
> > > > +phydev->c45_ids.mmds_present |= (MDIO_DEVS_PCS | MDIO_DEVS_AN
> |
> > > > + MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2);
> > > > +break;
> > > > +default:
> > > > +return -EINVAL;
> > > > +}
> > > 
> > > How can priv->md32_en_cfg_base or priv->pmb_addr not be set in
> > > mt798x_2p5ge_phy_config_init()
> > > 
> > > Andrew
> > Use command "$ifconfig eth1 down" and then "$ifconfig eth1 up",
> > mt798x_2p5ge_phy_config_init() will be called again and again.
> priv-
> > >md32_en_cfg_base & priv->pmb_addr are released after first
> firmware
> > loading. So just check these two values again for safety once priv-
> > >fw_loaded is overrided unexpectedly.
> 
> So the code is unsymmetrical. The memory is mapped in
> mt798x_2p5ge_phy_probe() but unmapped in
> mt798x_2p5ge_phy_config_init(). It would be better style to unmap it
> in mt798x_2p5ge_phy_remove(). Alternatively, just map it when
> downloading firmware, and unmap it straight afterwards.
> 
> Also, we generally discourage defensive programming. It is much
> better
> to actually understand the code and know something is not possible.
> 
> Andrew

OK. I'll fix this in v5. Map & unmap it locally in
mt798x_2p5ge_phy_config_aneg()
 -> mt798x_2p5ge_hy_load_fw()

Sky
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 37e36b2f246e..27057d16cc29 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13959,6 +13959,7 @@  M:	Qingfang Deng <dqfext@gmail.com>
 M:	SkyLake Huang <SkyLake.Huang@mediatek.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
+F:	drivers/net/phy/mediatek/mtk-2p5ge.c
 F:	drivers/net/phy/mediatek/mtk-ge-soc.c
 F:	drivers/net/phy/mediatek/mtk-phy-lib.c
 F:	drivers/net/phy/mediatek/mtk-ge.c
diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig
index b11b19523ff1..88ca8819a8ff 100644
--- a/drivers/net/phy/mediatek/Kconfig
+++ b/drivers/net/phy/mediatek/Kconfig
@@ -25,3 +25,14 @@  config MEDIATEK_GE_SOC_PHY
 	  the MT7981 and MT7988 SoCs. These PHYs need calibration data
 	  present in the SoCs efuse and will dynamically calibrate VCM
 	  (common-mode voltage) during startup.
+
+config MEDIATEK_2P5GE_PHY
+	tristate "MediaTek 2.5Gb Ethernet PHYs"
+	depends on (ARM64 && ARCH_MEDIATEK) || COMPILE_TEST
+	select MTK_NET_PHYLIB
+	help
+	  Supports MediaTek SoC built-in 2.5Gb Ethernet PHYs.
+
+	  This will load necessary firmware and add appropriate time delay.
+	  Accelerate this procedure through internal pbus instead of MDIO
+	  bus. Certain link-up issues will also be fixed here.
diff --git a/drivers/net/phy/mediatek/Makefile b/drivers/net/phy/mediatek/Makefile
index 814879d0abe5..c6db8abd2c9c 100644
--- a/drivers/net/phy/mediatek/Makefile
+++ b/drivers/net/phy/mediatek/Makefile
@@ -2,3 +2,4 @@ 
 obj-$(CONFIG_MTK_NET_PHYLIB)		+= mtk-phy-lib.o
 obj-$(CONFIG_MEDIATEK_GE_PHY)		+= mtk-ge.o
 obj-$(CONFIG_MEDIATEK_GE_SOC_PHY)	+= mtk-ge-soc.o
+obj-$(CONFIG_MEDIATEK_2P5GE_PHY)	+= mtk-2p5ge.o
diff --git a/drivers/net/phy/mediatek/mtk-2p5ge.c b/drivers/net/phy/mediatek/mtk-2p5ge.c
new file mode 100644
index 000000000000..772adc0ca0dd
--- /dev/null
+++ b/drivers/net/phy/mediatek/mtk-2p5ge.c
@@ -0,0 +1,398 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/phy.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+
+#include "mtk.h"
+
+#define MTK_2P5GPHY_ID_MT7988	(0x00339c11)
+
+#define MT7988_2P5GE_PMB "mediatek/mt7988/i2p5ge-phy-pmb.bin"
+#define MT7988_2P5GE_PMB_SIZE	(0x20000)
+#define MT7988_2P5GE_PMB_BASE	(0x0f100000)
+#define MT7988_2P5GE_PMB_LEN	(0x20000)
+#define MT7988_2P5GE_MD32_EN_CFG_BASE	(0x0f0f0018)
+#define MT7988_2P5GE_MD32_EN_CFG_LEN	(0x20)
+#define   MD32_EN			BIT(0)
+
+#define BASE100T_STATUS_EXTEND		(0x10)
+#define BASE1000T_STATUS_EXTEND		(0x11)
+#define EXTEND_CTRL_AND_STATUS		(0x16)
+
+#define PHY_AUX_CTRL_STATUS		(0x1d)
+#define   PHY_AUX_DPX_MASK		GENMASK(5, 5)
+#define   PHY_AUX_SPEED_MASK		GENMASK(4, 2)
+
+#define MTK_PHY_LPI_PCS_DSP_CTRL		(0x121)
+#define   MTK_PHY_LPI_SIG_EN_LO_THRESH100_MASK	GENMASK(12, 8)
+
+/* Registers on MTK phy page 1*/
+#define MTK_PHY_PAGE_EXTENDED_1			0x0001
+#define MTK_PHY_AUX_CTRL_AND_STATUS		(0x14)
+#define   MTK_PHY_ENABLE_DOWNSHIFT		BIT(4)
+
+/* Registers on Token Ring debug nodes */
+/* ch_addr = 0x0, node_addr = 0xf, data_addr = 0x3c */
+#define AUTO_NP_10XEN				BIT(6)
+
+struct mtk_i2p5ge_phy_priv {
+	bool fw_loaded;
+	void __iomem *pmb_addr;
+	void __iomem *md32_en_cfg_base;
+	unsigned long led_state;
+};
+
+enum {
+	PHY_AUX_SPD_10 = 0,
+	PHY_AUX_SPD_100,
+	PHY_AUX_SPD_1000,
+	PHY_AUX_SPD_2500,
+};
+
+static int mt798x_2p5ge_phy_config_init(struct phy_device *phydev)
+{
+	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
+	struct device *dev = &phydev->mdio.dev;
+	const struct firmware *fw;
+	struct pinctrl *pinctrl;
+	int ret, i;
+	u16 reg;
+
+	if (!priv->fw_loaded) {
+		if (!priv->md32_en_cfg_base || !priv->pmb_addr) {
+			dev_err(dev, "MD32_EN_CFG base & PMB addresses aren't valid\n");
+			return -EINVAL;
+		}
+
+		ret = request_firmware(&fw, MT7988_2P5GE_PMB, dev);
+		if (ret) {
+			dev_err(dev, "failed to load firmware: %s, ret: %d\n",
+				MT7988_2P5GE_PMB, ret);
+			return ret;
+		}
+
+		if (fw->size != MT7988_2P5GE_PMB_SIZE) {
+			dev_err(dev, "Firmware size 0x%zx != 0x%x\n",
+				fw->size, MT7988_2P5GE_PMB_SIZE);
+			return -EINVAL;
+		}
+
+		reg = readw(priv->md32_en_cfg_base);
+		if (reg & MD32_EN) {
+			phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
+			usleep_range(10000, 11000);
+		}
+		phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
+
+		/* Write magic number to safely stall MCU */
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800e, 0x1100);
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x800f, 0x00df);
+
+		for (i = 0; i < MT7988_2P5GE_PMB_SIZE - 1; i += 4)
+			writel(*((uint32_t *)(fw->data + i)), priv->pmb_addr + i);
+		release_firmware(fw);
+
+		writew(reg & ~MD32_EN, priv->md32_en_cfg_base);
+		writew(reg | MD32_EN, priv->md32_en_cfg_base);
+		phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
+		/* We need a delay here to stabilize initialization of MCU */
+		usleep_range(7000, 8000);
+		dev_info(dev, "Firmware loading/trigger ok.\n");
+
+		priv->fw_loaded = true;
+		iounmap(priv->md32_en_cfg_base);
+		iounmap(priv->pmb_addr);
+	}
+
+	/* Setup LED */
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED0_ON_CTRL,
+			 MTK_PHY_LED_ON_POLARITY | MTK_PHY_LED_ON_LINK10 |
+			 MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK1000 |
+			 MTK_PHY_LED_ON_LINK2500);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MTK_PHY_LED1_ON_CTRL,
+			 MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX);
+
+	pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "i2p5gbe-led");
+	if (IS_ERR(pinctrl)) {
+		dev_err(&phydev->mdio.dev, "Fail to set LED pins!\n");
+		return PTR_ERR(pinctrl);
+	}
+
+	phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LPI_PCS_DSP_CTRL,
+		       MTK_PHY_LPI_SIG_EN_LO_THRESH100_MASK, 0);
+
+	/* Enable 16-bit next page exchange bit if 1000-BT isn't advertizing */
+	tr_modify(phydev, 0x0, 0xf, 0x3c, AUTO_NP_10XEN,
+		  FIELD_PREP(AUTO_NP_10XEN, 0x1));
+
+	/* Enable HW auto downshift */
+	phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED_1, MTK_PHY_AUX_CTRL_AND_STATUS,
+			 0, MTK_PHY_ENABLE_DOWNSHIFT);
+
+	return 0;
+}
+
+static int mt798x_2p5ge_phy_config_aneg(struct phy_device *phydev)
+{
+	bool changed = false;
+	u32 adv;
+	int ret;
+
+	if (phydev->autoneg == AUTONEG_DISABLE)
+		return genphy_c45_pma_setup_forced(phydev);
+
+	ret = genphy_c45_an_config_aneg(phydev);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		changed = true;
+
+	/* Clause 45 doesn't define 1000BaseT support. Use Clause 22 instead in our design.
+	 */
+	adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
+	ret = phy_modify_changed(phydev, MII_CTRL1000, ADVERTISE_1000FULL, adv);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		changed = true;
+
+	return genphy_c45_check_and_restart_aneg(phydev, changed);
+}
+
+static int mt798x_2p5ge_phy_get_features(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_c45_pma_read_abilities(phydev);
+	if (ret)
+		return ret;
+
+	/* We don't support HDX at MAC layer on mt7988. So mask phy's HDX capabilities here. */
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, phydev->supported);
+
+	return 0;
+}
+
+static int mt798x_2p5ge_phy_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	/* When MDIO_STAT1_LSTATUS is raised genphy_c45_read_link(), this phy actually
+	 * hasn't finished AN. So use CL22's link update function instead.
+	 */
+	ret = genphy_update_link(phydev);
+	if (ret)
+		return ret;
+
+	phydev->speed = SPEED_UNKNOWN;
+	phydev->duplex = DUPLEX_UNKNOWN;
+	phydev->pause = 0;
+	phydev->asym_pause = 0;
+
+	/* We'll read link speed through vendor specific registers down below. So remove
+	 * phy_resolve_aneg_linkmode (AN on) & genphy_c45_read_pma (AN off).
+	 */
+	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+		ret = genphy_c45_read_lpa(phydev);
+		if (ret < 0)
+			return ret;
+
+		/* Clause 45 doesn't define 1000BaseT support. Read the link partner's 1G
+		 * advertisement via Clause 22
+		 */
+		ret = phy_read(phydev, MII_STAT1000);
+		if (ret < 0)
+			return ret;
+		mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret);
+	} else if (phydev->autoneg == AUTONEG_DISABLE) {
+		return -EOPNOTSUPP;
+	}
+
+	if (phydev->link) {
+		ret = phy_read(phydev, PHY_AUX_CTRL_STATUS);
+		if (ret < 0)
+			return ret;
+
+		switch (FIELD_GET(PHY_AUX_SPEED_MASK, ret)) {
+		case PHY_AUX_SPD_10:
+			phydev->speed = SPEED_10;
+			break;
+		case PHY_AUX_SPD_100:
+			phydev->speed = SPEED_100;
+			break;
+		case PHY_AUX_SPD_1000:
+			phydev->speed = SPEED_1000;
+			break;
+		case PHY_AUX_SPD_2500:
+			phydev->speed = SPEED_2500;
+			break;
+		}
+
+		phydev->duplex = DUPLEX_FULL;
+		/* FIXME: The current firmware always enables rate adaptation mode. */
+		phydev->rate_matching = RATE_MATCH_PAUSE;
+	}
+
+	return 0;
+}
+
+static int mt798x_2p5ge_phy_get_rate_matching(struct phy_device *phydev,
+					      phy_interface_t iface)
+{
+	if (iface == PHY_INTERFACE_MODE_XGMII)
+		return RATE_MATCH_PAUSE;
+	return RATE_MATCH_NONE;
+}
+
+static const unsigned long supported_triggers = (BIT(TRIGGER_NETDEV_FULL_DUPLEX) |
+						 BIT(TRIGGER_NETDEV_LINK)        |
+						 BIT(TRIGGER_NETDEV_LINK_10)     |
+						 BIT(TRIGGER_NETDEV_LINK_100)    |
+						 BIT(TRIGGER_NETDEV_LINK_1000)   |
+						 BIT(TRIGGER_NETDEV_LINK_2500)   |
+						 BIT(TRIGGER_NETDEV_RX)          |
+						 BIT(TRIGGER_NETDEV_TX));
+
+static int mt798x_2p5ge_phy_led_blink_set(struct phy_device *phydev, u8 index,
+					  unsigned long *delay_on,
+					  unsigned long *delay_off)
+{
+	bool blinking = false;
+	int err = 0;
+	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
+
+	if (index > 1)
+		return -EINVAL;
+
+	if (delay_on && delay_off && (*delay_on > 0) && (*delay_off > 0)) {
+		blinking = true;
+		*delay_on = 50;
+		*delay_off = 50;
+	}
+
+	err = mtk_phy_hw_led_blink_set(phydev, index, &priv->led_state, blinking);
+	if (err)
+		return err;
+
+	return mtk_phy_hw_led_on_set(phydev, index, &priv->led_state,
+				     MTK_2P5GPHY_LED_ON_MASK, false);
+}
+
+static int mt798x_2p5ge_phy_led_brightness_set(struct phy_device *phydev,
+					       u8 index, enum led_brightness value)
+{
+	int err;
+	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
+
+	err = mtk_phy_hw_led_blink_set(phydev, index, &priv->led_state, false);
+	if (err)
+		return err;
+
+	return mtk_phy_hw_led_on_set(phydev, index, &priv->led_state,
+				     MTK_2P5GPHY_LED_ON_MASK, (value != LED_OFF));
+}
+
+static int mt798x_2p5ge_phy_led_hw_is_supported(struct phy_device *phydev, u8 index,
+						unsigned long rules)
+{
+	return mtk_phy_led_hw_is_supported(phydev, index, rules, supported_triggers);
+}
+
+static int mt798x_2p5ge_phy_led_hw_control_get(struct phy_device *phydev, u8 index,
+					       unsigned long *rules)
+{
+	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
+
+	return mtk_phy_led_hw_ctrl_get(phydev, index, rules, &priv->led_state,
+				       MTK_2P5GPHY_LED_ON_SET,
+				       MTK_2P5GPHY_LED_RX_BLINK_SET,
+				       MTK_2P5GPHY_LED_TX_BLINK_SET);
+};
+
+static int mt798x_2p5ge_phy_led_hw_control_set(struct phy_device *phydev, u8 index,
+					       unsigned long rules)
+{
+	struct mtk_i2p5ge_phy_priv *priv = phydev->priv;
+
+	return mtk_phy_led_hw_ctrl_set(phydev, index, rules, &priv->led_state,
+				       MTK_2P5GPHY_LED_ON_SET,
+				       MTK_2P5GPHY_LED_RX_BLINK_SET,
+				       MTK_2P5GPHY_LED_TX_BLINK_SET);
+};
+
+static int mt798x_2p5ge_phy_probe(struct phy_device *phydev)
+{
+	struct mtk_i2p5ge_phy_priv *priv;
+
+	priv = devm_kzalloc(&phydev->mdio.dev,
+			    sizeof(struct mtk_i2p5ge_phy_priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	switch (phydev->drv->phy_id) {
+	case MTK_2P5GPHY_ID_MT7988:
+		priv->pmb_addr = ioremap(MT7988_2P5GE_PMB_BASE, MT7988_2P5GE_PMB_LEN);
+		if (!priv->pmb_addr)
+			return -ENOMEM;
+		priv->md32_en_cfg_base = ioremap(MT7988_2P5GE_MD32_EN_CFG_BASE,
+						 MT7988_2P5GE_MD32_EN_CFG_LEN);
+		if (!priv->md32_en_cfg_base)
+			return -ENOMEM;
+
+		/* The original hardware only sets MDIO_DEVS_PMAPMD */
+		phydev->c45_ids.mmds_present |= (MDIO_DEVS_PCS | MDIO_DEVS_AN |
+						 MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->fw_loaded = false;
+	phydev->priv = priv;
+
+	mtk_phy_leds_state_init(phydev);
+
+	return 0;
+}
+
+static struct phy_driver mtk_gephy_driver[] = {
+	{
+		PHY_ID_MATCH_MODEL(MTK_2P5GPHY_ID_MT7988),
+		.name		= "MediaTek MT798x 2.5GbE PHY",
+		.probe		= mt798x_2p5ge_phy_probe,
+		.config_init	= mt798x_2p5ge_phy_config_init,
+		.config_aneg    = mt798x_2p5ge_phy_config_aneg,
+		.get_features	= mt798x_2p5ge_phy_get_features,
+		.read_status	= mt798x_2p5ge_phy_read_status,
+		.get_rate_matching	= mt798x_2p5ge_phy_get_rate_matching,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.read_page	= mtk_phy_read_page,
+		.write_page	= mtk_phy_write_page,
+		.led_blink_set	= mt798x_2p5ge_phy_led_blink_set,
+		.led_brightness_set = mt798x_2p5ge_phy_led_brightness_set,
+		.led_hw_is_supported = mt798x_2p5ge_phy_led_hw_is_supported,
+		.led_hw_control_get = mt798x_2p5ge_phy_led_hw_control_get,
+		.led_hw_control_set = mt798x_2p5ge_phy_led_hw_control_set,
+	},
+};
+
+module_phy_driver(mtk_gephy_driver);
+
+static struct mdio_device_id __maybe_unused mtk_2p5ge_phy_tbl[] = {
+	{ PHY_ID_MATCH_VENDOR(0x00339c00) },
+	{ }
+};
+
+MODULE_DESCRIPTION("MediaTek 2.5Gb Ethernet PHY driver");
+MODULE_AUTHOR("SkyLake Huang <SkyLake.Huang@mediatek.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_DEVICE_TABLE(mdio, mtk_2p5ge_phy_tbl);