diff mbox series

[net-next,3/5] net: pcs: qcom-ipq: Add PCS create and phylink operations for IPQ9574

Message ID 20241101-ipq_pcs_rc1-v1-3-fdef575620cf@quicinc.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Add PCS support for Qualcomm IPQ9574 SoC | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success Errors and warnings before: 2 (+0) this patch: 2 (+0)
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 4 this patch: 4
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest warning net-next-2024-11-01--15-00 (tests: 776)

Commit Message

Lei Wei Nov. 1, 2024, 10:32 a.m. UTC
This patch adds the following PCS functionality for the PCS driver
for IPQ9574 SoC:

a.) PCS instance create and destroy APIs. The network driver calls
the PCS create API to create and associate the PCS instance with
the port MAC. The PCS MII RX and TX clocks are also enabled in the
PCS create API.
b.) PCS phylink operations for SGMII/QSGMII interface modes.

Signed-off-by: Lei Wei <quic_leiwei@quicinc.com>
---
 drivers/net/pcs/pcs-qcom-ipq.c   | 455 +++++++++++++++++++++++++++++++++++++++
 include/linux/pcs/pcs-qcom-ipq.h |  16 ++
 2 files changed, 471 insertions(+)

Comments

Andrew Lunn Nov. 1, 2024, 1:21 p.m. UTC | #1
> +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
> +			       phy_interface_t interface)
> +{
> +	unsigned int val;
> +	int ret;
> +
> +	/* Configure PCS interface mode */
> +	switch (interface) {
> +	case PHY_INTERFACE_MODE_SGMII:
> +		/* Select Qualcomm SGMII AN mode */
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
> +					 PCS_MODE_SGMII);

How does Qualcomm SGMII AN mode differ from Cisco SGMII AN mode?

> +static int ipq_pcs_config_sgmii(struct ipq_pcs *qpcs,
> +				int index,
> +				unsigned int neg_mode,
> +				phy_interface_t interface)
> +{
> +	int ret;
> +
> +	/* Access to PCS registers such as PCS_MODE_CTRL which are
> +	 * common to all MIIs, is lock protected and configured
> +	 * only once. This is required only for interface modes
> +	 * such as QSGMII.
> +	 */
> +	if (interface == PHY_INTERFACE_MODE_QSGMII)
> +		mutex_lock(&qpcs->config_lock);

Is there a lot of contention on this lock? Why not take it for every
interface mode? It would make the code simpler.

> +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
> +{
> +	struct platform_device *pdev;
> +	struct ipq_pcs_mii *qpcs_mii;
> +	struct device_node *pcs_np;
> +	struct ipq_pcs *qpcs;
> +	int i, ret;
> +	u32 index;
> +
> +	if (!of_device_is_available(np))
> +		return ERR_PTR(-ENODEV);
> +
> +	if (of_property_read_u32(np, "reg", &index))
> +		return ERR_PTR(-EINVAL);
> +
> +	if (index >= PCS_MAX_MII_NRS)
> +		return ERR_PTR(-EINVAL);
> +
> +	pcs_np = of_get_parent(np);
> +	if (!pcs_np)
> +		return ERR_PTR(-ENODEV);
> +
> +	if (!of_device_is_available(pcs_np)) {
> +		of_node_put(pcs_np);
> +		return ERR_PTR(-ENODEV);
> +	}

How have you got this far if the parent is not available?

> +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
> +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
> +		if (IS_ERR(qpcs_mii->clk[i])) {
> +			dev_err(qpcs->dev,
> +				"Failed to get MII %d interface clock %s\n",
> +				index, pcs_mii_clk_name[i]);
> +			goto err_clk_get;
> +		}
> +
> +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
> +		if (ret) {
> +			dev_err(qpcs->dev,
> +				"Failed to enable MII %d interface clock %s\n",
> +				index, pcs_mii_clk_name[i]);
> +			goto err_clk_en;
> +		}
> +	}

Maybe devm_clk_bulk_get() etc will help you here? I've never actually
used them, so i don't know for sure.

	Andrew
Jakub Kicinski Nov. 1, 2024, 4:31 p.m. UTC | #2
On Fri, 1 Nov 2024 14:21:24 +0100 Andrew Lunn wrote:
> > +	/* Access to PCS registers such as PCS_MODE_CTRL which are
> > +	 * common to all MIIs, is lock protected and configured
> > +	 * only once. This is required only for interface modes
> > +	 * such as QSGMII.
> > +	 */
> > +	if (interface == PHY_INTERFACE_MODE_QSGMII)
> > +		mutex_lock(&qpcs->config_lock);  
> 
> Is there a lot of contention on this lock? Why not take it for every
> interface mode? It would make the code simpler.

+1
Lei Wei Nov. 6, 2024, 3:16 a.m. UTC | #3
On 11/1/2024 9:21 PM, Andrew Lunn wrote:
>> +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
>> +			       phy_interface_t interface)
>> +{
>> +	unsigned int val;
>> +	int ret;
>> +
>> +	/* Configure PCS interface mode */
>> +	switch (interface) {
>> +	case PHY_INTERFACE_MODE_SGMII:
>> +		/* Select Qualcomm SGMII AN mode */
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
>> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
>> +					 PCS_MODE_SGMII);
> 
> How does Qualcomm SGMII AN mode differ from Cisco SGMII AN mode?
> 

Qualcomm SGMII AN mode extends Cisco SGMII spec Revision 1.8 by adding 
pause bit support in the SGMII word format. It re-uses two of the 
reserved bits 1..9 for this purpose. The PCS supports both Qualcomm 
SGMII AN and Cisco SGMII AN modes.

>> +static int ipq_pcs_config_sgmii(struct ipq_pcs *qpcs,
>> +				int index,
>> +				unsigned int neg_mode,
>> +				phy_interface_t interface)
>> +{
>> +	int ret;
>> +
>> +	/* Access to PCS registers such as PCS_MODE_CTRL which are
>> +	 * common to all MIIs, is lock protected and configured
>> +	 * only once. This is required only for interface modes
>> +	 * such as QSGMII.
>> +	 */
>> +	if (interface == PHY_INTERFACE_MODE_QSGMII)
>> +		mutex_lock(&qpcs->config_lock);
> 
> Is there a lot of contention on this lock? Why not take it for every
> interface mode? It would make the code simpler.
> 

I agree, the contention should be minimal since the lock is common for 
all MII ports in a PCS and is used only during configuration time. I 
will remove the interface mode check.

>> +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
>> +{
>> +	struct platform_device *pdev;
>> +	struct ipq_pcs_mii *qpcs_mii;
>> +	struct device_node *pcs_np;
>> +	struct ipq_pcs *qpcs;
>> +	int i, ret;
>> +	u32 index;
>> +
>> +	if (!of_device_is_available(np))
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	if (of_property_read_u32(np, "reg", &index))
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	if (index >= PCS_MAX_MII_NRS)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	pcs_np = of_get_parent(np);
>> +	if (!pcs_np)
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	if (!of_device_is_available(pcs_np)) {
>> +		of_node_put(pcs_np);
>> +		return ERR_PTR(-ENODEV);
>> +	}
> 
> How have you got this far if the parent is not available?
> 

This check can fail only if the parent node is disabled in the board 
DTS. I think this error situation may not be caught earlier than this 
point.
However I agree, the above check is redundant, since this check is 
immediately followed by a validity check on the 'pdev' of the parent 
node, which should be able cover any such errors as well.

>> +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
>> +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
>> +		if (IS_ERR(qpcs_mii->clk[i])) {
>> +			dev_err(qpcs->dev,
>> +				"Failed to get MII %d interface clock %s\n",
>> +				index, pcs_mii_clk_name[i]);
>> +			goto err_clk_get;
>> +		}
>> +
>> +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
>> +		if (ret) {
>> +			dev_err(qpcs->dev,
>> +				"Failed to enable MII %d interface clock %s\n",
>> +				index, pcs_mii_clk_name[i]);
>> +			goto err_clk_en;
>> +		}
>> +	}
> 
> Maybe devm_clk_bulk_get() etc will help you here? I've never actually
> used them, so i don't know for sure.
> 

We don't have a 'device' associated with the 'np', so we could not 
consider using the "devm_clk_bulk_get()" API.

> 	Andrew
Andrew Lunn Nov. 6, 2024, 3:43 a.m. UTC | #4
On Wed, Nov 06, 2024 at 11:16:37AM +0800, Lei Wei wrote:
> 
> 
> On 11/1/2024 9:21 PM, Andrew Lunn wrote:
> > > +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
> > > +			       phy_interface_t interface)
> > > +{
> > > +	unsigned int val;
> > > +	int ret;
> > > +
> > > +	/* Configure PCS interface mode */
> > > +	switch (interface) {
> > > +	case PHY_INTERFACE_MODE_SGMII:
> > > +		/* Select Qualcomm SGMII AN mode */
> > > +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
> > > +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
> > > +					 PCS_MODE_SGMII);
> > 
> > How does Qualcomm SGMII AN mode differ from Cisco SGMII AN mode?
> > 
> 
> Qualcomm SGMII AN mode extends Cisco SGMII spec Revision 1.8 by adding pause
> bit support in the SGMII word format. It re-uses two of the reserved bits
> 1..9 for this purpose. The PCS supports both Qualcomm SGMII AN and Cisco
> SGMII AN modes.

Is Qualcomm SGMII AN actually needed? I assume it only works against a
Qualcomm PHY? What interoperability testing have you do against
non-Qualcomm PHYs?

> > > +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
> > > +{
> > > +	struct platform_device *pdev;
> > > +	struct ipq_pcs_mii *qpcs_mii;
> > > +	struct device_node *pcs_np;
> > > +	struct ipq_pcs *qpcs;
> > > +	int i, ret;
> > > +	u32 index;
> > > +
> > > +	if (!of_device_is_available(np))
> > > +		return ERR_PTR(-ENODEV);
> > > +
> > > +	if (of_property_read_u32(np, "reg", &index))
> > > +		return ERR_PTR(-EINVAL);
> > > +
> > > +	if (index >= PCS_MAX_MII_NRS)
> > > +		return ERR_PTR(-EINVAL);
> > > +
> > > +	pcs_np = of_get_parent(np);
> > > +	if (!pcs_np)
> > > +		return ERR_PTR(-ENODEV);
> > > +
> > > +	if (!of_device_is_available(pcs_np)) {
> > > +		of_node_put(pcs_np);
> > > +		return ERR_PTR(-ENODEV);
> > > +	}
> > 
> > How have you got this far if the parent is not available?
> > 
> 
> This check can fail only if the parent node is disabled in the board DTS. I
> think this error situation may not be caught earlier than this point.
> However I agree, the above check is redundant, since this check is
> immediately followed by a validity check on the 'pdev' of the parent node,
> which should be able cover any such errors as well.

This was also because the driver does not work as i expected. I was
expecting the PCS driver to walk its own DT and instantiate the PCS
devices listed. If the parent is disabled, it is clearly not going to
start its own children.  But it is in fact some other device which
walks the PCS DT blob, and as a result the child/parent relationship
is broken, a child could exist without its parent.

> > > +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
> > > +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
> > > +		if (IS_ERR(qpcs_mii->clk[i])) {
> > > +			dev_err(qpcs->dev,
> > > +				"Failed to get MII %d interface clock %s\n",
> > > +				index, pcs_mii_clk_name[i]);
> > > +			goto err_clk_get;
> > > +		}
> > > +
> > > +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
> > > +		if (ret) {
> > > +			dev_err(qpcs->dev,
> > > +				"Failed to enable MII %d interface clock %s\n",
> > > +				index, pcs_mii_clk_name[i]);
> > > +			goto err_clk_en;
> > > +		}
> > > +	}
> > 
> > Maybe devm_clk_bulk_get() etc will help you here? I've never actually
> > used them, so i don't know for sure.
> > 
> 
> We don't have a 'device' associated with the 'np', so we could not consider
> using the "devm_clk_bulk_get()" API.

Another artefact of not have a child-parent relationship. I wounder if
it makes sense to change the architecture. Have the PCS driver
instantiate the PCS devices as its children. They then have a device
structure for calls like clk_bulk_get(), and a more normal
consumer/provider setup.

	Andrew
Russell King (Oracle) Nov. 7, 2024, 7:02 p.m. UTC | #5
Hi,

On Fri, Nov 01, 2024 at 06:32:51PM +0800, Lei Wei wrote:
> +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
> +			       phy_interface_t interface)
> +{
> +	unsigned int val;
> +	int ret;
> +
> +	/* Configure PCS interface mode */
> +	switch (interface) {
> +	case PHY_INTERFACE_MODE_SGMII:
> +		/* Select Qualcomm SGMII AN mode */
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
> +					 PCS_MODE_SGMII);
> +		if (ret)
> +			return ret;
> +		break;
> +	case PHY_INTERFACE_MODE_QSGMII:
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
> +					 PCS_MODE_QSGMII);
> +		if (ret)
> +			return ret;
> +		break;
> +	default:
> +		dev_err(qpcs->dev,
> +			"Unsupported interface %s\n", phy_modes(interface));
> +		return -EOPNOTSUPP;
> +	}

I think:

	unsigned int mode;

	switch (interface) {
	case PHY_INTERFACE_MODE_SGMII:
		/* Select Qualcomm SGMII AN mode */
		mode = PCS_MODE_SGMII;
		break;

	case PHY_INTERFACE_MODE_QSGMII:
		mode = PCS_MODE_QSGMII;
		break;

	default:
		...
	}

	ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
				 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE, mode);
	if (ret)
		return ret;

might be easier to read? I notice that in patch 4, the USXGMII case
drops PCS_MODE_AN_MODE from the mask, leaving this bit set to whatever
value it previously was. Is that intentional?

> +static int ipq_pcs_link_up_config_sgmii(struct ipq_pcs *qpcs,
> +					int index,
> +					unsigned int neg_mode,
> +					int speed)
> +{
> +	int ret;
> +
> +	/* PCS speed need not be configured if in-band autoneg is enabled */
> +	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
> +		goto pcs_adapter_reset;
> +
> +	/* PCS speed set for force mode */
> +	switch (speed) {
> +	case SPEED_1000:
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> +					 PCS_MII_SPEED_MASK,
> +					 PCS_MII_SPEED_1000);
> +		if (ret)
> +			return ret;
> +		break;
> +	case SPEED_100:
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> +					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_100);
> +		if (ret)
> +			return ret;
> +		break;
> +	case SPEED_10:
> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> +					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_10);
> +		if (ret)
> +			return ret;
> +		break;
> +	default:
> +		dev_err(qpcs->dev, "Invalid SGMII speed %d\n", speed);
> +		return -EINVAL;
> +	}

I think it's worth having the same structure here, and with fewer lines
(and fewer long lines) maybe:

	if (neg_mode != PHYLINK_PCS_NEG_INBAND_ENABLED) {
		switch (speed) {
		...
		}

		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
					 PCS_MII_SPEED_MASK, ctrl);
		if (ret)
			return ret;
	}

means you don't need the pcs_adapter_reset label.

> +
> +pcs_adapter_reset:
> +	/* PCS adapter reset */
> +	ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> +				 PCS_MII_ADPT_RESET, 0);
> +	if (ret)
> +		return ret;
> +
> +	return regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> +				  PCS_MII_ADPT_RESET, PCS_MII_ADPT_RESET);
> +}
> +
> +static void ipq_pcs_get_state(struct phylink_pcs *pcs,
> +			      struct phylink_link_state *state)
> +{
> +	struct ipq_pcs_mii *qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
> +	struct ipq_pcs *qpcs = qpcs_mii->qpcs;
> +	int index = qpcs_mii->index;
> +
> +	switch (state->interface) {
> +	case PHY_INTERFACE_MODE_SGMII:
> +	case PHY_INTERFACE_MODE_QSGMII:
> +		ipq_pcs_get_state_sgmii(qpcs, index, state);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	dev_dbg(qpcs->dev,
> +		"mode=%s/%s/%s link=%u\n",
> +		phy_modes(state->interface),
> +		phy_speed_to_str(state->speed),
> +		phy_duplex_to_str(state->duplex),
> +		state->link);

This will get very noisy given that in polling mode, phylink will call
this once a second - and I see you are using polling mode.

> +/**
> + * ipq_pcs_create() - Create an IPQ PCS MII instance
> + * @np: Device tree node to the PCS MII
> + *
> + * Description: Create a phylink PCS instance for the given PCS MII node @np
> + * and enable the MII clocks. This instance is associated with the specific
> + * MII of the PCS and the corresponding Ethernet netdevice.
> + *
> + * Return: A pointer to the phylink PCS instance or an error-pointer value.
> + */
> +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
> +{
> +	struct platform_device *pdev;
> +	struct ipq_pcs_mii *qpcs_mii;
> +	struct device_node *pcs_np;
> +	struct ipq_pcs *qpcs;
> +	int i, ret;
> +	u32 index;
> +
> +	if (!of_device_is_available(np))
> +		return ERR_PTR(-ENODEV);
> +
> +	if (of_property_read_u32(np, "reg", &index))
> +		return ERR_PTR(-EINVAL);
> +
> +	if (index >= PCS_MAX_MII_NRS)
> +		return ERR_PTR(-EINVAL);
> +
> +	pcs_np = of_get_parent(np);
> +	if (!pcs_np)
> +		return ERR_PTR(-ENODEV);
> +
> +	if (!of_device_is_available(pcs_np)) {
> +		of_node_put(pcs_np);
> +		return ERR_PTR(-ENODEV);
> +	}
> +
> +	pdev = of_find_device_by_node(pcs_np);
> +	of_node_put(pcs_np);
> +	if (!pdev)
> +		return ERR_PTR(-ENODEV);
> +
> +	qpcs = platform_get_drvdata(pdev);
> +	put_device(&pdev->dev);
> +
> +	/* If probe is not yet completed, return DEFER to
> +	 * the dependent driver.
> +	 */
> +	if (!qpcs)
> +		return ERR_PTR(-EPROBE_DEFER);
> +
> +	qpcs_mii = kzalloc(sizeof(*qpcs_mii), GFP_KERNEL);
> +	if (!qpcs_mii)
> +		return ERR_PTR(-ENOMEM);
> +
> +	qpcs_mii->qpcs = qpcs;
> +	qpcs_mii->index = index;
> +	qpcs_mii->pcs.ops = &ipq_pcs_phylink_ops;
> +	qpcs_mii->pcs.neg_mode = true;
> +	qpcs_mii->pcs.poll = true;
> +
> +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
> +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
> +		if (IS_ERR(qpcs_mii->clk[i])) {
> +			dev_err(qpcs->dev,
> +				"Failed to get MII %d interface clock %s\n",
> +				index, pcs_mii_clk_name[i]);
> +			goto err_clk_get;
> +		}
> +
> +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
> +		if (ret) {
> +			dev_err(qpcs->dev,
> +				"Failed to enable MII %d interface clock %s\n",
> +				index, pcs_mii_clk_name[i]);
> +			goto err_clk_en;
> +		}

Do you always need the clock prepared and enabled? If not, you could
do this in the pcs_enable() method, and turn it off in pcs_disable().

I think phylink may need a tweak to "unuse" the current PCS when
phylink_stop() is called to make that more effective at disabling the
PCS, which is something that should be done - that's a subject for a
separate patch which I may send.
Lei Wei Nov. 8, 2024, 11:31 a.m. UTC | #6
On 11/6/2024 11:43 AM, Andrew Lunn wrote:
> On Wed, Nov 06, 2024 at 11:16:37AM +0800, Lei Wei wrote:
>>
>>
>> On 11/1/2024 9:21 PM, Andrew Lunn wrote:
>>>> +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
>>>> +			       phy_interface_t interface)
>>>> +{
>>>> +	unsigned int val;
>>>> +	int ret;
>>>> +
>>>> +	/* Configure PCS interface mode */
>>>> +	switch (interface) {
>>>> +	case PHY_INTERFACE_MODE_SGMII:
>>>> +		/* Select Qualcomm SGMII AN mode */
>>>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
>>>> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
>>>> +					 PCS_MODE_SGMII);
>>>
>>> How does Qualcomm SGMII AN mode differ from Cisco SGMII AN mode?
>>>
>>
>> Qualcomm SGMII AN mode extends Cisco SGMII spec Revision 1.8 by adding pause
>> bit support in the SGMII word format. It re-uses two of the reserved bits
>> 1..9 for this purpose. The PCS supports both Qualcomm SGMII AN and Cisco
>> SGMII AN modes.
> 
> Is Qualcomm SGMII AN actually needed? I assume it only works against a
> Qualcomm PHY? What interoperability testing have you do against
> non-Qualcomm PHYs?
> 

I agree that using Cisco SGMII AN mode as default is more appropriate,
since is more commonly used with PHYs. I will make this change.

Qualcomm SGMII AN is an extension of top of Cisco SGMII AN (only
pause bits difference). So it is expected to be compatible with
non-Qualcomm PHYs which use Cisco SGMII AN.

>>>> +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
>>>> +{
>>>> +	struct platform_device *pdev;
>>>> +	struct ipq_pcs_mii *qpcs_mii;
>>>> +	struct device_node *pcs_np;
>>>> +	struct ipq_pcs *qpcs;
>>>> +	int i, ret;
>>>> +	u32 index;
>>>> +
>>>> +	if (!of_device_is_available(np))
>>>> +		return ERR_PTR(-ENODEV);
>>>> +
>>>> +	if (of_property_read_u32(np, "reg", &index))
>>>> +		return ERR_PTR(-EINVAL);
>>>> +
>>>> +	if (index >= PCS_MAX_MII_NRS)
>>>> +		return ERR_PTR(-EINVAL);
>>>> +
>>>> +	pcs_np = of_get_parent(np);
>>>> +	if (!pcs_np)
>>>> +		return ERR_PTR(-ENODEV);
>>>> +
>>>> +	if (!of_device_is_available(pcs_np)) {
>>>> +		of_node_put(pcs_np);
>>>> +		return ERR_PTR(-ENODEV);
>>>> +	}
>>>
>>> How have you got this far if the parent is not available?
>>>
>>
>> This check can fail only if the parent node is disabled in the board DTS. I
>> think this error situation may not be caught earlier than this point.
>> However I agree, the above check is redundant, since this check is
>> immediately followed by a validity check on the 'pdev' of the parent node,
>> which should be able cover any such errors as well.
> 
> This was also because the driver does not work as i expected. I was
> expecting the PCS driver to walk its own DT and instantiate the PCS
> devices listed. If the parent is disabled, it is clearly not going to
> start its own children.  But it is in fact some other device which
> walks the PCS DT blob, and as a result the child/parent relationship
> is broken, a child could exist without its parent.
> 

Currently the PCS driver walks the DT and instantiates the parent PCS 
nodes during probe, where as the child PCS (the per-MII PCS instance) is 
instantiated later by the network device that is associated with the MII.

Alternatively as you mention, we could instantiate the child PCS during 
probe itself. The network driver when it comes up, can just issue a 
'get' operation on the PCS driver, to retrieve the PCS phylink given the 
PCS node associated with the MAC. Agree that this is architecturally 
simpler for instantiating the PCS nodes, and the interaction between 
network driver and PCS is simpler (single 'get_phylink_pcs' API exported 
from PCS driver instead of PCS create/destroy API exported). The PCS 
instances are freed up during platform device remove.

>>>> +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
>>>> +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
>>>> +		if (IS_ERR(qpcs_mii->clk[i])) {
>>>> +			dev_err(qpcs->dev,
>>>> +				"Failed to get MII %d interface clock %s\n",
>>>> +				index, pcs_mii_clk_name[i]);
>>>> +			goto err_clk_get;
>>>> +		}
>>>> +
>>>> +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
>>>> +		if (ret) {
>>>> +			dev_err(qpcs->dev,
>>>> +				"Failed to enable MII %d interface clock %s\n",
>>>> +				index, pcs_mii_clk_name[i]);
>>>> +			goto err_clk_en;
>>>> +		}
>>>> +	}
>>>
>>> Maybe devm_clk_bulk_get() etc will help you here? I've never actually
>>> used them, so i don't know for sure.
>>>
>>
>> We don't have a 'device' associated with the 'np', so we could not consider
>> using the "devm_clk_bulk_get()" API.
> 
> Another artefact of not have a child-parent relationship. I wounder if
> it makes sense to change the architecture. Have the PCS driver
> instantiate the PCS devices as its children. They then have a device
> structure for calls like clk_bulk_get(), and a more normal
> consumer/provider setup.
> 

I think you may be suggesting to drop the child node usage in the DTS, 
so that we can attach all the MII clocks to the single PCS node, to 
facilitate usage of bulk get() API to retrieve the MII clocks for that 
PCS. We reviewed this option, and believe that retaining the current 
parent-child relationship is a better option instead. This is because 
this option allows us the flexibility to enable/disable the MII channels 
(child nodes) in the board DTS as per the ethernet/MII configuration 
relevant for the board.

We would like to explain this using an example of SoC/board DTSI below.

For IPQ9574, port5 can be connected with PCS0 (PCS0: PSGMII, PCS1 not 
used) or PCS1 (PCS0: QSGMII, PCS1: USXGMII).

IPQ9574 SoC DTSI for PCS0 (5 max MII channels) and PCS1:
--------------------------------------------------------
pcs0: ethernet-pcs@7a00000 {
	clocks = <&gcc GCC_UNIPHY0_SYS_CLK>,
		 <&gcc GCC_UNIPHY0_AHB_CLK>;
	clock-names = "sys",
		      "ahb";

	status = "disabled";

	pcs0_mii0: pcs-mii@0 {
		reg = <0>;
		status = "disabled";
	};

	......

	pcs0_mii3: pcs-mii@3 {
		reg = <3>;
		status = "disabled";
	};
	pcs0_mii4: pcs-mii@3 {
		reg = <4>;
		status = "disabled";
	};
};

pcs1: ethernet-pcs@7a10000 {
	clocks = <&gcc GCC_UNIPHY1_SYS_CLK>,
		 <&gcc GCC_UNIPHY1_AHB_CLK>;
	clock-names = "sys",
		      "ahb";

	status = "disabled";

	pcs1_mii0: pcs-mii@0 {
		reg = <0>;
		status = "disabled";
	};	
};


board1 DTS (PCS0 - QSGMII (port1 - port4), PCS1 USXGMII (port 5))
-----------------------------------------------------------------
&pcs0 {
	status = "okay";
};

/* Enable only four MII channels for PCS0 for this board */
&pcs0_mii0 {
	clocks = <&nsscc NSS_CC_UNIPHY_PORT1_RX_CLK>,
		 <&nsscc NSS_CC_UNIPHY_PORT1_TX_CLK>;
	clock-names = "mii_rx",
		      "mii_tx";
	status = "okay";
};

......

&pcs0_mii3 {
	clocks = <&nsscc NSS_CC_UNIPHY_PORT4_RX_CLK>,
		 <&nsscc NSS_CC_UNIPHY_PORT4_TX_CLK>;
	clock-names = "mii_rx",
		      "mii_tx";
	status = "okay";
};

&pcs1 {
	status = "okay";
};

&pcs1_mii0 {
	clocks = <&nsscc NSS_CC_UNIPHY_PORT5_RX_CLK>,
		 <&nsscc NSS_CC_UNIPHY_PORT5_TX_CLK>;
	clock-names = "mii_rx",
		      "mii_tx";
	status = "okay";
};

board2 DTS: (PCS0 - PSGMII (port1 - port5), PCS1 - disabled):
-------------------------------------------------------------
&pcs0 {
	status = "okay";
};

/* For PCS0, Enable all 5 MII channels for this board,
    PCS1 is disabled */
&pcs0_mii0 {
	clocks = <&nsscc NSS_CC_UNIPHY_PORT1_RX_CLK>,
		 <&nsscc NSS_CC_UNIPHY_PORT1_TX_CLK>;
	clock-names = "mii_rx",
		      "mii_tx";
	status = "okay";
};

......

&pcs0_mii4 {
	clocks = <&nsscc NSS_CC_UNIPHY_PORT5_RX_CLK>,
		 <&nsscc NSS_CC_UNIPHY_PORT5_TX_CLK>;
	clock-names = "mii_rx",
		      "mii_tx";
	status = "okay";
};

If we drop the child node in DTS, then all MII clocks will have to be 
combined with the SoC clocks (AHB/SYS) and added to the board DTS, which 
may not be correct. Also, I think the child-parent relationship in DTS 
will make it more clear to reflect the PCS-to--MII-channel relationship.

Kindly let us know if this approach is fine.

> 	Andrew
Russell King (Oracle) Nov. 8, 2024, 11:37 a.m. UTC | #7
On Fri, Nov 08, 2024 at 07:31:31PM +0800, Lei Wei wrote:
> On 11/6/2024 11:43 AM, Andrew Lunn wrote:
> > On Wed, Nov 06, 2024 at 11:16:37AM +0800, Lei Wei wrote:
> > > On 11/1/2024 9:21 PM, Andrew Lunn wrote:
> > > > How does Qualcomm SGMII AN mode differ from Cisco SGMII AN mode?
> > > 
> > > Qualcomm SGMII AN mode extends Cisco SGMII spec Revision 1.8 by adding pause
> > > bit support in the SGMII word format. It re-uses two of the reserved bits
> > > 1..9 for this purpose. The PCS supports both Qualcomm SGMII AN and Cisco
> > > SGMII AN modes.
> > 
> > Is Qualcomm SGMII AN actually needed? I assume it only works against a
> > Qualcomm PHY? What interoperability testing have you do against
> > non-Qualcomm PHYs?
> 
> I agree that using Cisco SGMII AN mode as default is more appropriate,
> since is more commonly used with PHYs. I will make this change.
> 
> Qualcomm SGMII AN is an extension of top of Cisco SGMII AN (only
> pause bits difference). So it is expected to be compatible with
> non-Qualcomm PHYs which use Cisco SGMII AN.

I believe Marvell have similar extensions.
Lei Wei Nov. 8, 2024, 12:03 p.m. UTC | #8
On 11/8/2024 3:02 AM, Russell King (Oracle) wrote:
> Hi,
> 
> On Fri, Nov 01, 2024 at 06:32:51PM +0800, Lei Wei wrote:
>> +static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
>> +			       phy_interface_t interface)
>> +{
>> +	unsigned int val;
>> +	int ret;
>> +
>> +	/* Configure PCS interface mode */
>> +	switch (interface) {
>> +	case PHY_INTERFACE_MODE_SGMII:
>> +		/* Select Qualcomm SGMII AN mode */
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
>> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
>> +					 PCS_MODE_SGMII);
>> +		if (ret)
>> +			return ret;
>> +		break;
>> +	case PHY_INTERFACE_MODE_QSGMII:
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
>> +					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
>> +					 PCS_MODE_QSGMII);
>> +		if (ret)
>> +			return ret;
>> +		break;
>> +	default:
>> +		dev_err(qpcs->dev,
>> +			"Unsupported interface %s\n", phy_modes(interface));
>> +		return -EOPNOTSUPP;
>> +	}
> 
> I think:
> 
> 	unsigned int mode;
> 
> 	switch (interface) {
> 	case PHY_INTERFACE_MODE_SGMII:
> 		/* Select Qualcomm SGMII AN mode */
> 		mode = PCS_MODE_SGMII;
> 		break;
> 
> 	case PHY_INTERFACE_MODE_QSGMII:
> 		mode = PCS_MODE_QSGMII;
> 		break;
> 
> 	default:
> 		...
> 	}
> 
> 	ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
> 				 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE, mode);
> 	if (ret)
> 		return ret;
> 
> might be easier to read? 

Thanks for the suggestion, I will make this change.

I notice that in patch 4, the USXGMII case
> drops PCS_MODE_AN_MODE from the mask, leaving this bit set to whatever
> value it previously was. Is that intentional?
> 

Yes, this bit is used for configuring the SGMII AN mode - Cisco AN or 
Qualcomm AN. The default setting is Cisco SGMII AN mode.

Please note that as per our discussion with Andrew on his comment 
regarding the default mode to use, we would like to change the default 
setting for SGMII/QSGMII to Cisco AN Mode in the next patch update.

>> +static int ipq_pcs_link_up_config_sgmii(struct ipq_pcs *qpcs,
>> +					int index,
>> +					unsigned int neg_mode,
>> +					int speed)
>> +{
>> +	int ret;
>> +
>> +	/* PCS speed need not be configured if in-band autoneg is enabled */
>> +	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
>> +		goto pcs_adapter_reset;
>> +
>> +	/* PCS speed set for force mode */
>> +	switch (speed) {
>> +	case SPEED_1000:
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
>> +					 PCS_MII_SPEED_MASK,
>> +					 PCS_MII_SPEED_1000);
>> +		if (ret)
>> +			return ret;
>> +		break;
>> +	case SPEED_100:
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
>> +					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_100);
>> +		if (ret)
>> +			return ret;
>> +		break;
>> +	case SPEED_10:
>> +		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
>> +					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_10);
>> +		if (ret)
>> +			return ret;
>> +		break;
>> +	default:
>> +		dev_err(qpcs->dev, "Invalid SGMII speed %d\n", speed);
>> +		return -EINVAL;
>> +	}
> 
> I think it's worth having the same structure here, and with fewer lines
> (and fewer long lines) maybe:
> 
> 	if (neg_mode != PHYLINK_PCS_NEG_INBAND_ENABLED) {
> 		switch (speed) {
> 		...
> 		}
> 
> 		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
> 					 PCS_MII_SPEED_MASK, ctrl);
> 		if (ret)
> 			return ret;
> 	}
> 
> means you don't need the pcs_adapter_reset label.
> 

Sure, thanks for the suggestion. I will make this change.

>> +
>> +pcs_adapter_reset:
>> +	/* PCS adapter reset */
>> +	ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
>> +				 PCS_MII_ADPT_RESET, 0);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
>> +				  PCS_MII_ADPT_RESET, PCS_MII_ADPT_RESET);
>> +}
>> +
>> +static void ipq_pcs_get_state(struct phylink_pcs *pcs,
>> +			      struct phylink_link_state *state)
>> +{
>> +	struct ipq_pcs_mii *qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
>> +	struct ipq_pcs *qpcs = qpcs_mii->qpcs;
>> +	int index = qpcs_mii->index;
>> +
>> +	switch (state->interface) {
>> +	case PHY_INTERFACE_MODE_SGMII:
>> +	case PHY_INTERFACE_MODE_QSGMII:
>> +		ipq_pcs_get_state_sgmii(qpcs, index, state);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	dev_dbg(qpcs->dev,
>> +		"mode=%s/%s/%s link=%u\n",
>> +		phy_modes(state->interface),
>> +		phy_speed_to_str(state->speed),
>> +		phy_duplex_to_str(state->duplex),
>> +		state->link);
> 
> This will get very noisy given that in polling mode, phylink will call
> this once a second - and I see you are using polling mode.
> 

Sure, I will use dev_dbg_ratelimited() API instead.

>> +/**
>> + * ipq_pcs_create() - Create an IPQ PCS MII instance
>> + * @np: Device tree node to the PCS MII
>> + *
>> + * Description: Create a phylink PCS instance for the given PCS MII node @np
>> + * and enable the MII clocks. This instance is associated with the specific
>> + * MII of the PCS and the corresponding Ethernet netdevice.
>> + *
>> + * Return: A pointer to the phylink PCS instance or an error-pointer value.
>> + */
>> +struct phylink_pcs *ipq_pcs_create(struct device_node *np)
>> +{
>> +	struct platform_device *pdev;
>> +	struct ipq_pcs_mii *qpcs_mii;
>> +	struct device_node *pcs_np;
>> +	struct ipq_pcs *qpcs;
>> +	int i, ret;
>> +	u32 index;
>> +
>> +	if (!of_device_is_available(np))
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	if (of_property_read_u32(np, "reg", &index))
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	if (index >= PCS_MAX_MII_NRS)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	pcs_np = of_get_parent(np);
>> +	if (!pcs_np)
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	if (!of_device_is_available(pcs_np)) {
>> +		of_node_put(pcs_np);
>> +		return ERR_PTR(-ENODEV);
>> +	}
>> +
>> +	pdev = of_find_device_by_node(pcs_np);
>> +	of_node_put(pcs_np);
>> +	if (!pdev)
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	qpcs = platform_get_drvdata(pdev);
>> +	put_device(&pdev->dev);
>> +
>> +	/* If probe is not yet completed, return DEFER to
>> +	 * the dependent driver.
>> +	 */
>> +	if (!qpcs)
>> +		return ERR_PTR(-EPROBE_DEFER);
>> +
>> +	qpcs_mii = kzalloc(sizeof(*qpcs_mii), GFP_KERNEL);
>> +	if (!qpcs_mii)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	qpcs_mii->qpcs = qpcs;
>> +	qpcs_mii->index = index;
>> +	qpcs_mii->pcs.ops = &ipq_pcs_phylink_ops;
>> +	qpcs_mii->pcs.neg_mode = true;
>> +	qpcs_mii->pcs.poll = true;
>> +
>> +	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
>> +		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
>> +		if (IS_ERR(qpcs_mii->clk[i])) {
>> +			dev_err(qpcs->dev,
>> +				"Failed to get MII %d interface clock %s\n",
>> +				index, pcs_mii_clk_name[i]);
>> +			goto err_clk_get;
>> +		}
>> +
>> +		ret = clk_prepare_enable(qpcs_mii->clk[i]);
>> +		if (ret) {
>> +			dev_err(qpcs->dev,
>> +				"Failed to enable MII %d interface clock %s\n",
>> +				index, pcs_mii_clk_name[i]);
>> +			goto err_clk_en;
>> +		}
> 
> Do you always need the clock prepared and enabled? If not, you could
> do this in the pcs_enable() method, and turn it off in pcs_disable().
> 

Yes, it can be moved to pcs_enable/pcs_disable method. I will make this 
change.

> I think phylink may need a tweak to "unuse" the current PCS when
> phylink_stop() is called to make that more effective at disabling the
> PCS, which is something that should be done - that's a subject for a
> separate patch which I may send.
>
Andrew Lunn Nov. 8, 2024, 1:24 p.m. UTC | #9
> > Another artefact of not have a child-parent relationship. I wounder if
> > it makes sense to change the architecture. Have the PCS driver
> > instantiate the PCS devices as its children. They then have a device
> > structure for calls like clk_bulk_get(), and a more normal
> > consumer/provider setup.
> > 
> 
> I think you may be suggesting to drop the child node usage in the DTS, so
> that we can attach all the MII clocks to the single PCS node, to facilitate
> usage of bulk get() API to retrieve the MII clocks for that PCS.

I would keep the child nodes. They describe the cookie-cutter nature
of the hardware. The problem is with the clk_bulk API, not allowing
you to pass a device_node. of_clk_bulk_get() appears to do what you
want, but it is not exported. What we do have is:

/**
 * devm_get_clk_from_child - lookup and obtain a managed reference to a
 *                           clock producer from child node.
 * @dev: device for clock "consumer"
 * @np: pointer to clock consumer node
 * @con_id: clock consumer ID
 *
 * This function parses the clocks, and uses them to look up the
 * struct clk from the registered list of clock providers by using
 * @np and @con_id
 *
 * The clock will automatically be freed when the device is unbound
 * from the bus.
 */
struct clk *devm_get_clk_from_child(struct device *dev,
                                    struct device_node *np, const char *con_id);

So maybe a devm_get_clk_bulk_from_child() would be accepted?

However, it might not be worth the effort. Using the bulk API was just
a suggestion to make the code simpler, not a strong requirement.

	Andrew
Lei Wei Nov. 12, 2024, 12:48 p.m. UTC | #10
On 11/8/2024 9:24 PM, Andrew Lunn wrote:
>>> Another artefact of not have a child-parent relationship. I wounder if
>>> it makes sense to change the architecture. Have the PCS driver
>>> instantiate the PCS devices as its children. They then have a device
>>> structure for calls like clk_bulk_get(), and a more normal
>>> consumer/provider setup.
>>>
>>
>> I think you may be suggesting to drop the child node usage in the DTS, so
>> that we can attach all the MII clocks to the single PCS node, to facilitate
>> usage of bulk get() API to retrieve the MII clocks for that PCS.
> 
> I would keep the child nodes. They describe the cookie-cutter nature
> of the hardware. The problem is with the clk_bulk API, not allowing
> you to pass a device_node. of_clk_bulk_get() appears to do what you
> want, but it is not exported. What we do have is:
> 
> /**
>   * devm_get_clk_from_child - lookup and obtain a managed reference to a
>   *                           clock producer from child node.
>   * @dev: device for clock "consumer"
>   * @np: pointer to clock consumer node
>   * @con_id: clock consumer ID
>   *
>   * This function parses the clocks, and uses them to look up the
>   * struct clk from the registered list of clock providers by using
>   * @np and @con_id
>   *
>   * The clock will automatically be freed when the device is unbound
>   * from the bus.
>   */
> struct clk *devm_get_clk_from_child(struct device *dev,
>                                      struct device_node *np, const char *con_id);
> 
> So maybe a devm_get_clk_bulk_from_child() would be accepted?
> 
> However, it might not be worth the effort. Using the bulk API was just
> a suggestion to make the code simpler, not a strong requirement.
> 

OK, I agree.

For the PCS instantiation for child nodes, I would like to summarize the 
two options we have, and mention our chosen approach. 1.) Instantiate 
the PCS during the create API call, and export create/destroy API to the 
network driver similar to existing drivers (OR) 2.) Instantiate the 
child nodes during PCS probe and let the MAC driver access the 
'phylink_pcs' object using a ipq_pcs_get()/ipq_pcs_put() API instead of 
ipq_pcs_create()/ipq_pcs_destroy().

The other PCS drivers are following the create/destroy usage pattern 
(option 1). However we are leaning towards option 2, since it is a 
simpler design. Hope this approach is ok.

> 	Andrew
diff mbox series

Patch

diff --git a/drivers/net/pcs/pcs-qcom-ipq.c b/drivers/net/pcs/pcs-qcom-ipq.c
index e065bc61cd14..dd432303b549 100644
--- a/drivers/net/pcs/pcs-qcom-ipq.c
+++ b/drivers/net/pcs/pcs-qcom-ipq.c
@@ -6,12 +6,49 @@ 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pcs/pcs-qcom-ipq.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #include <dt-bindings/net/pcs-qcom-ipq.h>
 
+/* Maximum number of MIIs per PCS instance. There are 5 MIIs for PSGMII. */
+#define PCS_MAX_MII_NRS			5
+
+#define PCS_CALIBRATION			0x1e0
+#define PCS_CALIBRATION_DONE		BIT(7)
+
+#define PCS_MODE_CTRL			0x46c
+#define PCS_MODE_SEL_MASK		GENMASK(12, 8)
+#define PCS_MODE_SGMII			FIELD_PREP(PCS_MODE_SEL_MASK, 0x4)
+#define PCS_MODE_QSGMII			FIELD_PREP(PCS_MODE_SEL_MASK, 0x1)
+#define PCS_MODE_AN_MODE		BIT(0)
+
+#define PCS_MII_CTRL(x)			(0x480 + 0x18 * (x))
+#define PCS_MII_ADPT_RESET		BIT(11)
+#define PCS_MII_FORCE_MODE		BIT(3)
+#define PCS_MII_SPEED_MASK		GENMASK(2, 1)
+#define PCS_MII_SPEED_1000		FIELD_PREP(PCS_MII_SPEED_MASK, 0x2)
+#define PCS_MII_SPEED_100		FIELD_PREP(PCS_MII_SPEED_MASK, 0x1)
+#define PCS_MII_SPEED_10		FIELD_PREP(PCS_MII_SPEED_MASK, 0x0)
+
+#define PCS_MII_STS(x)			(0x488 + 0x18 * (x))
+#define PCS_MII_LINK_STS		BIT(7)
+#define PCS_MII_STS_DUPLEX_FULL		BIT(6)
+#define PCS_MII_STS_SPEED_MASK		GENMASK(5, 4)
+#define PCS_MII_STS_SPEED_10		0
+#define PCS_MII_STS_SPEED_100		1
+#define PCS_MII_STS_SPEED_1000		2
+#define PCS_MII_STS_PAUSE_TX_EN		BIT(1)
+#define PCS_MII_STS_PAUSE_RX_EN		BIT(0)
+
+#define PCS_PLL_RESET			0x780
+#define PCS_ANA_SW_RESET		BIT(6)
+
 #define XPCS_INDIRECT_ADDR		0x8000
 #define XPCS_INDIRECT_AHB_ADDR		0x83fc
 #define XPCS_INDIRECT_ADDR_H		GENMASK(20, 8)
@@ -27,12 +64,428 @@  struct ipq_pcs {
 	struct regmap *regmap;
 	phy_interface_t interface;
 
+	/* Lock to protect PCS configurations shared by multiple MII ports */
+	struct mutex config_lock;
+
 	/* RX clock supplied to NSSCC */
 	struct clk_hw rx_hw;
 	/* TX clock supplied to NSSCC */
 	struct clk_hw tx_hw;
 };
 
+/* PCS MII clock ID */
+enum {
+	PCS_MII_RX_CLK,
+	PCS_MII_TX_CLK,
+	PCS_MII_CLK_MAX
+};
+
+/* PCS MII clock name */
+static const char *const pcs_mii_clk_name[PCS_MII_CLK_MAX] = {
+	"mii_rx",
+	"mii_tx",
+};
+
+/* Per PCS MII private data */
+struct ipq_pcs_mii {
+	struct ipq_pcs *qpcs;
+	struct phylink_pcs pcs;
+	int index;
+
+	/* Rx/Tx clocks from NSSCC to PCS MII */
+	struct clk *clk[PCS_MII_CLK_MAX];
+};
+
+#define phylink_pcs_to_qpcs_mii(_pcs)	\
+	container_of(_pcs, struct ipq_pcs_mii, pcs)
+
+static void ipq_pcs_get_state_sgmii(struct ipq_pcs *qpcs,
+				    int index,
+				    struct phylink_link_state *state)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(qpcs->regmap, PCS_MII_STS(index), &val);
+	if (ret) {
+		state->link = 0;
+		return;
+	}
+
+	state->link = !!(val & PCS_MII_LINK_STS);
+
+	if (!state->link)
+		return;
+
+	switch (FIELD_GET(PCS_MII_STS_SPEED_MASK, val)) {
+	case PCS_MII_STS_SPEED_1000:
+		state->speed = SPEED_1000;
+		break;
+	case PCS_MII_STS_SPEED_100:
+		state->speed = SPEED_100;
+		break;
+	case PCS_MII_STS_SPEED_10:
+		state->speed = SPEED_10;
+		break;
+	default:
+		state->link = false;
+		return;
+	}
+
+	if (val & PCS_MII_STS_DUPLEX_FULL)
+		state->duplex = DUPLEX_FULL;
+	else
+		state->duplex = DUPLEX_HALF;
+
+	if (val & PCS_MII_STS_PAUSE_TX_EN)
+		state->pause |= MLO_PAUSE_TX;
+
+	if (val & PCS_MII_STS_PAUSE_RX_EN)
+		state->pause |= MLO_PAUSE_RX;
+}
+
+static int ipq_pcs_config_mode(struct ipq_pcs *qpcs,
+			       phy_interface_t interface)
+{
+	unsigned int val;
+	int ret;
+
+	/* Configure PCS interface mode */
+	switch (interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+		/* Select Qualcomm SGMII AN mode */
+		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
+					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
+					 PCS_MODE_SGMII);
+		if (ret)
+			return ret;
+		break;
+	case PHY_INTERFACE_MODE_QSGMII:
+		ret = regmap_update_bits(qpcs->regmap, PCS_MODE_CTRL,
+					 PCS_MODE_SEL_MASK | PCS_MODE_AN_MODE,
+					 PCS_MODE_QSGMII);
+		if (ret)
+			return ret;
+		break;
+	default:
+		dev_err(qpcs->dev,
+			"Unsupported interface %s\n", phy_modes(interface));
+		return -EOPNOTSUPP;
+	}
+
+	/* PCS PLL reset */
+	ret = regmap_update_bits(qpcs->regmap, PCS_PLL_RESET,
+				 PCS_ANA_SW_RESET, 0);
+	if (ret)
+		return ret;
+
+	fsleep(1000);
+	ret = regmap_update_bits(qpcs->regmap, PCS_PLL_RESET,
+				 PCS_ANA_SW_RESET, PCS_ANA_SW_RESET);
+	if (ret)
+		return ret;
+
+	/* Wait for calibration completion */
+	ret = regmap_read_poll_timeout(qpcs->regmap, PCS_CALIBRATION,
+				       val, val & PCS_CALIBRATION_DONE,
+				       1000, 100000);
+	if (ret) {
+		dev_err(qpcs->dev, "PCS calibration timed-out\n");
+		return ret;
+	}
+
+	qpcs->interface = interface;
+
+	return 0;
+}
+
+static int ipq_pcs_config_sgmii(struct ipq_pcs *qpcs,
+				int index,
+				unsigned int neg_mode,
+				phy_interface_t interface)
+{
+	int ret;
+
+	/* Access to PCS registers such as PCS_MODE_CTRL which are
+	 * common to all MIIs, is lock protected and configured
+	 * only once. This is required only for interface modes
+	 * such as QSGMII.
+	 */
+	if (interface == PHY_INTERFACE_MODE_QSGMII)
+		mutex_lock(&qpcs->config_lock);
+
+	if (qpcs->interface != interface) {
+		ret = ipq_pcs_config_mode(qpcs, interface);
+		if (ret)
+			goto err;
+	}
+
+	if (interface == PHY_INTERFACE_MODE_QSGMII)
+		mutex_unlock(&qpcs->config_lock);
+
+	/* Nothing to do here as in-band autoneg mode is enabled
+	 * by default for each PCS MII port.
+	 */
+	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+		return 0;
+
+	/* Set force speed mode */
+	return regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+				  PCS_MII_FORCE_MODE, PCS_MII_FORCE_MODE);
+
+err:
+	if (interface == PHY_INTERFACE_MODE_QSGMII)
+		mutex_unlock(&qpcs->config_lock);
+
+	return ret;
+}
+
+static int ipq_pcs_link_up_config_sgmii(struct ipq_pcs *qpcs,
+					int index,
+					unsigned int neg_mode,
+					int speed)
+{
+	int ret;
+
+	/* PCS speed need not be configured if in-band autoneg is enabled */
+	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED)
+		goto pcs_adapter_reset;
+
+	/* PCS speed set for force mode */
+	switch (speed) {
+	case SPEED_1000:
+		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+					 PCS_MII_SPEED_MASK,
+					 PCS_MII_SPEED_1000);
+		if (ret)
+			return ret;
+		break;
+	case SPEED_100:
+		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_100);
+		if (ret)
+			return ret;
+		break;
+	case SPEED_10:
+		ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+					 PCS_MII_SPEED_MASK, PCS_MII_SPEED_10);
+		if (ret)
+			return ret;
+		break;
+	default:
+		dev_err(qpcs->dev, "Invalid SGMII speed %d\n", speed);
+		return -EINVAL;
+	}
+
+pcs_adapter_reset:
+	/* PCS adapter reset */
+	ret = regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+				 PCS_MII_ADPT_RESET, 0);
+	if (ret)
+		return ret;
+
+	return regmap_update_bits(qpcs->regmap, PCS_MII_CTRL(index),
+				  PCS_MII_ADPT_RESET, PCS_MII_ADPT_RESET);
+}
+
+static void ipq_pcs_get_state(struct phylink_pcs *pcs,
+			      struct phylink_link_state *state)
+{
+	struct ipq_pcs_mii *qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
+	struct ipq_pcs *qpcs = qpcs_mii->qpcs;
+	int index = qpcs_mii->index;
+
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_QSGMII:
+		ipq_pcs_get_state_sgmii(qpcs, index, state);
+		break;
+	default:
+		break;
+	}
+
+	dev_dbg(qpcs->dev,
+		"mode=%s/%s/%s link=%u\n",
+		phy_modes(state->interface),
+		phy_speed_to_str(state->speed),
+		phy_duplex_to_str(state->duplex),
+		state->link);
+}
+
+static int ipq_pcs_config(struct phylink_pcs *pcs,
+			  unsigned int neg_mode,
+			  phy_interface_t interface,
+			  const unsigned long *advertising,
+			  bool permit)
+{
+	struct ipq_pcs_mii *qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
+	struct ipq_pcs *qpcs = qpcs_mii->qpcs;
+	int index = qpcs_mii->index;
+
+	switch (interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_QSGMII:
+		return ipq_pcs_config_sgmii(qpcs, index, neg_mode, interface);
+	default:
+		dev_err(qpcs->dev,
+			"Unsupported interface %s\n", phy_modes(interface));
+		return -EOPNOTSUPP;
+	};
+}
+
+static void ipq_pcs_link_up(struct phylink_pcs *pcs,
+			    unsigned int neg_mode,
+			    phy_interface_t interface,
+			    int speed, int duplex)
+{
+	struct ipq_pcs_mii *qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
+	struct ipq_pcs *qpcs = qpcs_mii->qpcs;
+	int index = qpcs_mii->index;
+	int ret;
+
+	switch (interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_QSGMII:
+		ret = ipq_pcs_link_up_config_sgmii(qpcs, index,
+						   neg_mode, speed);
+		break;
+	default:
+		dev_err(qpcs->dev,
+			"Unsupported interface %s\n", phy_modes(interface));
+		return;
+	}
+
+	if (ret)
+		dev_err(qpcs->dev, "PCS link up fail for interface %s\n",
+			phy_modes(interface));
+}
+
+static const struct phylink_pcs_ops ipq_pcs_phylink_ops = {
+	.pcs_get_state = ipq_pcs_get_state,
+	.pcs_config = ipq_pcs_config,
+	.pcs_link_up = ipq_pcs_link_up,
+};
+
+/**
+ * ipq_pcs_create() - Create an IPQ PCS MII instance
+ * @np: Device tree node to the PCS MII
+ *
+ * Description: Create a phylink PCS instance for the given PCS MII node @np
+ * and enable the MII clocks. This instance is associated with the specific
+ * MII of the PCS and the corresponding Ethernet netdevice.
+ *
+ * Return: A pointer to the phylink PCS instance or an error-pointer value.
+ */
+struct phylink_pcs *ipq_pcs_create(struct device_node *np)
+{
+	struct platform_device *pdev;
+	struct ipq_pcs_mii *qpcs_mii;
+	struct device_node *pcs_np;
+	struct ipq_pcs *qpcs;
+	int i, ret;
+	u32 index;
+
+	if (!of_device_is_available(np))
+		return ERR_PTR(-ENODEV);
+
+	if (of_property_read_u32(np, "reg", &index))
+		return ERR_PTR(-EINVAL);
+
+	if (index >= PCS_MAX_MII_NRS)
+		return ERR_PTR(-EINVAL);
+
+	pcs_np = of_get_parent(np);
+	if (!pcs_np)
+		return ERR_PTR(-ENODEV);
+
+	if (!of_device_is_available(pcs_np)) {
+		of_node_put(pcs_np);
+		return ERR_PTR(-ENODEV);
+	}
+
+	pdev = of_find_device_by_node(pcs_np);
+	of_node_put(pcs_np);
+	if (!pdev)
+		return ERR_PTR(-ENODEV);
+
+	qpcs = platform_get_drvdata(pdev);
+	put_device(&pdev->dev);
+
+	/* If probe is not yet completed, return DEFER to
+	 * the dependent driver.
+	 */
+	if (!qpcs)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	qpcs_mii = kzalloc(sizeof(*qpcs_mii), GFP_KERNEL);
+	if (!qpcs_mii)
+		return ERR_PTR(-ENOMEM);
+
+	qpcs_mii->qpcs = qpcs;
+	qpcs_mii->index = index;
+	qpcs_mii->pcs.ops = &ipq_pcs_phylink_ops;
+	qpcs_mii->pcs.neg_mode = true;
+	qpcs_mii->pcs.poll = true;
+
+	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
+		qpcs_mii->clk[i] = of_clk_get_by_name(np, pcs_mii_clk_name[i]);
+		if (IS_ERR(qpcs_mii->clk[i])) {
+			dev_err(qpcs->dev,
+				"Failed to get MII %d interface clock %s\n",
+				index, pcs_mii_clk_name[i]);
+			goto err_clk_get;
+		}
+
+		ret = clk_prepare_enable(qpcs_mii->clk[i]);
+		if (ret) {
+			dev_err(qpcs->dev,
+				"Failed to enable MII %d interface clock %s\n",
+				index, pcs_mii_clk_name[i]);
+			goto err_clk_en;
+		}
+	}
+
+	return &qpcs_mii->pcs;
+
+err_clk_en:
+	clk_put(qpcs_mii->clk[i]);
+err_clk_get:
+	while (i) {
+		i--;
+		clk_disable_unprepare(qpcs_mii->clk[i]);
+		clk_put(qpcs_mii->clk[i]);
+	}
+
+	kfree(qpcs_mii);
+	return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL(ipq_pcs_create);
+
+/**
+ * ipq_pcs_destroy() - Destroy the IPQ PCS MII instance
+ * @pcs: PCS instance
+ *
+ * Description: Destroy a phylink PCS instance.
+ */
+void ipq_pcs_destroy(struct phylink_pcs *pcs)
+{
+	struct ipq_pcs_mii *qpcs_mii;
+	int i;
+
+	if (!pcs)
+		return;
+
+	qpcs_mii = phylink_pcs_to_qpcs_mii(pcs);
+
+	for (i = 0; i < PCS_MII_CLK_MAX; i++) {
+		clk_disable_unprepare(qpcs_mii->clk[i]);
+		clk_put(qpcs_mii->clk[i]);
+	}
+
+	kfree(qpcs_mii);
+}
+EXPORT_SYMBOL(ipq_pcs_destroy);
+
 static unsigned long ipq_pcs_clk_rate_get(struct ipq_pcs *qpcs)
 {
 	switch (qpcs->interface) {
@@ -219,6 +672,8 @@  static int ipq_pcs_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	mutex_init(&qpcs->config_lock);
+
 	platform_set_drvdata(pdev, qpcs);
 
 	return 0;
diff --git a/include/linux/pcs/pcs-qcom-ipq.h b/include/linux/pcs/pcs-qcom-ipq.h
new file mode 100644
index 000000000000..f72a895afaba
--- /dev/null
+++ b/include/linux/pcs/pcs-qcom-ipq.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ */
+
+#ifndef __LINUX_PCS_QCOM_IPQ_H
+#define __LINUX_PCS_QCOM_IPQ_H
+
+struct device_node;
+struct phylink_pcs;
+
+struct phylink_pcs *ipq_pcs_create(struct device_node *np);
+void ipq_pcs_destroy(struct phylink_pcs *pcs);
+
+#endif /* __LINUX_PCS_QCOM_IPQ_H */