diff mbox series

[v2] PCI: mediatek-gen3: Assert MAC reset only if PHY reset also present

Message ID 20240229092449.580971-1-angelogioacchino.delregno@collabora.com (mailing list archive)
State New, archived
Headers show
Series [v2] PCI: mediatek-gen3: Assert MAC reset only if PHY reset also present | expand

Commit Message

AngeloGioacchino Del Regno Feb. 29, 2024, 9:24 a.m. UTC
Some SoCs have two PCI-Express controllers: in the case of MT8195,
one of them is using a dedicated PHY, but the other uses a combo PHY
that is shared with USB and in that case the PHY cannot be reset
from the PCIe driver, or USB functionality will be unable to resume.

Resetting the PCIe MAC without also resetting the PHY will result in
a full system lockup at PCIe resume time and the only option to
resume operation is to hard reboot the system (with a PMIC cut-off).

To resolve this issue, check if we've got both a PHY and a MAC reset
and, if not, never assert resets at PM suspend time: in that case,
the link is still getting powered down as both the clocks and the
power domains will go down anyway.

Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---

Changes in v2:
 - Rebased over next-20240229

 drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

Comments

Nícolas F. R. A. Prado Feb. 29, 2024, 4:20 p.m. UTC | #1
On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
> Some SoCs have two PCI-Express controllers: in the case of MT8195,
> one of them is using a dedicated PHY, but the other uses a combo PHY
> that is shared with USB and in that case the PHY cannot be reset
> from the PCIe driver, or USB functionality will be unable to resume.
> 
> Resetting the PCIe MAC without also resetting the PHY will result in
> a full system lockup at PCIe resume time and the only option to
> resume operation is to hard reboot the system (with a PMIC cut-off).
> 
> To resolve this issue, check if we've got both a PHY and a MAC reset
> and, if not, never assert resets at PM suspend time: in that case,
> the link is still getting powered down as both the clocks and the
> power domains will go down anyway.
> 
> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

With this applied resume finally works on MT8195-Tomato! And I no longer see the
errors I mentioned in [1]. So,

Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>

Thanks,
Nícolas

[1] https://lore.kernel.org/all/d8cfb804-e47a-471c-8bc0-e974ee045655@notapiano/
Jianjun Wang (王建军) March 1, 2024, 2:48 a.m. UTC | #2
Hi Angelo,

Thanks for your patch.

On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno wrote:
> Some SoCs have two PCI-Express controllers: in the case of MT8195,
> one of them is using a dedicated PHY, but the other uses a combo PHY
> that is shared with USB and in that case the PHY cannot be reset
> from the PCIe driver, or USB functionality will be unable to resume.
> 
> Resetting the PCIe MAC without also resetting the PHY will result in
> a full system lockup at PCIe resume time and the only option to
> resume operation is to hard reboot the system (with a PMIC cut-off).
> 
> To resolve this issue, check if we've got both a PHY and a MAC reset
> and, if not, never assert resets at PM suspend time: in that case,
> the link is still getting powered down as both the clocks and the
> power domains will go down anyway.
> 
> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> Signed-off-by: AngeloGioacchino Del Regno <
> angelogioacchino.delregno@collabora.com>
> ---
> 
> Changes in v2:
>  - Rebased over next-20240229
> 
>  drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-----
> --
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
> b/drivers/pci/controller/pcie-mediatek-gen3.c
> index 975b3024fb08..99b5d7a49be1 100644
> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
> mtk_gen3_pcie *pcie)
>  	return err;
>  }
>  
> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool
> is_suspend)
>  {
> +	bool suspend_reset_supported = pcie->mac_reset && pcie-
> >phy_reset;
> +
>  	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>  
>  	pm_runtime_put_sync(pcie->dev);
>  	pm_runtime_disable(pcie->dev);
> -	reset_control_assert(pcie->mac_reset);
> +
> +	/*
> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> +	 * the system will lockup at PM resume time.
> +	 */
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->mac_reset);
>  
>  	phy_power_off(pcie->phy);
>  	phy_exit(pcie->phy);
> -	reset_control_assert(pcie->phy_reset);
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->phy_reset);
>  }
>  
>  static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
> @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct mtk_gen3_pcie
> *pcie)
>  	return 0;
>  
>  err_setup:
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, false);
>  
>  	return err;
>  }
> @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct platform_device
> *pdev)
>  	err = pci_host_probe(host);
>  	if (err) {
>  		mtk_pcie_irq_teardown(pcie);
> -		mtk_pcie_power_down(pcie);
> +		mtk_pcie_power_down(pcie, false);
>  		return err;
>  	}
>  
> @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
> platform_device *pdev)
>  	pci_unlock_rescan_remove();
>  
>  	mtk_pcie_irq_teardown(pcie);
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, false);

Is there any reason not to reset the MAC and PHY when probe fails and
driver removing? Some SoCs may not have MTCMOS to cut off their power,
we need to assert the reset signal to save power in that case.

Thanks.

>  }
>  
>  static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
> @@ -1044,7 +1053,7 @@ static int mtk_pcie_suspend_noirq(struct device
> *dev)
>  	dev_dbg(pcie->dev, "entered L2 states successfully");
>  
>  	mtk_pcie_irq_save(pcie);
> -	mtk_pcie_power_down(pcie);
> +	mtk_pcie_power_down(pcie, true);
>  
>  	return 0;
>  }
> @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct device
> *dev)
>  
>  	err = mtk_pcie_startup_port(pcie);
>  	if (err) {
> -		mtk_pcie_power_down(pcie);
> +		mtk_pcie_power_down(pcie, false);
>  		return err;
>  	}
>
s-vadapalli March 1, 2024, 6:42 a.m. UTC | #3
On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
> Some SoCs have two PCI-Express controllers: in the case of MT8195,
> one of them is using a dedicated PHY, but the other uses a combo PHY
> that is shared with USB and in that case the PHY cannot be reset
> from the PCIe driver, or USB functionality will be unable to resume.
> 
> Resetting the PCIe MAC without also resetting the PHY will result in
> a full system lockup at PCIe resume time and the only option to
> resume operation is to hard reboot the system (with a PMIC cut-off).
> 
> To resolve this issue, check if we've got both a PHY and a MAC reset
> and, if not, never assert resets at PM suspend time: in that case,
> the link is still getting powered down as both the clocks and the
> power domains will go down anyway.
> 
> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> ---
> 
> Changes in v2:
>  - Rebased over next-20240229
> 
>  drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
> index 975b3024fb08..99b5d7a49be1 100644
> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
>  	return err;
>  }
>  
> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
>  {
> +	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
> +
>  	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>  
>  	pm_runtime_put_sync(pcie->dev);
>  	pm_runtime_disable(pcie->dev);
> -	reset_control_assert(pcie->mac_reset);
> +
> +	/*
> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> +	 * the system will lockup at PM resume time.
> +	 */
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->mac_reset);
>  
>  	phy_power_off(pcie->phy);
>  	phy_exit(pcie->phy);

Wouldn't this power off the shared PHY? Or will the PHY driver make this
NO-OP if the PHY is shared, in which case the above two statements could
be combined with the other statements in the:
if (is_suspend && suspend_reset_supported)
condition to get a single block of code that also combines the
reset_control_assert(pcie->phy_reset)
present below.

> -	reset_control_assert(pcie->phy_reset);
> +	if (is_suspend && suspend_reset_supported)
> +		reset_control_assert(pcie->phy_reset);
>  }
>  
...

Regards,
Siddharth.
AngeloGioacchino Del Regno March 1, 2024, 9:06 a.m. UTC | #4
Il 01/03/24 07:42, Siddharth Vadapalli ha scritto:
> On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
>> Some SoCs have two PCI-Express controllers: in the case of MT8195,
>> one of them is using a dedicated PHY, but the other uses a combo PHY
>> that is shared with USB and in that case the PHY cannot be reset
>> from the PCIe driver, or USB functionality will be unable to resume.
>>
>> Resetting the PCIe MAC without also resetting the PHY will result in
>> a full system lockup at PCIe resume time and the only option to
>> resume operation is to hard reboot the system (with a PMIC cut-off).
>>
>> To resolve this issue, check if we've got both a PHY and a MAC reset
>> and, if not, never assert resets at PM suspend time: in that case,
>> the link is still getting powered down as both the clocks and the
>> power domains will go down anyway.
>>
>> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> ---
>>
>> Changes in v2:
>>   - Rebased over next-20240229
>>
>>   drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
>>   1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
>> index 975b3024fb08..99b5d7a49be1 100644
>> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
>> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
>> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
>>   	return err;
>>   }
>>   
>> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
>> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
>>   {
>> +	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
>> +
>>   	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>>   
>>   	pm_runtime_put_sync(pcie->dev);
>>   	pm_runtime_disable(pcie->dev);
>> -	reset_control_assert(pcie->mac_reset);
>> +
>> +	/*
>> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
>> +	 * the system will lockup at PM resume time.
>> +	 */
>> +	if (is_suspend && suspend_reset_supported)
>> +		reset_control_assert(pcie->mac_reset);
>>   
>>   	phy_power_off(pcie->phy);
>>   	phy_exit(pcie->phy);
> 
> Wouldn't this power off the shared PHY? Or will the PHY driver make this
> NO-OP if the PHY is shared, in which case the above two statements could
> be combined with the other statements in the:
> if (is_suspend && suspend_reset_supported)
> condition to get a single block of code that also combines the
> reset_control_assert(pcie->phy_reset)
> present below.
> 

No, that'd be fine:

static int mtk_phy_power_off(struct phy *phy)
{
	struct mtk_phy_instance *instance = phy_get_drvdata(phy);
	struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent);

	if (instance->type == PHY_TYPE_USB2)
		u2_phy_instance_power_off(tphy, instance);
	else if (instance->type == PHY_TYPE_PCIE)
		pcie_phy_instance_power_off(tphy, instance);

	return 0;
}

...it's two different PHY instances that we're dealing with, here :-)

Cheers,
Angelo

>> -	reset_control_assert(pcie->phy_reset);
>> +	if (is_suspend && suspend_reset_supported)
>> +		reset_control_assert(pcie->phy_reset);
>>   }
>>   
> ...
> 
> Regards,
> Siddharth.
s-vadapalli March 1, 2024, 9:55 a.m. UTC | #5
On Fri, Mar 01, 2024 at 10:06:33AM +0100, AngeloGioacchino Del Regno wrote:
> Il 01/03/24 07:42, Siddharth Vadapalli ha scritto:
> > On Thu, Feb 29, 2024 at 10:24:49AM +0100, AngeloGioacchino Del Regno wrote:
> > > Some SoCs have two PCI-Express controllers: in the case of MT8195,
> > > one of them is using a dedicated PHY, but the other uses a combo PHY
> > > that is shared with USB and in that case the PHY cannot be reset
> > > from the PCIe driver, or USB functionality will be unable to resume.
> > > 
> > > Resetting the PCIe MAC without also resetting the PHY will result in
> > > a full system lockup at PCIe resume time and the only option to
> > > resume operation is to hard reboot the system (with a PMIC cut-off).
> > > 
> > > To resolve this issue, check if we've got both a PHY and a MAC reset
> > > and, if not, never assert resets at PM suspend time: in that case,
> > > the link is still getting powered down as both the clocks and the
> > > power domains will go down anyway.
> > > 
> > > Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> > > Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> > > ---
> > > 
> > > Changes in v2:
> > >   - Rebased over next-20240229
> > > 
> > >   drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-------
> > >   1 file changed, 17 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > index 975b3024fb08..99b5d7a49be1 100644
> > > --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
> > >   	return err;
> > >   }
> > > -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> > > +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
> > >   {
> > > +	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
> > > +
> > >   	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
> > >   	pm_runtime_put_sync(pcie->dev);
> > >   	pm_runtime_disable(pcie->dev);
> > > -	reset_control_assert(pcie->mac_reset);
> > > +
> > > +	/*
> > > +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> > > +	 * the system will lockup at PM resume time.
> > > +	 */
> > > +	if (is_suspend && suspend_reset_supported)
> > > +		reset_control_assert(pcie->mac_reset);
> > >   	phy_power_off(pcie->phy);
> > >   	phy_exit(pcie->phy);
> > 
> > Wouldn't this power off the shared PHY? Or will the PHY driver make this
> > NO-OP if the PHY is shared, in which case the above two statements could
> > be combined with the other statements in the:
> > if (is_suspend && suspend_reset_supported)
> > condition to get a single block of code that also combines the
> > reset_control_assert(pcie->phy_reset)
> > present below.
> > 
> 
> No, that'd be fine:
> 
> static int mtk_phy_power_off(struct phy *phy)
> {
> 	struct mtk_phy_instance *instance = phy_get_drvdata(phy);
> 	struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent);
> 
> 	if (instance->type == PHY_TYPE_USB2)
> 		u2_phy_instance_power_off(tphy, instance);
> 	else if (instance->type == PHY_TYPE_PCIE)
> 		pcie_phy_instance_power_off(tphy, instance);
> 
> 	return 0;
> }
> 
> ...it's two different PHY instances that we're dealing with, here :-)

I didn't realize that it is handled separately. Thank you for clarifying!

Regards,
Siddharth.
AngeloGioacchino Del Regno March 6, 2024, 8:50 a.m. UTC | #6
Il 01/03/24 03:48, Jianjun Wang (王建军) ha scritto:
> Hi Angelo,
> 
> Thanks for your patch.
> 
> On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno wrote:
>> Some SoCs have two PCI-Express controllers: in the case of MT8195,
>> one of them is using a dedicated PHY, but the other uses a combo PHY
>> that is shared with USB and in that case the PHY cannot be reset
>> from the PCIe driver, or USB functionality will be unable to resume.
>>
>> Resetting the PCIe MAC without also resetting the PHY will result in
>> a full system lockup at PCIe resume time and the only option to
>> resume operation is to hard reboot the system (with a PMIC cut-off).
>>
>> To resolve this issue, check if we've got both a PHY and a MAC reset
>> and, if not, never assert resets at PM suspend time: in that case,
>> the link is still getting powered down as both the clocks and the
>> power domains will go down anyway.
>>
>> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
>> Signed-off-by: AngeloGioacchino Del Regno <
>> angelogioacchino.delregno@collabora.com>
>> ---
>>
>> Changes in v2:
>>   - Rebased over next-20240229
>>
>>   drivers/pci/controller/pcie-mediatek-gen3.c | 25 ++++++++++++++-----
>> --
>>   1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
>> b/drivers/pci/controller/pcie-mediatek-gen3.c
>> index 975b3024fb08..99b5d7a49be1 100644
>> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
>> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
>> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
>> mtk_gen3_pcie *pcie)
>>   	return err;
>>   }
>>   
>> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
>> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool
>> is_suspend)
>>   {
>> +	bool suspend_reset_supported = pcie->mac_reset && pcie-
>>> phy_reset;
>> +
>>   	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>>   
>>   	pm_runtime_put_sync(pcie->dev);
>>   	pm_runtime_disable(pcie->dev);
>> -	reset_control_assert(pcie->mac_reset);
>> +
>> +	/*
>> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
>> +	 * the system will lockup at PM resume time.
>> +	 */
>> +	if (is_suspend && suspend_reset_supported)
>> +		reset_control_assert(pcie->mac_reset);
>>   
>>   	phy_power_off(pcie->phy);
>>   	phy_exit(pcie->phy);
>> -	reset_control_assert(pcie->phy_reset);
>> +	if (is_suspend && suspend_reset_supported)
>> +		reset_control_assert(pcie->phy_reset);
>>   }
>>   
>>   static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
>> @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct mtk_gen3_pcie
>> *pcie)
>>   	return 0;
>>   
>>   err_setup:
>> -	mtk_pcie_power_down(pcie);
>> +	mtk_pcie_power_down(pcie, false);
>>   
>>   	return err;
>>   }
>> @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct platform_device
>> *pdev)
>>   	err = pci_host_probe(host);
>>   	if (err) {
>>   		mtk_pcie_irq_teardown(pcie);
>> -		mtk_pcie_power_down(pcie);
>> +		mtk_pcie_power_down(pcie, false);
>>   		return err;
>>   	}
>>   
>> @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
>> platform_device *pdev)
>>   	pci_unlock_rescan_remove();
>>   
>>   	mtk_pcie_irq_teardown(pcie);
>> -	mtk_pcie_power_down(pcie);
>> +	mtk_pcie_power_down(pcie, false);
> 
> Is there any reason not to reset the MAC and PHY when probe fails and
> driver removing? Some SoCs may not have MTCMOS to cut off their power,
> we need to assert the reset signal to save power in that case.
> 

Sorry for the late reply - yes, there is a reason.

On platforms needing this quirk, resetting at .remove() time will hang the
machine if the module is reinserted later (hence, .probe() called at a later
time).

Regards,
Angelo

> Thanks.
> 
>>   }
>>   
>>   static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
>> @@ -1044,7 +1053,7 @@ static int mtk_pcie_suspend_noirq(struct device
>> *dev)
>>   	dev_dbg(pcie->dev, "entered L2 states successfully");
>>   
>>   	mtk_pcie_irq_save(pcie);
>> -	mtk_pcie_power_down(pcie);
>> +	mtk_pcie_power_down(pcie, true);
>>   
>>   	return 0;
>>   }
>> @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct device
>> *dev)
>>   
>>   	err = mtk_pcie_startup_port(pcie);
>>   	if (err) {
>> -		mtk_pcie_power_down(pcie);
>> +		mtk_pcie_power_down(pcie, false);
>>   		return err;
>>   	}
>>
Jianjun Wang (王建军) March 8, 2024, 9:44 a.m. UTC | #7
On Wed, 2024-03-06 at 09:50 +0100, AngeloGioacchino Del Regno wrote:
> Il 01/03/24 03:48, Jianjun Wang (王建军) ha scritto:
> > Hi Angelo,
> > 
> > Thanks for your patch.
> > 
> > On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno
> > wrote:
> > > Some SoCs have two PCI-Express controllers: in the case of
> > > MT8195,
> > > one of them is using a dedicated PHY, but the other uses a combo
> > > PHY
> > > that is shared with USB and in that case the PHY cannot be reset
> > > from the PCIe driver, or USB functionality will be unable to
> > > resume.
> > > 
> > > Resetting the PCIe MAC without also resetting the PHY will result
> > > in
> > > a full system lockup at PCIe resume time and the only option to
> > > resume operation is to hard reboot the system (with a PMIC cut-
> > > off).
> > > 
> > > To resolve this issue, check if we've got both a PHY and a MAC
> > > reset
> > > and, if not, never assert resets at PM suspend time: in that
> > > case,
> > > the link is still getting powered down as both the clocks and the
> > > power domains will go down anyway.
> > > 
> > > Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
> > > Signed-off-by: AngeloGioacchino Del Regno <
> > > angelogioacchino.delregno@collabora.com>
> > > ---
> > > 
> > > Changes in v2:
> > >   - Rebased over next-20240229
> > > 
> > >   drivers/pci/controller/pcie-mediatek-gen3.c | 25
> > > ++++++++++++++-----
> > > --
> > >   1 file changed, 17 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > index 975b3024fb08..99b5d7a49be1 100644
> > > --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
> > > mtk_gen3_pcie *pcie)
> > >   	return err;
> > >   }
> > >   
> > > -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> > > +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool
> > > is_suspend)
> > >   {
> > > +	bool suspend_reset_supported = pcie->mac_reset && pcie-
> > > > phy_reset;
> > > 
> > > +
> > >   	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
> > >   
> > >   	pm_runtime_put_sync(pcie->dev);
> > >   	pm_runtime_disable(pcie->dev);
> > > -	reset_control_assert(pcie->mac_reset);
> > > +
> > > +	/*
> > > +	 * Assert MAC reset only if we also got a PHY reset, otherwise
> > > +	 * the system will lockup at PM resume time.
> > > +	 */
> > > +	if (is_suspend && suspend_reset_supported)
> > > +		reset_control_assert(pcie->mac_reset);
> > >   
> > >   	phy_power_off(pcie->phy);
> > >   	phy_exit(pcie->phy);
> > > -	reset_control_assert(pcie->phy_reset);
> > > +	if (is_suspend && suspend_reset_supported)
> > > +		reset_control_assert(pcie->phy_reset);
> > >   }
> > >   
> > >   static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
> > > @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct
> > > mtk_gen3_pcie
> > > *pcie)
> > >   	return 0;
> > >   
> > >   err_setup:
> > > -	mtk_pcie_power_down(pcie);
> > > +	mtk_pcie_power_down(pcie, false);
> > >   
> > >   	return err;
> > >   }
> > > @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct
> > > platform_device
> > > *pdev)
> > >   	err = pci_host_probe(host);
> > >   	if (err) {
> > >   		mtk_pcie_irq_teardown(pcie);
> > > -		mtk_pcie_power_down(pcie);
> > > +		mtk_pcie_power_down(pcie, false);
> > >   		return err;
> > >   	}
> > >   
> > > @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
> > > platform_device *pdev)
> > >   	pci_unlock_rescan_remove();
> > >   
> > >   	mtk_pcie_irq_teardown(pcie);
> > > -	mtk_pcie_power_down(pcie);
> > > +	mtk_pcie_power_down(pcie, false);
> > 
> > Is there any reason not to reset the MAC and PHY when probe fails
> > and
> > driver removing? Some SoCs may not have MTCMOS to cut off their
> > power,
> > we need to assert the reset signal to save power in that case.
> > 
> 
> Sorry for the late reply - yes, there is a reason.
> 
> On platforms needing this quirk, resetting at .remove() time will
> hang the
> machine if the module is reinserted later (hence, .probe() called at
> a later
> time).

Does this only happen when the PCIe MAC is reset without resetting the
PHY? Is it related to the reset framework?

Thanks.
> 
> Regards,
> Angelo
> 
> > Thanks.
> > 
> > >   }
> > >   
> > >   static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
> > > @@ -1044,7 +1053,7 @@ static int mtk_pcie_suspend_noirq(struct
> > > device
> > > *dev)
> > >   	dev_dbg(pcie->dev, "entered L2 states successfully");
> > >   
> > >   	mtk_pcie_irq_save(pcie);
> > > -	mtk_pcie_power_down(pcie);
> > > +	mtk_pcie_power_down(pcie, true);
> > >   
> > >   	return 0;
> > >   }
> > > @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct
> > > device
> > > *dev)
> > >   
> > >   	err = mtk_pcie_startup_port(pcie);
> > >   	if (err) {
> > > -		mtk_pcie_power_down(pcie);
> > > +		mtk_pcie_power_down(pcie, false);
> > >   		return err;
> > >   	}
> > >   
> 
> 
>
AngeloGioacchino Del Regno March 8, 2024, 1:02 p.m. UTC | #8
Il 08/03/24 10:44, Jianjun Wang (王建军) ha scritto:
> On Wed, 2024-03-06 at 09:50 +0100, AngeloGioacchino Del Regno wrote:
>> Il 01/03/24 03:48, Jianjun Wang (王建军) ha scritto:
>>> Hi Angelo,
>>>
>>> Thanks for your patch.
>>>
>>> On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno
>>> wrote:
>>>> Some SoCs have two PCI-Express controllers: in the case of
>>>> MT8195,
>>>> one of them is using a dedicated PHY, but the other uses a combo
>>>> PHY
>>>> that is shared with USB and in that case the PHY cannot be reset
>>>> from the PCIe driver, or USB functionality will be unable to
>>>> resume.
>>>>
>>>> Resetting the PCIe MAC without also resetting the PHY will result
>>>> in
>>>> a full system lockup at PCIe resume time and the only option to
>>>> resume operation is to hard reboot the system (with a PMIC cut-
>>>> off).
>>>>
>>>> To resolve this issue, check if we've got both a PHY and a MAC
>>>> reset
>>>> and, if not, never assert resets at PM suspend time: in that
>>>> case,
>>>> the link is still getting powered down as both the clocks and the
>>>> power domains will go down anyway.
>>>>
>>>> Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM support")
>>>> Signed-off-by: AngeloGioacchino Del Regno <
>>>> angelogioacchino.delregno@collabora.com>
>>>> ---
>>>>
>>>> Changes in v2:
>>>>    - Rebased over next-20240229
>>>>
>>>>    drivers/pci/controller/pcie-mediatek-gen3.c | 25
>>>> ++++++++++++++-----
>>>> --
>>>>    1 file changed, 17 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
>>>> b/drivers/pci/controller/pcie-mediatek-gen3.c
>>>> index 975b3024fb08..99b5d7a49be1 100644
>>>> --- a/drivers/pci/controller/pcie-mediatek-gen3.c
>>>> +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
>>>> @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
>>>> mtk_gen3_pcie *pcie)
>>>>    	return err;
>>>>    }
>>>>    
>>>> -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
>>>> +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool
>>>> is_suspend)
>>>>    {
>>>> +	bool suspend_reset_supported = pcie->mac_reset && pcie-
>>>>> phy_reset;
>>>>
>>>> +
>>>>    	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
>>>>    
>>>>    	pm_runtime_put_sync(pcie->dev);
>>>>    	pm_runtime_disable(pcie->dev);
>>>> -	reset_control_assert(pcie->mac_reset);
>>>> +
>>>> +	/*
>>>> +	 * Assert MAC reset only if we also got a PHY reset, otherwise
>>>> +	 * the system will lockup at PM resume time.
>>>> +	 */
>>>> +	if (is_suspend && suspend_reset_supported)
>>>> +		reset_control_assert(pcie->mac_reset);
>>>>    
>>>>    	phy_power_off(pcie->phy);
>>>>    	phy_exit(pcie->phy);
>>>> -	reset_control_assert(pcie->phy_reset);
>>>> +	if (is_suspend && suspend_reset_supported)
>>>> +		reset_control_assert(pcie->phy_reset);
>>>>    }
>>>>    
>>>>    static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
>>>> @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct
>>>> mtk_gen3_pcie
>>>> *pcie)
>>>>    	return 0;
>>>>    
>>>>    err_setup:
>>>> -	mtk_pcie_power_down(pcie);
>>>> +	mtk_pcie_power_down(pcie, false);
>>>>    
>>>>    	return err;
>>>>    }
>>>> @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct
>>>> platform_device
>>>> *pdev)
>>>>    	err = pci_host_probe(host);
>>>>    	if (err) {
>>>>    		mtk_pcie_irq_teardown(pcie);
>>>> -		mtk_pcie_power_down(pcie);
>>>> +		mtk_pcie_power_down(pcie, false);
>>>>    		return err;
>>>>    	}
>>>>    
>>>> @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
>>>> platform_device *pdev)
>>>>    	pci_unlock_rescan_remove();
>>>>    
>>>>    	mtk_pcie_irq_teardown(pcie);
>>>> -	mtk_pcie_power_down(pcie);
>>>> +	mtk_pcie_power_down(pcie, false);
>>>
>>> Is there any reason not to reset the MAC and PHY when probe fails
>>> and
>>> driver removing? Some SoCs may not have MTCMOS to cut off their
>>> power,
>>> we need to assert the reset signal to save power in that case.
>>>
>>
>> Sorry for the late reply - yes, there is a reason.
>>
>> On platforms needing this quirk, resetting at .remove() time will
>> hang the
>> machine if the module is reinserted later (hence, .probe() called at
>> a later
>> time).
> 
> Does this only happen when the PCIe MAC is reset without resetting the
> PHY? Is it related to the reset framework?
> 

Happens only when MAC is reset without resetting the PHY after calling
phy_power_off(); phy_exit();

It's not a reset framework issue, that's for sure.

Angelo

> Thanks.
>>
>> Regards,
>> Angelo
>>
>>> Thanks.
>>>
>>>>    }
>>>>    
>>>>    static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
>>>> @@ -1044,7 +1053,7 @@ static int mtk_pcie_suspend_noirq(struct
>>>> device
>>>> *dev)
>>>>    	dev_dbg(pcie->dev, "entered L2 states successfully");
>>>>    
>>>>    	mtk_pcie_irq_save(pcie);
>>>> -	mtk_pcie_power_down(pcie);
>>>> +	mtk_pcie_power_down(pcie, true);
>>>>    
>>>>    	return 0;
>>>>    }
>>>> @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct
>>>> device
>>>> *dev)
>>>>    
>>>>    	err = mtk_pcie_startup_port(pcie);
>>>>    	if (err) {
>>>> -		mtk_pcie_power_down(pcie);
>>>> +		mtk_pcie_power_down(pcie, false);
>>>>    		return err;
>>>>    	}
>>>>    
>>
>>
>>
Jianjun Wang (王建军) March 13, 2024, 3:33 a.m. UTC | #9
On Fri, 2024-03-08 at 14:02 +0100, AngeloGioacchino Del Regno wrote:
> Il 08/03/24 10:44, Jianjun Wang (王建军) ha scritto:
> > On Wed, 2024-03-06 at 09:50 +0100, AngeloGioacchino Del Regno
> > wrote:
> > > Il 01/03/24 03:48, Jianjun Wang (王建军) ha scritto:
> > > > Hi Angelo,
> > > > 
> > > > Thanks for your patch.
> > > > 
> > > > On Thu, 2024-02-29 at 10:24 +0100, AngeloGioacchino Del Regno
> > > > wrote:
> > > > > Some SoCs have two PCI-Express controllers: in the case of
> > > > > MT8195,
> > > > > one of them is using a dedicated PHY, but the other uses a
> > > > > combo
> > > > > PHY
> > > > > that is shared with USB and in that case the PHY cannot be
> > > > > reset
> > > > > from the PCIe driver, or USB functionality will be unable to
> > > > > resume.
> > > > > 
> > > > > Resetting the PCIe MAC without also resetting the PHY will
> > > > > result
> > > > > in
> > > > > a full system lockup at PCIe resume time and the only option
> > > > > to
> > > > > resume operation is to hard reboot the system (with a PMIC
> > > > > cut-
> > > > > off).
> > > > > 
> > > > > To resolve this issue, check if we've got both a PHY and a
> > > > > MAC
> > > > > reset
> > > > > and, if not, never assert resets at PM suspend time: in that
> > > > > case,
> > > > > the link is still getting powered down as both the clocks and
> > > > > the
> > > > > power domains will go down anyway.
> > > > > 
> > > > > Fixes: d537dc125f07 ("PCI: mediatek-gen3: Add system PM
> > > > > support")
> > > > > Signed-off-by: AngeloGioacchino Del Regno <
> > > > > angelogioacchino.delregno@collabora.com>
> > > > > ---
> > > > > 
> > > > > Changes in v2:
> > > > >    - Rebased over next-20240229
> > > > > 
> > > > >    drivers/pci/controller/pcie-mediatek-gen3.c | 25
> > > > > ++++++++++++++-----
> > > > > --
> > > > >    1 file changed, 17 insertions(+), 8 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > > > b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > > > index 975b3024fb08..99b5d7a49be1 100644
> > > > > --- a/drivers/pci/controller/pcie-mediatek-gen3.c
> > > > > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c
> > > > > @@ -874,17 +874,26 @@ static int mtk_pcie_power_up(struct
> > > > > mtk_gen3_pcie *pcie)
> > > > >    	return err;
> > > > >    }
> > > > >    
> > > > > -static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
> > > > > +static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie,
> > > > > bool
> > > > > is_suspend)
> > > > >    {
> > > > > +	bool suspend_reset_supported = pcie->mac_reset && pcie-
> > > > > > phy_reset;
> > > > > 
> > > > > +
> > > > >    	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
> > > > >    
> > > > >    	pm_runtime_put_sync(pcie->dev);
> > > > >    	pm_runtime_disable(pcie->dev);
> > > > > -	reset_control_assert(pcie->mac_reset);
> > > > > +
> > > > > +	/*
> > > > > +	 * Assert MAC reset only if we also got a PHY reset,
> > > > > otherwise
> > > > > +	 * the system will lockup at PM resume time.
> > > > > +	 */
> > > > > +	if (is_suspend && suspend_reset_supported)
> > > > > +		reset_control_assert(pcie->mac_reset);
> > > > >    
> > > > >    	phy_power_off(pcie->phy);
> > > > >    	phy_exit(pcie->phy);
> > > > > -	reset_control_assert(pcie->phy_reset);
> > > > > +	if (is_suspend && suspend_reset_supported)
> > > > > +		reset_control_assert(pcie->phy_reset);
> > > > >    }
> > > > >    
> > > > >    static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
> > > > > @@ -920,7 +929,7 @@ static int mtk_pcie_setup(struct
> > > > > mtk_gen3_pcie
> > > > > *pcie)
> > > > >    	return 0;
> > > > >    
> > > > >    err_setup:
> > > > > -	mtk_pcie_power_down(pcie);
> > > > > +	mtk_pcie_power_down(pcie, false);
> > > > >    
> > > > >    	return err;
> > > > >    }
> > > > > @@ -951,7 +960,7 @@ static int mtk_pcie_probe(struct
> > > > > platform_device
> > > > > *pdev)
> > > > >    	err = pci_host_probe(host);
> > > > >    	if (err) {
> > > > >    		mtk_pcie_irq_teardown(pcie);
> > > > > -		mtk_pcie_power_down(pcie);
> > > > > +		mtk_pcie_power_down(pcie, false);
> > > > >    		return err;
> > > > >    	}
> > > > >    
> > > > > @@ -969,7 +978,7 @@ static void mtk_pcie_remove(struct
> > > > > platform_device *pdev)
> > > > >    	pci_unlock_rescan_remove();
> > > > >    
> > > > >    	mtk_pcie_irq_teardown(pcie);
> > > > > -	mtk_pcie_power_down(pcie);
> > > > > +	mtk_pcie_power_down(pcie, false);
> > > > 
> > > > Is there any reason not to reset the MAC and PHY when probe
> > > > fails
> > > > and
> > > > driver removing? Some SoCs may not have MTCMOS to cut off their
> > > > power,
> > > > we need to assert the reset signal to save power in that case.
> > > > 
> > > 
> > > Sorry for the late reply - yes, there is a reason.
> > > 
> > > On platforms needing this quirk, resetting at .remove() time will
> > > hang the
> > > machine if the module is reinserted later (hence, .probe() called
> > > at
> > > a later
> > > time).
> > 
> > Does this only happen when the PCIe MAC is reset without resetting
> > the
> > PHY? Is it related to the reset framework?
> > 
> 
> Happens only when MAC is reset without resetting the PHY after
> calling
> phy_power_off(); phy_exit();
> 
> It's not a reset framework issue, that's for sure.

Thanks for the explanation.

In that case, why do we only reset during suspend? Can we also reset
them when probe fails and driver removing, if both the PHY and MAC
resets are present?

Thanks.

> 
> Angelo
> 
> > Thanks.
> > > 
> > > Regards,
> > > Angelo
> > > 
> > > > Thanks.
> > > > 
> > > > >    }
> > > > >    
> > > > >    static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
> > > > > @@ -1044,7 +1053,7 @@ static int
> > > > > mtk_pcie_suspend_noirq(struct
> > > > > device
> > > > > *dev)
> > > > >    	dev_dbg(pcie->dev, "entered L2 states successfully");
> > > > >    
> > > > >    	mtk_pcie_irq_save(pcie);
> > > > > -	mtk_pcie_power_down(pcie);
> > > > > +	mtk_pcie_power_down(pcie, true);
> > > > >    
> > > > >    	return 0;
> > > > >    }
> > > > > @@ -1060,7 +1069,7 @@ static int mtk_pcie_resume_noirq(struct
> > > > > device
> > > > > *dev)
> > > > >    
> > > > >    	err = mtk_pcie_startup_port(pcie);
> > > > >    	if (err) {
> > > > > -		mtk_pcie_power_down(pcie);
> > > > > +		mtk_pcie_power_down(pcie, false);
> > > > >    		return err;
> > > > >    	}
> > > > >    
> > > 
> > > 
> > > 
> 
>
diff mbox series

Patch

diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c
index 975b3024fb08..99b5d7a49be1 100644
--- a/drivers/pci/controller/pcie-mediatek-gen3.c
+++ b/drivers/pci/controller/pcie-mediatek-gen3.c
@@ -874,17 +874,26 @@  static int mtk_pcie_power_up(struct mtk_gen3_pcie *pcie)
 	return err;
 }
 
-static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie)
+static void mtk_pcie_power_down(struct mtk_gen3_pcie *pcie, bool is_suspend)
 {
+	bool suspend_reset_supported = pcie->mac_reset && pcie->phy_reset;
+
 	clk_bulk_disable_unprepare(pcie->num_clks, pcie->clks);
 
 	pm_runtime_put_sync(pcie->dev);
 	pm_runtime_disable(pcie->dev);
-	reset_control_assert(pcie->mac_reset);
+
+	/*
+	 * Assert MAC reset only if we also got a PHY reset, otherwise
+	 * the system will lockup at PM resume time.
+	 */
+	if (is_suspend && suspend_reset_supported)
+		reset_control_assert(pcie->mac_reset);
 
 	phy_power_off(pcie->phy);
 	phy_exit(pcie->phy);
-	reset_control_assert(pcie->phy_reset);
+	if (is_suspend && suspend_reset_supported)
+		reset_control_assert(pcie->phy_reset);
 }
 
 static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
@@ -920,7 +929,7 @@  static int mtk_pcie_setup(struct mtk_gen3_pcie *pcie)
 	return 0;
 
 err_setup:
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, false);
 
 	return err;
 }
@@ -951,7 +960,7 @@  static int mtk_pcie_probe(struct platform_device *pdev)
 	err = pci_host_probe(host);
 	if (err) {
 		mtk_pcie_irq_teardown(pcie);
-		mtk_pcie_power_down(pcie);
+		mtk_pcie_power_down(pcie, false);
 		return err;
 	}
 
@@ -969,7 +978,7 @@  static void mtk_pcie_remove(struct platform_device *pdev)
 	pci_unlock_rescan_remove();
 
 	mtk_pcie_irq_teardown(pcie);
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, false);
 }
 
 static void mtk_pcie_irq_save(struct mtk_gen3_pcie *pcie)
@@ -1044,7 +1053,7 @@  static int mtk_pcie_suspend_noirq(struct device *dev)
 	dev_dbg(pcie->dev, "entered L2 states successfully");
 
 	mtk_pcie_irq_save(pcie);
-	mtk_pcie_power_down(pcie);
+	mtk_pcie_power_down(pcie, true);
 
 	return 0;
 }
@@ -1060,7 +1069,7 @@  static int mtk_pcie_resume_noirq(struct device *dev)
 
 	err = mtk_pcie_startup_port(pcie);
 	if (err) {
-		mtk_pcie_power_down(pcie);
+		mtk_pcie_power_down(pcie, false);
 		return err;
 	}