diff mbox

[v3,6/7] mmc: sunxi: Add runtime_pm support

Message ID 4514368de4ef7b4224be455f2d9e6090b464e819.1523888566.git-series.maxime.ripard@bootlin.com (mailing list archive)
State New, archived
Headers show

Commit Message

Maxime Ripard April 16, 2018, 2:23 p.m. UTC
So far, even if our card was not in use, we didn't shut down our MMC
controller, which meant that it was still active and clocking the bus.

While this obviously means that we could save some power there, it also
creates issues when it comes to EMC control since we'll have a perfect peak
at the card clock rate.

Let's implement runtime_pm with autosuspend so that we will shut down the
controller when it's not been in use for quite some time.

Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 drivers/mmc/host/sunxi-mmc.c | 48 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+)

Comments

Marc Zyngier June 14, 2018, 2:11 p.m. UTC | #1
Hi Maxime,

On 16/04/18 15:23, Maxime Ripard wrote:
> So far, even if our card was not in use, we didn't shut down our MMC
> controller, which meant that it was still active and clocking the bus.
> 
> While this obviously means that we could save some power there, it also
> creates issues when it comes to EMC control since we'll have a perfect peak
> at the card clock rate.
> 
> Let's implement runtime_pm with autosuspend so that we will shut down the
> controller when it's not been in use for quite some time.
> 
> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
> ---
>  drivers/mmc/host/sunxi-mmc.c | 48 +++++++++++++++++++++++++++++++++++++-
>  1 file changed, 48 insertions(+)
> 
> diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
> index 0165da0d022a..0253deb153a4 100644
> --- a/drivers/mmc/host/sunxi-mmc.c
> +++ b/drivers/mmc/host/sunxi-mmc.c
> @@ -35,6 +35,7 @@
>  #include <linux/of_gpio.h>
>  #include <linux/of_platform.h>
>  #include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
>  #include <linux/regulator/consumer.h>
>  #include <linux/reset.h>
>  #include <linux/scatterlist.h>
> @@ -969,6 +970,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>  	unsigned long flags;
>  	u32 imask;
>  
> +	if (enable)
> +		pm_runtime_get_noresume(host->dev);
> +
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	imask = mmc_readl(host, REG_IMASK);
> @@ -981,6 +985,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>  	}
>  	mmc_writel(host, REG_IMASK, imask);
>  	spin_unlock_irqrestore(&host->lock, flags);
> +
> +	if (!enable)
> +		pm_runtime_put_noidle(host->mmc->parent);
>  }
>  
>  static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
> @@ -1394,6 +1401,11 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto error_free_dma;
>  
> +	pm_runtime_set_active(&pdev->dev);
> +	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
> +	pm_runtime_use_autosuspend(&pdev->dev);
> +	pm_runtime_enable(&pdev->dev);
> +
>  	ret = mmc_add_host(mmc);
>  	if (ret)
>  		goto error_free_dma;
> @@ -1414,6 +1426,7 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
>  	struct sunxi_mmc_host *host = mmc_priv(mmc);
>  
>  	mmc_remove_host(mmc);
> +	pm_runtime_force_suspend(&pdev->dev);
>  	disable_irq(host->irq);
>  	sunxi_mmc_disable(host);
>  	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
> @@ -1422,10 +1435,45 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
>  	return 0;
>  }
>  
> +static int sunxi_mmc_runtime_resume(struct device *dev)
> +{
> +	struct mmc_host	*mmc = dev_get_drvdata(dev);
> +	struct sunxi_mmc_host *host = mmc_priv(mmc);
> +	int ret;
> +
> +	ret = sunxi_mmc_enable(host);
> +	if (ret)
> +		return ret;
> +
> +	sunxi_mmc_init_host(host);
> +	sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
> +	sunxi_mmc_set_clk(host, &mmc->ios);
> +
> +	return 0;
> +}
> +
> +static int sunxi_mmc_runtime_suspend(struct device *dev)
> +{
> +	struct mmc_host	*mmc = dev_get_drvdata(dev);
> +	struct sunxi_mmc_host *host = mmc_priv(mmc);
> +
> +	sunxi_mmc_reset_host(host);
> +	sunxi_mmc_disable(host);
> +
> +	return 0;
> +}
> +
> +static const struct dev_pm_ops sunxi_mmc_pm_ops = {
> +	SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
> +			   sunxi_mmc_runtime_resume,
> +			   NULL)
> +};
> +
>  static struct platform_driver sunxi_mmc_driver = {
>  	.driver = {
>  		.name	= "sunxi-mmc",
>  		.of_match_table = of_match_ptr(sunxi_mmc_of_match),
> +		.pm = &sunxi_mmc_pm_ops,
>  	},
>  	.probe		= sunxi_mmc_probe,
>  	.remove		= sunxi_mmc_remove,
> 

This patch has the unfortunate impact of killing my A20 system
(cubietruck), as of 9a8e1e8cc2c02c57c4e941651a8481a633506c91:

[...]
[    3.286649] NET: Registered protocol family 10
[    3.291898]  mmcblk0: p1
[    3.295297] mmc1: queuing unknown CIS tuple 0x80 (2 bytes)
[    3.302773] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
[    3.305787] Segment Routing with IPv6
[    3.312225] mip6: Mobile IPv6
[    3.316166] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
[    3.316246] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
[    3.323721] ip6_gre: GRE over IPv6 tunneling driver
[    3.333954] NET: Registered protocol family 17
[    3.338837] 9pnet: Installing 9P2000 support
[    3.343379] NET: Registered protocol family 37
[    3.347885] Key type dns_resolver registered
[    3.352214] mmc1: queuing unknown CIS tuple 0x80 (7 bytes)
[    3.352217] openvswitch: Open vSwitch switching datapath
[    3.352620] mpls_gso: MPLS GSO support
[    3.367001] ThumbEE CPU extension supported.

and that's where it stops. No message, just a hard lockup (I suppose
that both the CPUs are trying to access some device that is now not
clocked).

With a working kernel, I see SATA and the wifi SDIO being probed.

Happy to help testing stuff if you have any idea.

Thanks,

	M.
Kevin Hilman June 14, 2018, 6:57 p.m. UTC | #2
On Thu, Jun 14, 2018 at 7:12 AM Marc Zyngier <marc.zyngier@arm.com> wrote:
>
> Hi Maxime,
>
> On 16/04/18 15:23, Maxime Ripard wrote:
> > So far, even if our card was not in use, we didn't shut down our MMC
> > controller, which meant that it was still active and clocking the bus.
> >
> > While this obviously means that we could save some power there, it also
> > creates issues when it comes to EMC control since we'll have a perfect peak
> > at the card clock rate.
> >
> > Let's implement runtime_pm with autosuspend so that we will shut down the
> > controller when it's not been in use for quite some time.
> >
[...]

> This patch has the unfortunate impact of killing my A20 system
> (cubietruck), as of 9a8e1e8cc2c02c57c4e941651a8481a633506c91:

kernelCI also found failures on a few a10/a20 platfforms[1], and they
all fail to reach userspace, similar to what Marc reported.

I bisected on a sun7i-a20-bananapi and that pointed at this commit
also (in mainline as 9a8e1e8cc2c0 mmc: sunxi: Add runtime_pm support)

Kevin

[1] https://kernelci.org/boot/all/job/mainline/branch/master/kernel/v4.17-11928-g2837461dbe6f/
[2] $ git bisect log
git bisect start
# good: [5037be168f0e4ee910602935b1180291082d3aac] Merge branch
'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
git bisect good 5037be168f0e4ee910602935b1180291082d3aac
# bad: [f60342fac9fae20ada2cd5faadbc2a1337cae03f] Merge tag
'mmc-v4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
git bisect bad f60342fac9fae20ada2cd5faadbc2a1337cae03f
# good: [fd59ccc53062964007beda8787ffd9cd93968d63] Merge tag
'fscrypt_for_linus' of
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt
git bisect good fd59ccc53062964007beda8787ffd9cd93968d63
# good: [2158091d9cda6f126f71973667e8a9fc1e795d03] Merge branch
'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
git bisect good 2158091d9cda6f126f71973667e8a9fc1e795d03
# bad: [bfd694d5e21c2f0d344db6afeaf993bb0f299545] mmc: core: Add
tunable delay before detecting card after card is inserted
git bisect bad bfd694d5e21c2f0d344db6afeaf993bb0f299545
# good: [743b819e4178935e3f098e5f13db301f532fa9e0] mmc: sunxi: Reorder
the headers
git bisect good 743b819e4178935e3f098e5f13db301f532fa9e0
# bad: [ebca50dfae525341c48c2f69798667352318549e] mmc:
renesas_sdhi_internal_dmac: remove superfluous WARN
git bisect bad ebca50dfae525341c48c2f69798667352318549e
# bad: [eef797ac13c08fae0f0ce7d2215d0951e884fa2d] mmc: sunxi: Drop the
init / reset of the controller from set_ios
git bisect bad eef797ac13c08fae0f0ce7d2215d0951e884fa2d
# good: [ad04d9555da02c719de25b7d1e81ea8d0d2c4838] mmc: sunxi: Move
clock configuration to a function
git bisect good ad04d9555da02c719de25b7d1e81ea8d0d2c4838
# bad: [9a8e1e8cc2c02c57c4e941651a8481a633506c91] mmc: sunxi: Add
runtime_pm support
git bisect bad 9a8e1e8cc2c02c57c4e941651a8481a633506c91
# good: [e27e1f3d04061ccc3735361554088cd7aa286e31] mmc: sunxi: Move
the card power configuration to a function
git bisect good e27e1f3d04061ccc3735361554088cd7aa286e31
# first bad commit: [9a8e1e8cc2c02c57c4e941651a8481a633506c91] mmc:
sunxi: Add runtime_pm support
Ulf Hansson June 15, 2018, 8:55 a.m. UTC | #3
On 14 June 2018 at 16:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
> Hi Maxime,
>
> On 16/04/18 15:23, Maxime Ripard wrote:
>> So far, even if our card was not in use, we didn't shut down our MMC
>> controller, which meant that it was still active and clocking the bus.
>>
>> While this obviously means that we could save some power there, it also
>> creates issues when it comes to EMC control since we'll have a perfect peak
>> at the card clock rate.
>>
>> Let's implement runtime_pm with autosuspend so that we will shut down the
>> controller when it's not been in use for quite some time.
>>
>> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
>> ---
>>  drivers/mmc/host/sunxi-mmc.c | 48 +++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 48 insertions(+)
>>
>> diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
>> index 0165da0d022a..0253deb153a4 100644
>> --- a/drivers/mmc/host/sunxi-mmc.c
>> +++ b/drivers/mmc/host/sunxi-mmc.c
>> @@ -35,6 +35,7 @@
>>  #include <linux/of_gpio.h>
>>  #include <linux/of_platform.h>
>>  #include <linux/platform_device.h>
>> +#include <linux/pm_runtime.h>
>>  #include <linux/regulator/consumer.h>
>>  #include <linux/reset.h>
>>  #include <linux/scatterlist.h>
>> @@ -969,6 +970,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>>       unsigned long flags;
>>       u32 imask;
>>
>> +     if (enable)
>> +             pm_runtime_get_noresume(host->dev);
>> +
>>       spin_lock_irqsave(&host->lock, flags);
>>
>>       imask = mmc_readl(host, REG_IMASK);
>> @@ -981,6 +985,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
>>       }
>>       mmc_writel(host, REG_IMASK, imask);
>>       spin_unlock_irqrestore(&host->lock, flags);
>> +
>> +     if (!enable)
>> +             pm_runtime_put_noidle(host->mmc->parent);
>>  }
>>
>>  static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
>> @@ -1394,6 +1401,11 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
>>       if (ret)
>>               goto error_free_dma;
>>
>> +     pm_runtime_set_active(&pdev->dev);
>> +     pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
>> +     pm_runtime_use_autosuspend(&pdev->dev);
>> +     pm_runtime_enable(&pdev->dev);
>> +
>>       ret = mmc_add_host(mmc);
>>       if (ret)
>>               goto error_free_dma;
>> @@ -1414,6 +1426,7 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
>>       struct sunxi_mmc_host *host = mmc_priv(mmc);
>>
>>       mmc_remove_host(mmc);
>> +     pm_runtime_force_suspend(&pdev->dev);
>>       disable_irq(host->irq);
>>       sunxi_mmc_disable(host);
>>       dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
>> @@ -1422,10 +1435,45 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
>>       return 0;
>>  }
>>
>> +static int sunxi_mmc_runtime_resume(struct device *dev)
>> +{
>> +     struct mmc_host *mmc = dev_get_drvdata(dev);
>> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
>> +     int ret;
>> +
>> +     ret = sunxi_mmc_enable(host);
>> +     if (ret)
>> +             return ret;
>> +
>> +     sunxi_mmc_init_host(host);
>> +     sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
>> +     sunxi_mmc_set_clk(host, &mmc->ios);
>> +
>> +     return 0;
>> +}
>> +
>> +static int sunxi_mmc_runtime_suspend(struct device *dev)
>> +{
>> +     struct mmc_host *mmc = dev_get_drvdata(dev);
>> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
>> +
>> +     sunxi_mmc_reset_host(host);
>> +     sunxi_mmc_disable(host);
>> +
>> +     return 0;
>> +}
>> +
>> +static const struct dev_pm_ops sunxi_mmc_pm_ops = {
>> +     SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
>> +                        sunxi_mmc_runtime_resume,
>> +                        NULL)
>> +};
>> +
>>  static struct platform_driver sunxi_mmc_driver = {
>>       .driver = {
>>               .name   = "sunxi-mmc",
>>               .of_match_table = of_match_ptr(sunxi_mmc_of_match),
>> +             .pm = &sunxi_mmc_pm_ops,
>>       },
>>       .probe          = sunxi_mmc_probe,
>>       .remove         = sunxi_mmc_remove,
>>
>
> This patch has the unfortunate impact of killing my A20 system
> (cubietruck), as of 9a8e1e8cc2c02c57c4e941651a8481a633506c91:
>
> [...]
> [    3.286649] NET: Registered protocol family 10
> [    3.291898]  mmcblk0: p1
> [    3.295297] mmc1: queuing unknown CIS tuple 0x80 (2 bytes)
> [    3.302773] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> [    3.305787] Segment Routing with IPv6
> [    3.312225] mip6: Mobile IPv6
> [    3.316166] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
> [    3.316246] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> [    3.323721] ip6_gre: GRE over IPv6 tunneling driver
> [    3.333954] NET: Registered protocol family 17
> [    3.338837] 9pnet: Installing 9P2000 support
> [    3.343379] NET: Registered protocol family 37
> [    3.347885] Key type dns_resolver registered
> [    3.352214] mmc1: queuing unknown CIS tuple 0x80 (7 bytes)
> [    3.352217] openvswitch: Open vSwitch switching datapath
> [    3.352620] mpls_gso: MPLS GSO support
> [    3.367001] ThumbEE CPU extension supported.
>
> and that's where it stops. No message, just a hard lockup (I suppose
> that both the CPUs are trying to access some device that is now not
> clocked).

Seems like a reasonable analyze.

>
> With a working kernel, I see SATA and the wifi SDIO being probed.
>
> Happy to help testing stuff if you have any idea.

In principle I would start with avoiding having the sunxi-mmc driver
to probe. Or bail out early in probe, whichever is the easiest for
you.

The point is, if the sunxi-mmc driver doesn't even enable its clock,
it would be interesting to see if there are other that depends on it.

One could also play with clk_disable_unused(), the
late_initcall_sync(), which can be turned off with the module
parameter "clk_ignore_unused".

Anyway, to hide/fix the problem for now, we could add a call to
pm_runtime_get_noresume() before the sunxi-driver calls
pm_runtime_enable().

Kind regards
Uffe
Kevin Hilman June 15, 2018, 2:45 p.m. UTC | #4
On Fri, Jun 15, 2018 at 1:56 AM Ulf Hansson <ulf.hansson@linaro.org> wrote:
>
> On 14 June 2018 at 16:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
> > Hi Maxime,
> >
> > On 16/04/18 15:23, Maxime Ripard wrote:
> >> So far, even if our card was not in use, we didn't shut down our MMC
> >> controller, which meant that it was still active and clocking the bus.
> >>
> >> While this obviously means that we could save some power there, it also
> >> creates issues when it comes to EMC control since we'll have a perfect peak
> >> at the card clock rate.
> >>
> >> Let's implement runtime_pm with autosuspend so that we will shut down the
> >> controller when it's not been in use for quite some time.
> >>
> >> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
> >> ---
> >>  drivers/mmc/host/sunxi-mmc.c | 48 +++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 48 insertions(+)
> >>
> >> diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
> >> index 0165da0d022a..0253deb153a4 100644
> >> --- a/drivers/mmc/host/sunxi-mmc.c
> >> +++ b/drivers/mmc/host/sunxi-mmc.c
> >> @@ -35,6 +35,7 @@
> >>  #include <linux/of_gpio.h>
> >>  #include <linux/of_platform.h>
> >>  #include <linux/platform_device.h>
> >> +#include <linux/pm_runtime.h>
> >>  #include <linux/regulator/consumer.h>
> >>  #include <linux/reset.h>
> >>  #include <linux/scatterlist.h>
> >> @@ -969,6 +970,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
> >>       unsigned long flags;
> >>       u32 imask;
> >>
> >> +     if (enable)
> >> +             pm_runtime_get_noresume(host->dev);
> >> +
> >>       spin_lock_irqsave(&host->lock, flags);
> >>
> >>       imask = mmc_readl(host, REG_IMASK);
> >> @@ -981,6 +985,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
> >>       }
> >>       mmc_writel(host, REG_IMASK, imask);
> >>       spin_unlock_irqrestore(&host->lock, flags);
> >> +
> >> +     if (!enable)
> >> +             pm_runtime_put_noidle(host->mmc->parent);
> >>  }
> >>
> >>  static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
> >> @@ -1394,6 +1401,11 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
> >>       if (ret)
> >>               goto error_free_dma;
> >>
> >> +     pm_runtime_set_active(&pdev->dev);
> >> +     pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
> >> +     pm_runtime_use_autosuspend(&pdev->dev);
> >> +     pm_runtime_enable(&pdev->dev);
> >> +
> >>       ret = mmc_add_host(mmc);
> >>       if (ret)
> >>               goto error_free_dma;
> >> @@ -1414,6 +1426,7 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
> >>       struct sunxi_mmc_host *host = mmc_priv(mmc);
> >>
> >>       mmc_remove_host(mmc);
> >> +     pm_runtime_force_suspend(&pdev->dev);
> >>       disable_irq(host->irq);
> >>       sunxi_mmc_disable(host);
> >>       dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
> >> @@ -1422,10 +1435,45 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
> >>       return 0;
> >>  }
> >>
> >> +static int sunxi_mmc_runtime_resume(struct device *dev)
> >> +{
> >> +     struct mmc_host *mmc = dev_get_drvdata(dev);
> >> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
> >> +     int ret;
> >> +
> >> +     ret = sunxi_mmc_enable(host);
> >> +     if (ret)
> >> +             return ret;
> >> +
> >> +     sunxi_mmc_init_host(host);
> >> +     sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
> >> +     sunxi_mmc_set_clk(host, &mmc->ios);
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static int sunxi_mmc_runtime_suspend(struct device *dev)
> >> +{
> >> +     struct mmc_host *mmc = dev_get_drvdata(dev);
> >> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
> >> +
> >> +     sunxi_mmc_reset_host(host);
> >> +     sunxi_mmc_disable(host);
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +static const struct dev_pm_ops sunxi_mmc_pm_ops = {
> >> +     SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
> >> +                        sunxi_mmc_runtime_resume,
> >> +                        NULL)
> >> +};
> >> +
> >>  static struct platform_driver sunxi_mmc_driver = {
> >>       .driver = {
> >>               .name   = "sunxi-mmc",
> >>               .of_match_table = of_match_ptr(sunxi_mmc_of_match),
> >> +             .pm = &sunxi_mmc_pm_ops,
> >>       },
> >>       .probe          = sunxi_mmc_probe,
> >>       .remove         = sunxi_mmc_remove,
> >>
> >
> > This patch has the unfortunate impact of killing my A20 system
> > (cubietruck), as of 9a8e1e8cc2c02c57c4e941651a8481a633506c91:
> >
> > [...]
> > [    3.286649] NET: Registered protocol family 10
> > [    3.291898]  mmcblk0: p1
> > [    3.295297] mmc1: queuing unknown CIS tuple 0x80 (2 bytes)
> > [    3.302773] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> > [    3.305787] Segment Routing with IPv6
> > [    3.312225] mip6: Mobile IPv6
> > [    3.316166] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
> > [    3.316246] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> > [    3.323721] ip6_gre: GRE over IPv6 tunneling driver
> > [    3.333954] NET: Registered protocol family 17
> > [    3.338837] 9pnet: Installing 9P2000 support
> > [    3.343379] NET: Registered protocol family 37
> > [    3.347885] Key type dns_resolver registered
> > [    3.352214] mmc1: queuing unknown CIS tuple 0x80 (7 bytes)
> > [    3.352217] openvswitch: Open vSwitch switching datapath
> > [    3.352620] mpls_gso: MPLS GSO support
> > [    3.367001] ThumbEE CPU extension supported.
> >
> > and that's where it stops. No message, just a hard lockup (I suppose
> > that both the CPUs are trying to access some device that is now not
> > clocked).
>
> Seems like a reasonable analyze.
>
> >
> > With a working kernel, I see SATA and the wifi SDIO being probed.
> >
> > Happy to help testing stuff if you have any idea.
>
> In principle I would start with avoiding having the sunxi-mmc driver
> to probe. Or bail out early in probe, whichever is the easiest for
> you.
>
> The point is, if the sunxi-mmc driver doesn't even enable its clock,
> it would be interesting to see if there are other that depends on it.
>
> One could also play with clk_disable_unused(), the
> late_initcall_sync(), which can be turned off with the module
> parameter "clk_ignore_unused".

I added clk_ignore_unused to the kernel command-line, and that didn't
help, so it's not just an init-time clock that's causing the problem.

> Anyway, to hide/fix the problem for now, we could add a call to
> pm_runtime_get_noresume() before the sunxi-driver calls
> pm_runtime_enable().

I tried that and it makes the kernel finish booting, so that smells
definitely like the MMC is disabling a clock when it goes idle that
some other device (or CPU) depends on.

Kevin
Maxime Ripard June 15, 2018, 3:12 p.m. UTC | #5
On Fri, Jun 15, 2018 at 07:45:03AM -0700, Kevin Hilman wrote:
> > >
> > > With a working kernel, I see SATA and the wifi SDIO being probed.
> > >
> > > Happy to help testing stuff if you have any idea.
> >
> > In principle I would start with avoiding having the sunxi-mmc driver
> > to probe. Or bail out early in probe, whichever is the easiest for
> > you.
> >
> > The point is, if the sunxi-mmc driver doesn't even enable its clock,
> > it would be interesting to see if there are other that depends on it.
> >
> > One could also play with clk_disable_unused(), the
> > late_initcall_sync(), which can be turned off with the module
> > parameter "clk_ignore_unused".
> 
> I added clk_ignore_unused to the kernel command-line, and that didn't
> help, so it's not just an init-time clock that's causing the problem.
> 
> > Anyway, to hide/fix the problem for now, we could add a call to
> > pm_runtime_get_noresume() before the sunxi-driver calls
> > pm_runtime_enable().
> 
> I tried that and it makes the kernel finish booting, so that smells
> definitely like the MMC is disabling a clock when it goes idle that
> some other device (or CPU) depends on.

I quickly looked at the A10 and A20 clock driver and I have not seen
any obvious mishap.

If you have ftrace enabled, could you add the trace_clk_disable*
events (along with tp_printk since the kernel seems to break the
entire boot).

That will allow us to see which clock is disabled and shouldn't.

Thanks!
Maxime
Maxime Ripard June 26, 2018, 9:09 a.m. UTC | #6
On Fri, Jun 15, 2018 at 07:45:03AM -0700, Kevin Hilman wrote:
> On Fri, Jun 15, 2018 at 1:56 AM Ulf Hansson <ulf.hansson@linaro.org> wrote:
> >
> > On 14 June 2018 at 16:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
> > > Hi Maxime,
> > >
> > > On 16/04/18 15:23, Maxime Ripard wrote:
> > >> So far, even if our card was not in use, we didn't shut down our MMC
> > >> controller, which meant that it was still active and clocking the bus.
> > >>
> > >> While this obviously means that we could save some power there, it also
> > >> creates issues when it comes to EMC control since we'll have a perfect peak
> > >> at the card clock rate.
> > >>
> > >> Let's implement runtime_pm with autosuspend so that we will shut down the
> > >> controller when it's not been in use for quite some time.
> > >>
> > >> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
> > >> ---
> > >>  drivers/mmc/host/sunxi-mmc.c | 48 +++++++++++++++++++++++++++++++++++++-
> > >>  1 file changed, 48 insertions(+)
> > >>
> > >> diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
> > >> index 0165da0d022a..0253deb153a4 100644
> > >> --- a/drivers/mmc/host/sunxi-mmc.c
> > >> +++ b/drivers/mmc/host/sunxi-mmc.c
> > >> @@ -35,6 +35,7 @@
> > >>  #include <linux/of_gpio.h>
> > >>  #include <linux/of_platform.h>
> > >>  #include <linux/platform_device.h>
> > >> +#include <linux/pm_runtime.h>
> > >>  #include <linux/regulator/consumer.h>
> > >>  #include <linux/reset.h>
> > >>  #include <linux/scatterlist.h>
> > >> @@ -969,6 +970,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
> > >>       unsigned long flags;
> > >>       u32 imask;
> > >>
> > >> +     if (enable)
> > >> +             pm_runtime_get_noresume(host->dev);
> > >> +
> > >>       spin_lock_irqsave(&host->lock, flags);
> > >>
> > >>       imask = mmc_readl(host, REG_IMASK);
> > >> @@ -981,6 +985,9 @@ static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
> > >>       }
> > >>       mmc_writel(host, REG_IMASK, imask);
> > >>       spin_unlock_irqrestore(&host->lock, flags);
> > >> +
> > >> +     if (!enable)
> > >> +             pm_runtime_put_noidle(host->mmc->parent);
> > >>  }
> > >>
> > >>  static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
> > >> @@ -1394,6 +1401,11 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
> > >>       if (ret)
> > >>               goto error_free_dma;
> > >>
> > >> +     pm_runtime_set_active(&pdev->dev);
> > >> +     pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
> > >> +     pm_runtime_use_autosuspend(&pdev->dev);
> > >> +     pm_runtime_enable(&pdev->dev);
> > >> +
> > >>       ret = mmc_add_host(mmc);
> > >>       if (ret)
> > >>               goto error_free_dma;
> > >> @@ -1414,6 +1426,7 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
> > >>       struct sunxi_mmc_host *host = mmc_priv(mmc);
> > >>
> > >>       mmc_remove_host(mmc);
> > >> +     pm_runtime_force_suspend(&pdev->dev);
> > >>       disable_irq(host->irq);
> > >>       sunxi_mmc_disable(host);
> > >>       dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
> > >> @@ -1422,10 +1435,45 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
> > >>       return 0;
> > >>  }
> > >>
> > >> +static int sunxi_mmc_runtime_resume(struct device *dev)
> > >> +{
> > >> +     struct mmc_host *mmc = dev_get_drvdata(dev);
> > >> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
> > >> +     int ret;
> > >> +
> > >> +     ret = sunxi_mmc_enable(host);
> > >> +     if (ret)
> > >> +             return ret;
> > >> +
> > >> +     sunxi_mmc_init_host(host);
> > >> +     sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
> > >> +     sunxi_mmc_set_clk(host, &mmc->ios);
> > >> +
> > >> +     return 0;
> > >> +}
> > >> +
> > >> +static int sunxi_mmc_runtime_suspend(struct device *dev)
> > >> +{
> > >> +     struct mmc_host *mmc = dev_get_drvdata(dev);
> > >> +     struct sunxi_mmc_host *host = mmc_priv(mmc);
> > >> +
> > >> +     sunxi_mmc_reset_host(host);
> > >> +     sunxi_mmc_disable(host);
> > >> +
> > >> +     return 0;
> > >> +}
> > >> +
> > >> +static const struct dev_pm_ops sunxi_mmc_pm_ops = {
> > >> +     SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
> > >> +                        sunxi_mmc_runtime_resume,
> > >> +                        NULL)
> > >> +};
> > >> +
> > >>  static struct platform_driver sunxi_mmc_driver = {
> > >>       .driver = {
> > >>               .name   = "sunxi-mmc",
> > >>               .of_match_table = of_match_ptr(sunxi_mmc_of_match),
> > >> +             .pm = &sunxi_mmc_pm_ops,
> > >>       },
> > >>       .probe          = sunxi_mmc_probe,
> > >>       .remove         = sunxi_mmc_remove,
> > >>
> > >
> > > This patch has the unfortunate impact of killing my A20 system
> > > (cubietruck), as of 9a8e1e8cc2c02c57c4e941651a8481a633506c91:
> > >
> > > [...]
> > > [    3.286649] NET: Registered protocol family 10
> > > [    3.291898]  mmcblk0: p1
> > > [    3.295297] mmc1: queuing unknown CIS tuple 0x80 (2 bytes)
> > > [    3.302773] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> > > [    3.305787] Segment Routing with IPv6
> > > [    3.312225] mip6: Mobile IPv6
> > > [    3.316166] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
> > > [    3.316246] mmc1: queuing unknown CIS tuple 0x80 (3 bytes)
> > > [    3.323721] ip6_gre: GRE over IPv6 tunneling driver
> > > [    3.333954] NET: Registered protocol family 17
> > > [    3.338837] 9pnet: Installing 9P2000 support
> > > [    3.343379] NET: Registered protocol family 37
> > > [    3.347885] Key type dns_resolver registered
> > > [    3.352214] mmc1: queuing unknown CIS tuple 0x80 (7 bytes)
> > > [    3.352217] openvswitch: Open vSwitch switching datapath
> > > [    3.352620] mpls_gso: MPLS GSO support
> > > [    3.367001] ThumbEE CPU extension supported.
> > >
> > > and that's where it stops. No message, just a hard lockup (I suppose
> > > that both the CPUs are trying to access some device that is now not
> > > clocked).
> >
> > Seems like a reasonable analyze.
> >
> > >
> > > With a working kernel, I see SATA and the wifi SDIO being probed.
> > >
> > > Happy to help testing stuff if you have any idea.
> >
> > In principle I would start with avoiding having the sunxi-mmc driver
> > to probe. Or bail out early in probe, whichever is the easiest for
> > you.
> >
> > The point is, if the sunxi-mmc driver doesn't even enable its clock,
> > it would be interesting to see if there are other that depends on it.
> >
> > One could also play with clk_disable_unused(), the
> > late_initcall_sync(), which can be turned off with the module
> > parameter "clk_ignore_unused".
> 
> I added clk_ignore_unused to the kernel command-line, and that didn't
> help, so it's not just an init-time clock that's causing the problem.
> 
> > Anyway, to hide/fix the problem for now, we could add a call to
> > pm_runtime_get_noresume() before the sunxi-driver calls
> > pm_runtime_enable().
> 
> I tried that and it makes the kernel finish booting, so that smells
> definitely like the MMC is disabling a clock when it goes idle that
> some other device (or CPU) depends on.

I've looked into it, and I can't seem to reproduce it on a 4.18-rc2
kernel with my cubieboard2, booting from NFS and trying to access the
SD card.

Marc is booting from SATA, but apparently your boards Kevin are
booting from initramfs?

Apart from the ftrace option, can you add a call in the probe to
clk_prepare_enable for clk_ahb and clk_mmc clocks, and see if there's
one that fixes the issue? the output and sample clocks are not
gatable, so it shouldn't cause any trouble.

Maxime
diff mbox

Patch

diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 0165da0d022a..0253deb153a4 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -35,6 +35,7 @@ 
 #include <linux/of_gpio.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/scatterlist.h>
@@ -969,6 +970,9 @@  static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 	unsigned long flags;
 	u32 imask;
 
+	if (enable)
+		pm_runtime_get_noresume(host->dev);
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	imask = mmc_readl(host, REG_IMASK);
@@ -981,6 +985,9 @@  static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 	}
 	mmc_writel(host, REG_IMASK, imask);
 	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (!enable)
+		pm_runtime_put_noidle(host->mmc->parent);
 }
 
 static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
@@ -1394,6 +1401,11 @@  static int sunxi_mmc_probe(struct platform_device *pdev)
 	if (ret)
 		goto error_free_dma;
 
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	ret = mmc_add_host(mmc);
 	if (ret)
 		goto error_free_dma;
@@ -1414,6 +1426,7 @@  static int sunxi_mmc_remove(struct platform_device *pdev)
 	struct sunxi_mmc_host *host = mmc_priv(mmc);
 
 	mmc_remove_host(mmc);
+	pm_runtime_force_suspend(&pdev->dev);
 	disable_irq(host->irq);
 	sunxi_mmc_disable(host);
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
@@ -1422,10 +1435,45 @@  static int sunxi_mmc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int sunxi_mmc_runtime_resume(struct device *dev)
+{
+	struct mmc_host	*mmc = dev_get_drvdata(dev);
+	struct sunxi_mmc_host *host = mmc_priv(mmc);
+	int ret;
+
+	ret = sunxi_mmc_enable(host);
+	if (ret)
+		return ret;
+
+	sunxi_mmc_init_host(host);
+	sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
+	sunxi_mmc_set_clk(host, &mmc->ios);
+
+	return 0;
+}
+
+static int sunxi_mmc_runtime_suspend(struct device *dev)
+{
+	struct mmc_host	*mmc = dev_get_drvdata(dev);
+	struct sunxi_mmc_host *host = mmc_priv(mmc);
+
+	sunxi_mmc_reset_host(host);
+	sunxi_mmc_disable(host);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sunxi_mmc_pm_ops = {
+	SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
+			   sunxi_mmc_runtime_resume,
+			   NULL)
+};
+
 static struct platform_driver sunxi_mmc_driver = {
 	.driver = {
 		.name	= "sunxi-mmc",
 		.of_match_table = of_match_ptr(sunxi_mmc_of_match),
+		.pm = &sunxi_mmc_pm_ops,
 	},
 	.probe		= sunxi_mmc_probe,
 	.remove		= sunxi_mmc_remove,