diff mbox series

[v3] xhci: Prevent runtime suspend on Etron EJ168

Message ID 20200504171642.26947-1-kai.heng.feng@canonical.com (mailing list archive)
State New, archived
Headers show
Series [v3] xhci: Prevent runtime suspend on Etron EJ168 | expand

Commit Message

Kai-Heng Feng May 4, 2020, 5:16 p.m. UTC
Etron EJ168 USB 3.0 Host Controller stops working after S3, if it was
runtime suspended previously:
[  370.080359] pci 0000:02:00.0: can't change power state from D3cold to D0 (config space inaccessible)
[  370.080477] xhci_hcd 0000:04:00.0: can't change power state from D3cold to D0 (config space inaccessible)
[  370.080532] pcieport 0000:00:1c.0: DPC: containment event, status:0x1f05 source:0x0200
[  370.080533] pcieport 0000:00:1c.0: DPC: ERR_FATAL detected
[  370.080536] xhci_hcd 0000:04:00.0: can't change power state from D3hot to D0 (config space inaccessible)
[  370.080552] xhci_hcd 0000:04:00.0: AER: can't recover (no error_detected callback)
[  370.080566] usb usb3: root hub lost power or was reset
[  370.080566] usb usb4: root hub lost power or was reset
[  370.080572] xhci_hcd 0000:04:00.0: Host halt failed, -19
[  370.080574] xhci_hcd 0000:04:00.0: Host not accessible, reset failed.
[  370.080575] xhci_hcd 0000:04:00.0: PCI post-resume error -19!
[  370.080586] xhci_hcd 0000:04:00.0: HC died; cleaning up

This can be fixed by not runtime suspending the controller at all.

So disable runtime suspend for EJ168 xHCI device.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
v3:
 - Balance rpm refcount in remove callback.

v2:
 - Use a new quirk to avoid changing existing behavior.

 drivers/usb/host/xhci-pci.c | 7 ++++++-
 drivers/usb/host/xhci.h     | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

Comments

Kai-Heng Feng June 8, 2020, 3:56 a.m. UTC | #1
> On May 5, 2020, at 01:16, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
> 
> Etron EJ168 USB 3.0 Host Controller stops working after S3, if it was
> runtime suspended previously:
> [  370.080359] pci 0000:02:00.0: can't change power state from D3cold to D0 (config space inaccessible)
> [  370.080477] xhci_hcd 0000:04:00.0: can't change power state from D3cold to D0 (config space inaccessible)
> [  370.080532] pcieport 0000:00:1c.0: DPC: containment event, status:0x1f05 source:0x0200
> [  370.080533] pcieport 0000:00:1c.0: DPC: ERR_FATAL detected
> [  370.080536] xhci_hcd 0000:04:00.0: can't change power state from D3hot to D0 (config space inaccessible)
> [  370.080552] xhci_hcd 0000:04:00.0: AER: can't recover (no error_detected callback)
> [  370.080566] usb usb3: root hub lost power or was reset
> [  370.080566] usb usb4: root hub lost power or was reset
> [  370.080572] xhci_hcd 0000:04:00.0: Host halt failed, -19
> [  370.080574] xhci_hcd 0000:04:00.0: Host not accessible, reset failed.
> [  370.080575] xhci_hcd 0000:04:00.0: PCI post-resume error -19!
> [  370.080586] xhci_hcd 0000:04:00.0: HC died; cleaning up
> 
> This can be fixed by not runtime suspend the controller at all.
> 
> So disable runtime suspend for EJ168 xHCI device.
> 
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

A gentle ping...

> ---
> v3:
> - Balance rpm refcount in remove callback.
> 
> v2:
> - Use a new quirk to avoid changing existing behavior.
> 
> drivers/usb/host/xhci-pci.c | 7 ++++++-
> drivers/usb/host/xhci.h     | 1 +
> 2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
> index 766b74723e64..67b4b433a93e 100644
> --- a/drivers/usb/host/xhci-pci.c
> +++ b/drivers/usb/host/xhci-pci.c
> @@ -227,6 +227,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
> 		xhci->quirks |= XHCI_RESET_ON_RESUME;
> 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
> 		xhci->quirks |= XHCI_BROKEN_STREAMS;
> +		xhci->quirks |= XHCI_DISABLE_RUNTIME_SUSPEND;
> 	}
> 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
> 	    pdev->device == 0x0014) {
> @@ -371,7 +372,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
> 		xhci->shared_hcd->can_do_streams = 1;
> 
> 	/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
> -	pm_runtime_put_noidle(&dev->dev);
> +	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
> +		pm_runtime_put_noidle(&dev->dev);
> 
> 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
> 		pm_runtime_allow(&dev->dev);
> @@ -397,6 +399,9 @@ static void xhci_pci_remove(struct pci_dev *dev)
> 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
> 		pm_runtime_forbid(&dev->dev);
> 
> +	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
> +		pm_runtime_get_noresume(&dev->dev);
> +
> 	if (xhci->shared_hcd) {
> 		usb_remove_hcd(xhci->shared_hcd);
> 		usb_put_hcd(xhci->shared_hcd);
> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
> index 86cfefdd6632..d9c209a10d3f 100644
> --- a/drivers/usb/host/xhci.h
> +++ b/drivers/usb/host/xhci.h
> @@ -1873,6 +1873,7 @@ struct xhci_hcd {
> #define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
> #define XHCI_RESET_PLL_ON_DISCONNECT	BIT_ULL(34)
> #define XHCI_SNPS_BROKEN_SUSPEND    BIT_ULL(35)
> +#define XHCI_DISABLE_RUNTIME_SUSPEND    BIT_ULL(36)
> 
> 	unsigned int		num_active_eps;
> 	unsigned int		limit_active_eps;
> -- 
> 2.17.1
>
Kai-Heng Feng Sept. 28, 2020, 9:10 a.m. UTC | #2
> On Jun 8, 2020, at 11:56, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
> 
> 
> 
>> On May 5, 2020, at 01:16, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
>> 
>> Etron EJ168 USB 3.0 Host Controller stops working after S3, if it was
>> runtime suspended previously:
>> [  370.080359] pci 0000:02:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>> [  370.080477] xhci_hcd 0000:04:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>> [  370.080532] pcieport 0000:00:1c.0: DPC: containment event, status:0x1f05 source:0x0200
>> [  370.080533] pcieport 0000:00:1c.0: DPC: ERR_FATAL detected
>> [  370.080536] xhci_hcd 0000:04:00.0: can't change power state from D3hot to D0 (config space inaccessible)
>> [  370.080552] xhci_hcd 0000:04:00.0: AER: can't recover (no error_detected callback)
>> [  370.080566] usb usb3: root hub lost power or was reset
>> [  370.080566] usb usb4: root hub lost power or was reset
>> [  370.080572] xhci_hcd 0000:04:00.0: Host halt failed, -19
>> [  370.080574] xhci_hcd 0000:04:00.0: Host not accessible, reset failed.
>> [  370.080575] xhci_hcd 0000:04:00.0: PCI post-resume error -19!
>> [  370.080586] xhci_hcd 0000:04:00.0: HC died; cleaning up
>> 
>> This can be fixed by not runtime suspend the controller at all.
>> 
>> So disable runtime suspend for EJ168 xHCI device.
>> 
>> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> 
> A gentle ping...

Another gentle ping...

> 
>> ---
>> v3:
>> - Balance rpm refcount in remove callback.
>> 
>> v2:
>> - Use a new quirk to avoid changing existing behavior.
>> 
>> drivers/usb/host/xhci-pci.c | 7 ++++++-
>> drivers/usb/host/xhci.h     | 1 +
>> 2 files changed, 7 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
>> index 766b74723e64..67b4b433a93e 100644
>> --- a/drivers/usb/host/xhci-pci.c
>> +++ b/drivers/usb/host/xhci-pci.c
>> @@ -227,6 +227,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
>> 		xhci->quirks |= XHCI_RESET_ON_RESUME;
>> 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
>> 		xhci->quirks |= XHCI_BROKEN_STREAMS;
>> +		xhci->quirks |= XHCI_DISABLE_RUNTIME_SUSPEND;
>> 	}
>> 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
>> 	    pdev->device == 0x0014) {
>> @@ -371,7 +372,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
>> 		xhci->shared_hcd->can_do_streams = 1;
>> 
>> 	/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
>> -	pm_runtime_put_noidle(&dev->dev);
>> +	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
>> +		pm_runtime_put_noidle(&dev->dev);
>> 
>> 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
>> 		pm_runtime_allow(&dev->dev);
>> @@ -397,6 +399,9 @@ static void xhci_pci_remove(struct pci_dev *dev)
>> 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
>> 		pm_runtime_forbid(&dev->dev);
>> 
>> +	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
>> +		pm_runtime_get_noresume(&dev->dev);
>> +
>> 	if (xhci->shared_hcd) {
>> 		usb_remove_hcd(xhci->shared_hcd);
>> 		usb_put_hcd(xhci->shared_hcd);
>> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
>> index 86cfefdd6632..d9c209a10d3f 100644
>> --- a/drivers/usb/host/xhci.h
>> +++ b/drivers/usb/host/xhci.h
>> @@ -1873,6 +1873,7 @@ struct xhci_hcd {
>> #define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
>> #define XHCI_RESET_PLL_ON_DISCONNECT	BIT_ULL(34)
>> #define XHCI_SNPS_BROKEN_SUSPEND    BIT_ULL(35)
>> +#define XHCI_DISABLE_RUNTIME_SUSPEND    BIT_ULL(36)
>> 
>> 	unsigned int		num_active_eps;
>> 	unsigned int		limit_active_eps;
>> -- 
>> 2.17.1
>> 
>
Mathias Nyman Sept. 28, 2020, 9:42 p.m. UTC | #3
On 28.9.2020 12.10, Kai-Heng Feng wrote:
> 
> 
>> On Jun 8, 2020, at 11:56, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
>>
>>
>>
>>> On May 5, 2020, at 01:16, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
>>>
>>> Etron EJ168 USB 3.0 Host Controller stops working after S3, if it was
>>> runtime suspended previously:
>>> [  370.080359] pci 0000:02:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>>> [  370.080477] xhci_hcd 0000:04:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>>> [  370.080532] pcieport 0000:00:1c.0: DPC: containment event, status:0x1f05 source:0x0200
>>> [  370.080533] pcieport 0000:00:1c.0: DPC: ERR_FATAL detected
>>> [  370.080536] xhci_hcd 0000:04:00.0: can't change power state from D3hot to D0 (config space inaccessible)
>>> [  370.080552] xhci_hcd 0000:04:00.0: AER: can't recover (no error_detected callback)
>>> [  370.080566] usb usb3: root hub lost power or was reset
>>> [  370.080566] usb usb4: root hub lost power or was reset
>>> [  370.080572] xhci_hcd 0000:04:00.0: Host halt failed, -19
>>> [  370.080574] xhci_hcd 0000:04:00.0: Host not accessible, reset failed.
>>> [  370.080575] xhci_hcd 0000:04:00.0: PCI post-resume error -19!
>>> [  370.080586] xhci_hcd 0000:04:00.0: HC died; cleaning up
>>>
>>> This can be fixed by not runtime suspend the controller at all.
>>>
>>> So disable runtime suspend for EJ168 xHCI device.
>>>
>>> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>>
>> A gentle ping...
> 
> Another gentle ping...

Thanks, somehow I didn't notice this earlier.

Was the root cause ever investigated?
Preventing runtime suspend looks like a quick fix to get rid of the issue, but
it possibly just hides some other underlying power management problem.

-Mathias
Kai-Heng Feng Sept. 29, 2020, 7:39 a.m. UTC | #4
> On Sep 29, 2020, at 05:42, Mathias Nyman <mathias.nyman@linux.intel.com> wrote:
> 
> On 28.9.2020 12.10, Kai-Heng Feng wrote:
>> 
>> 
>>> On Jun 8, 2020, at 11:56, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
>>> 
>>> 
>>> 
>>>> On May 5, 2020, at 01:16, Kai-Heng Feng <kai.heng.feng@canonical.com> wrote:
>>>> 
>>>> Etron EJ168 USB 3.0 Host Controller stops working after S3, if it was
>>>> runtime suspended previously:
>>>> [  370.080359] pci 0000:02:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>>>> [  370.080477] xhci_hcd 0000:04:00.0: can't change power state from D3cold to D0 (config space inaccessible)
>>>> [  370.080532] pcieport 0000:00:1c.0: DPC: containment event, status:0x1f05 source:0x0200
>>>> [  370.080533] pcieport 0000:00:1c.0: DPC: ERR_FATAL detected
>>>> [  370.080536] xhci_hcd 0000:04:00.0: can't change power state from D3hot to D0 (config space inaccessible)
>>>> [  370.080552] xhci_hcd 0000:04:00.0: AER: can't recover (no error_detected callback)
>>>> [  370.080566] usb usb3: root hub lost power or was reset
>>>> [  370.080566] usb usb4: root hub lost power or was reset
>>>> [  370.080572] xhci_hcd 0000:04:00.0: Host halt failed, -19
>>>> [  370.080574] xhci_hcd 0000:04:00.0: Host not accessible, reset failed.
>>>> [  370.080575] xhci_hcd 0000:04:00.0: PCI post-resume error -19!
>>>> [  370.080586] xhci_hcd 0000:04:00.0: HC died; cleaning up
>>>> 
>>>> This can be fixed by not runtime suspend the controller at all.
>>>> 
>>>> So disable runtime suspend for EJ168 xHCI device.
>>>> 
>>>> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>>> 
>>> A gentle ping...
>> 
>> Another gentle ping...
> 
> Thanks, somehow I didn't notice this earlier.
> 
> Was the rootcause ever investigated?
> Preventing runtime suspend looks like a quick fix to get rid of the issue, but 
> possibly just hides some other underlying power management problem

I tried different angles but didn't find any useful solution.

Eventually I found out that the device stays at D0 under Windows, so this is the approach I took.

Kai-Heng

> 
> -Mathias
diff mbox series

Patch

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 766b74723e64..67b4b433a93e 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -227,6 +227,7 @@  static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
+		xhci->quirks |= XHCI_DISABLE_RUNTIME_SUSPEND;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
 	    pdev->device == 0x0014) {
@@ -371,7 +372,8 @@  static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		xhci->shared_hcd->can_do_streams = 1;
 
 	/* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */
-	pm_runtime_put_noidle(&dev->dev);
+	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
+		pm_runtime_put_noidle(&dev->dev);
 
 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
 		pm_runtime_allow(&dev->dev);
@@ -397,6 +399,9 @@  static void xhci_pci_remove(struct pci_dev *dev)
 	if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW)
 		pm_runtime_forbid(&dev->dev);
 
+	if (!(xhci->quirks & XHCI_DISABLE_RUNTIME_SUSPEND))
+		pm_runtime_get_noresume(&dev->dev);
+
 	if (xhci->shared_hcd) {
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 86cfefdd6632..d9c209a10d3f 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1873,6 +1873,7 @@  struct xhci_hcd {
 #define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
 #define XHCI_RESET_PLL_ON_DISCONNECT	BIT_ULL(34)
 #define XHCI_SNPS_BROKEN_SUSPEND    BIT_ULL(35)
+#define XHCI_DISABLE_RUNTIME_SUSPEND    BIT_ULL(36)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;