diff mbox series

[v2] PCI: imx6: Add suspend/resume support for i.MX6QDL

Message ID 20241009131659.29616-1-eichest@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] PCI: imx6: Add suspend/resume support for i.MX6QDL | expand

Commit Message

Stefan Eichenberger Oct. 9, 2024, 1:14 p.m. UTC
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>

The suspend/resume support is broken on the i.MX6QDL platform. This
patch resets the link upon resuming to recover functionality. It shares
most of the sequences with other i.MX devices but does not touch the
critical registers, which might break PCIe. This patch addresses the
same issue as the following downstream commit:
https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
In comparison this patch will also reset the device if possible. Without
this patch suspend/resume will not work if a PCIe device is connected.
The kernel will hang on resume and print an error:
ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
8<--- cut here ---
Unhandled fault: imprecise external abort (0x1406) at 0x0106f944

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
---
v1 -> v2: Share most code with other i.MX platforms and set suspend
	  support flag for i.MX6QDL. Version 1 can be found here:
	  https://lore.kernel.org/all/20240819090428.17349-1-eichest@gmail.com/

 drivers/pci/controller/dwc/pci-imx6.c | 44 +++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 3 deletions(-)

Comments

Frank Li Oct. 10, 2024, 8:01 p.m. UTC | #1
On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
>
> The suspend/resume support is broken on the i.MX6QDL platform. This
> patch resets the link upon resuming to recover functionality. It shares
> most of the sequences with other i.MX devices but does not touch the
> critical registers, which might break PCIe. This patch addresses the
> same issue as the following downstream commit:
> https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> In comparison this patch will also reset the device if possible. Without
> this patch suspend/resume will not work if a PCIe device is connected.
> The kernel will hang on resume and print an error:
> ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> 8<--- cut here ---
> Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
>
> Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> ---

Thank you for your patch.

But it may conflict with another suspend/resume patch
https://lore.kernel.org/imx/1727245477-15961-8-git-send-email-hongxing.zhu@nxp.com/

Frank

> v1 -> v2: Share most code with other i.MX platforms and set suspend
> 	  support flag for i.MX6QDL. Version 1 can be found here:
> 	  https://lore.kernel.org/all/20240819090428.17349-1-eichest@gmail.com/
>
>  drivers/pci/controller/dwc/pci-imx6.c | 44 +++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 808d1f1054173..f33bef0aa1071 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -1238,8 +1238,23 @@ static int imx_pcie_suspend_noirq(struct device *dev)
>
>  	imx_pcie_msi_save_restore(imx_pcie, true);
>  	imx_pcie_pm_turnoff(imx_pcie);
> -	imx_pcie_stop_link(imx_pcie->pci);
> -	imx_pcie_host_exit(pp);
> +	/*
> +	 * Do not turn off the PCIe controller because of ERR003756, ERR004490, ERR005188,
> +	 * they all document issues with LLTSSM and the PCIe controller which
> +	 * does not come out of reset properly. Therefore, try to keep the controller enabled
> +	 * and only reset the link. However, the reference clock still needs to be turned off,
> +	 * else the controller will freeze on resume.
> +	 */
> +	if (imx_pcie->drvdata->variant == IMX6Q) {

Add new flag: IMX_PCIE_FLAG_SKIP_TURN_OFF.
use imx_check_flag() here.
Maybe other platform don't want to turn PCIe at some usercase.

> +		/* Reset the PCIe device */
> +		gpiod_set_value_cansleep(imx_pcie->reset_gpiod, 1);
> +
> +		if (imx_pcie->drvdata->enable_ref_clk)
> +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, false);
> +	} else {
> +		imx_pcie_stop_link(imx_pcie->pci);
> +		imx_pcie_host_exit(pp);
> +	}
>
>  	return 0;
>  }
> @@ -1253,6 +1268,28 @@ static int imx_pcie_resume_noirq(struct device *dev)
>  	if (!(imx_pcie->drvdata->flags & IMX_PCIE_FLAG_SUPPORTS_SUSPEND))
>  		return 0;
>
> +	/*
> +	 * Even though the i.MX6Q does not support proper suspend/resume, we
> +	 * need to reset the link after resume or the memory mapped PCIe I/O
> +	 * space will be inaccessible. This will cause the system to freeze.
> +	 */
> +	if (imx_pcie->drvdata->variant == IMX6Q) {
> +		if (imx_pcie->drvdata->enable_ref_clk)
> +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, true);
> +
> +		imx_pcie_deassert_core_reset(imx_pcie);
> +
> +		/*
> +		 * Setup the root complex again and enable msi. Without this PCIe will
> +		 * not work in msi mode and drivers will crash if they try to access
> +		 * the device memory area
> +		 */
> +		dw_pcie_setup_rc(&imx_pcie->pci->pp);
> +		imx_pcie_msi_save_restore(imx_pcie, false);
> +
> +		return 0;
> +	}
> +
>  	ret = imx_pcie_host_init(pp);
>  	if (ret)
>  		return ret;
> @@ -1485,7 +1522,8 @@ static const struct imx_pcie_drvdata drvdata[] = {
>  	[IMX6Q] = {
>  		.variant = IMX6Q,
>  		.flags = IMX_PCIE_FLAG_IMX_PHY |
> -			 IMX_PCIE_FLAG_IMX_SPEED_CHANGE,
> +			 IMX_PCIE_FLAG_IMX_SPEED_CHANGE |
> +			 IMX_PCIE_FLAG_SUPPORTS_SUSPEND,

Add IMX_PCIE_FLAG_SKIP_TURN_OFF here
and move comment "Do not turn off the PCIe" to here.

Frank

>  		.dbi_length = 0x200,
>  		.gpr = "fsl,imx6q-iomuxc-gpr",
>  		.clk_names = imx6q_clks,
> --
> 2.43.0
>
Francesco Dolcini Oct. 10, 2024, 8:11 p.m. UTC | #2
Hello Frank,

On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> >
> > The suspend/resume support is broken on the i.MX6QDL platform. This
> > patch resets the link upon resuming to recover functionality. It shares
> > most of the sequences with other i.MX devices but does not touch the
> > critical registers, which might break PCIe. This patch addresses the
> > same issue as the following downstream commit:
> > https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> > In comparison this patch will also reset the device if possible. Without
> > this patch suspend/resume will not work if a PCIe device is connected.
> > The kernel will hang on resume and print an error:
> > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> > 8<--- cut here ---
> > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> >
> > Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > ---
> 
> Thank you for your patch.
> 
> But it may conflict with another suspend/resume patch
> https://lore.kernel.org/imx/1727245477-15961-8-git-send-email-hongxing.zhu@nxp.com/

Thanks for the head-up.

Do you see any issue with this patch apart that? Because this patch is
fixing a crash, so I would expect this to be merged, once ready, and
such a series rebased afterward.

I am writing this explicitly since you wrote a similar comment on the
v1 (https://lore.kernel.org/all/ZsNXDq%2FkidZdyhvD@lizhi-Precision-Tower-5810/)
and I would like to prevent to have this fix starving for long just because
multiple people is working on the same driver.

Francesco
Frank Li Oct. 10, 2024, 10:45 p.m. UTC | #3
On Thu, Oct 10, 2024 at 10:11:21PM +0200, Francesco Dolcini wrote:
> Hello Frank,
>
> On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> > On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > >
> > > The suspend/resume support is broken on the i.MX6QDL platform. This
> > > patch resets the link upon resuming to recover functionality. It shares
> > > most of the sequences with other i.MX devices but does not touch the
> > > critical registers, which might break PCIe. This patch addresses the
> > > same issue as the following downstream commit:
> > > https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> > > In comparison this patch will also reset the device if possible. Without
> > > this patch suspend/resume will not work if a PCIe device is connected.
> > > The kernel will hang on resume and print an error:
> > > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> > > 8<--- cut here ---
> > > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > >
> > > Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > ---
> >
> > Thank you for your patch.
> >
> > But it may conflict with another suspend/resume patch
> > https://lore.kernel.org/imx/1727245477-15961-8-git-send-email-hongxing.zhu@nxp.com/
>
> Thanks for the head-up.
>
> Do you see any issue with this patch apart that? Because this patch is
> fixing a crash, so I would expect this to be merged, once ready, and
> such a series rebased afterward.
>
> I am writing this explicitly since you wrote a similar comment on the
> v1 (https://lore.kernel.org/all/ZsNXDq%2FkidZdyhvD@lizhi-Precision-Tower-5810/)
> and I would like to prevent to have this fix starving for long just because
> multiple people is working on the same driver.

My key comment for this patch is use flags IMX_PCIE_FLAG_SKIP_TURN_OFF
in suspend()/resume(), it is up to kw to pick which one firstly.

Frank

>
> Francesco
>
Stefan Eichenberger Oct. 11, 2024, 2:36 p.m. UTC | #4
Hi Frank,

On Thu, Oct 10, 2024 at 06:45:17PM -0400, Frank Li wrote:
> On Thu, Oct 10, 2024 at 10:11:21PM +0200, Francesco Dolcini wrote:
> > Hello Frank,
> >
> > On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> > > On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > > > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > >
> > > > The suspend/resume support is broken on the i.MX6QDL platform. This
> > > > patch resets the link upon resuming to recover functionality. It shares
> > > > most of the sequences with other i.MX devices but does not touch the
> > > > critical registers, which might break PCIe. This patch addresses the
> > > > same issue as the following downstream commit:
> > > > https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> > > > In comparison this patch will also reset the device if possible. Without
> > > > this patch suspend/resume will not work if a PCIe device is connected.
> > > > The kernel will hang on resume and print an error:
> > > > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> > > > 8<--- cut here ---
> > > > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > > >
> > > > Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > > ---
> > >
> > > Thank you for your patch.
> > >
> > > But it may conflict with another suspend/resume patch
> > > https://lore.kernel.org/imx/1727245477-15961-8-git-send-email-hongxing.zhu@nxp.com/
> >
> > Thanks for the head-up.
> >
> > Do you see any issue with this patch apart that? Because this patch is
> > fixing a crash, so I would expect this to be merged, once ready, and
> > such a series rebased afterward.
> >
> > I am writing this explicitly since you wrote a similar comment on the
> > v1 (https://lore.kernel.org/all/ZsNXDq%2FkidZdyhvD@lizhi-Precision-Tower-5810/)
> > and I would like to prevent to have this fix starving for long just because
> > multiple people is working on the same driver.
> 
> My key comment for this patch is use flags IMX_PCIE_FLAG_SKIP_TURN_OFF
> in suspend()/resume(), it is up to kw to pick which one firstly.

I will try to implement it as you proposed with the new flag. 

However, what I figured out with kernel v6.12-rc1 I get the following
warning when booting on an i.MX6QDL even without my patch applied:
[    1.901199] PCI: bus0: Fast back to back transfers disabled
[    1.904754] mmc1: SDHCI controller on 2190000.mmc [2190000.mmc] using ADMA
[    1.904914] mmc2: SDHCI controller on 2194000.mmc [2194000.mmc] using ADMA
[    1.910686] pci 0000:01:00.0: [168c:003c] type 00 class 0x028000 PCIe Endpoint
[    1.918390] NET: Registered PF_PACKET protocol family
[    1.918573] mmc0: SDHCI controller on 2198000.mmc [2198000.mmc] using ADMA
[    1.924322] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x001fffff 64bit]
[    1.931635] Key type dns_resolver registered
[    1.936764] pci 0000:01:00.0: ROM [mem 0x00000000-0x0000ffff pref]
[    1.961043] pci 0000:01:00.0: supports D1 D2
[    1.961526] Registering SWP/SWPB emulation handler
[    1.965601] mmc0: new DDR MMC card at address 0001
[    1.976575] mmcblk0: mmc0:0001 Q2J54A 3.59 GiB
[    1.979794] Loading compiled-in X.509 certificates
[    1.985036] PCI: bus1: Fast back to back transfers disabled
[    1.991531] pci 0000:00:00.0: bridge window [mem 0x01000000-0x011fffff]: assigned
[    1.998742]  mmcblk0: p1 p2 p3
[    1.999163] pci 0000:00:00.0: BAR 0 [mem 0x01200000-0x012fffff]: assigned
[    2.003947] mmcblk0boot0: mmc0:0001 Q2J54A 16.0 MiB
[    2.008990] pci 0000:00:00.0: bridge window [mem 0x01300000-0x013fffff pref]: assigned
[    2.009023] pci 0000:00:00.0: ROM [mem 0x01400000-0x0140ffff pref]: assigned
[    2.009054] pci 0000:01:00.0: BAR 0 [mem 0x01000000-0x011fffff 64bit]: assigned
[    2.017526] mmcblk0boot1: mmc0:0001 Q2J54A 16.0 MiB
[    2.022015] pci 0000:01:00.0: ROM [mem 0x01300000-0x0130ffff pref]: assigned
[    2.032133] mmcblk0rpmb: mmc0:0001 Q2J54A 512 KiB, chardev (242:0)
[    2.036347] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
[    2.036370] pci 0000:00:00.0:   bridge window [mem 0x01000000-0x011fffff]
[    2.036384] pci 0000:00:00.0:   bridge window [mem 0x01300000-0x013fffff pref]
[    2.042552] pps pps0: new PPS source ptp0
[    2.048338] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
[    2.083626] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
[    2.089972] pci_bus 0000:01: resource 1 [mem 0x01000000-0x011fffff]
[    2.093461] fec 2188000.ethernet eth0: registered PHC device 0
[    2.096283] pci_bus 0000:01: resource 2 [mem 0x01300000-0x013fffff pref]
[    2.096352] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/resource0'
[    2.096365] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
[    2.096381] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[    2.096391] Workqueue: async async_run_entry_fn
[    2.103025] imx_thermal 20c8000.anatop:tempmon: Industrial CPU temperature grade - max:105C critical:100C passive:95C
[    2.108932]
[    2.108940] Call trace:
[    2.108950]  unwind_backtrace from show_stack+0x10/0x14
[    2.121391] clk: Disabling unused clocks
[    2.127423]  show_stack from dump_stack_lvl+0x54/0x68
[    2.127451]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
[    2.134265] PM: genpd: Disabling unused power domains
[    2.138525]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
[    2.138547]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
[    2.149242] ALSA device list:
[    2.150647]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
[    2.153183]   No soundcards found.
[    2.158407]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
[    2.211699]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
[    2.217930]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
[    2.223806]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
[    2.229682]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
[    2.235559]  imx_pcie_probe from platform_probe+0x5c/0xb0
[    2.241010]  platform_probe from really_probe+0xd0/0x3a4
[    2.246364]  really_probe from __driver_probe_device+0x8c/0x1d4
[    2.252321]  __driver_probe_device from driver_probe_device+0x30/0xc0
[    2.258803]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
[    2.265892]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
[    2.272980]  async_run_entry_fn from process_one_work+0x154/0x2dc
[    2.279115]  process_one_work from worker_thread+0x250/0x3f0
[    2.284811]  worker_thread from kthread+0x110/0x12c
[    2.289726]  kthread from ret_from_fork+0x14/0x28
[    2.294461] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
[    2.299535] dfa0:                                     00000000 00000000 00000000 00000000
[    2.307740] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    2.315942] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[    2.323156] pcieport 0000:00:00.0: PME: Signaling with IRQ 293
[    2.329645] pcieport 0000:00:00.0: AER: enabled with IRQ 293
[    2.335553] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/resource0'
[    2.347794] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
[    2.355229] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[    2.361779] Workqueue: async async_run_entry_fn
[    2.366349] Call trace:
[    2.366362]  unwind_backtrace from show_stack+0x10/0x14
[    2.374183]  show_stack from dump_stack_lvl+0x54/0x68
[    2.379273]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
[    2.384706]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
[    2.391183]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
[    2.398270]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
[    2.405360]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
[    2.412113]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
[    2.418334]  pci_bus_add_devices from pci_bus_add_devices+0x60/0x70
[    2.424642]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
[    2.430511]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
[    2.436384]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
[    2.442258]  imx_pcie_probe from platform_probe+0x5c/0xb0
[    2.447704]  platform_probe from really_probe+0xd0/0x3a4
[    2.453057]  really_probe from __driver_probe_device+0x8c/0x1d4
[    2.459014]  __driver_probe_device from driver_probe_device+0x30/0xc0
[    2.465493]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
[    2.472580]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
[    2.479666]  async_run_entry_fn from process_one_work+0x154/0x2dc
[    2.485800]  process_one_work from worker_thread+0x250/0x3f0
[    2.491494]  worker_thread from kthread+0x110/0x12c
[    2.496408]  kthread from ret_from_fork+0x14/0x28
[    2.501140] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
[    2.506214] dfa0:                                     00000000 00000000 00000000 00000000
[    2.514418] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[    2.522620] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000

This was not the case with kernel v6.11, do you have an idea where this
comes from? I did not dig into more detail yet but it looks a bit like a
regression. The driver still works, it just prints this duplicate
filename warning.

Regards,
Stefan
Frank Li Oct. 11, 2024, 3:36 p.m. UTC | #5
On Fri, Oct 11, 2024 at 04:36:21PM +0200, Stefan Eichenberger wrote:
> Hi Frank,
>
> On Thu, Oct 10, 2024 at 06:45:17PM -0400, Frank Li wrote:
> > On Thu, Oct 10, 2024 at 10:11:21PM +0200, Francesco Dolcini wrote:
> > > Hello Frank,
> > >
> > > On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> > > > On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > > > > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > > >
> > > > > The suspend/resume support is broken on the i.MX6QDL platform. This
> > > > > patch resets the link upon resuming to recover functionality. It shares
> > > > > most of the sequences with other i.MX devices but does not touch the
> > > > > critical registers, which might break PCIe. This patch addresses the
> > > > > same issue as the following downstream commit:
> > > > > https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> > > > > In comparison this patch will also reset the device if possible. Without
> > > > > this patch suspend/resume will not work if a PCIe device is connected.
> > > > > The kernel will hang on resume and print an error:
> > > > > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> > > > > 8<--- cut here ---
> > > > > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > > > >
> > > > > Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > > > ---
> > > >
> > > > Thank you for your patch.
> > > >
> > > > But it may conflict with another suspend/resume patch
> > > > https://lore.kernel.org/imx/1727245477-15961-8-git-send-email-hongxing.zhu@nxp.com/
> > >
> > > Thanks for the head-up.
> > >
> > > Do you see any issue with this patch apart that? Because this patch is
> > > fixing a crash, so I would expect this to be merged, once ready, and
> > > such a series rebased afterward.
> > >
> > > I am writing this explicitly since you wrote a similar comment on the
> > > v1 (https://lore.kernel.org/all/ZsNXDq%2FkidZdyhvD@lizhi-Precision-Tower-5810/)
> > > and I would like to prevent to have this fix starving for long just because
> > > multiple people is working on the same driver.
> >
> > My key comment for this patch is use flags IMX_PCIE_FLAG_SKIP_TURN_OFF
> > in suspend()/resume(), it is up to kw to pick which one firstly.
>
> I will try to implement it as you proposed with the new flag.

I have not met this problem at arm64 platform. what's your .config?

Frank

>
> However, what I figured out with kernel v6.12-rc1 I get the following
> warning when booting on an i.MX6QDL even without my patch applied:
> [    1.901199] PCI: bus0: Fast back to back transfers disabled
> [    1.904754] mmc1: SDHCI controller on 2190000.mmc [2190000.mmc] using ADMA
> [    1.904914] mmc2: SDHCI controller on 2194000.mmc [2194000.mmc] using ADMA
> [    1.910686] pci 0000:01:00.0: [168c:003c] type 00 class 0x028000 PCIe Endpoint
> [    1.918390] NET: Registered PF_PACKET protocol family
> [    1.918573] mmc0: SDHCI controller on 2198000.mmc [2198000.mmc] using ADMA
> [    1.924322] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x001fffff 64bit]
> [    1.931635] Key type dns_resolver registered
> [    1.936764] pci 0000:01:00.0: ROM [mem 0x00000000-0x0000ffff pref]
> [    1.961043] pci 0000:01:00.0: supports D1 D2
> [    1.961526] Registering SWP/SWPB emulation handler
> [    1.965601] mmc0: new DDR MMC card at address 0001
> [    1.976575] mmcblk0: mmc0:0001 Q2J54A 3.59 GiB
> [    1.979794] Loading compiled-in X.509 certificates
> [    1.985036] PCI: bus1: Fast back to back transfers disabled
> [    1.991531] pci 0000:00:00.0: bridge window [mem 0x01000000-0x011fffff]: assigned
> [    1.998742]  mmcblk0: p1 p2 p3
> [    1.999163] pci 0000:00:00.0: BAR 0 [mem 0x01200000-0x012fffff]: assigned
> [    2.003947] mmcblk0boot0: mmc0:0001 Q2J54A 16.0 MiB
> [    2.008990] pci 0000:00:00.0: bridge window [mem 0x01300000-0x013fffff pref]: assigned
> [    2.009023] pci 0000:00:00.0: ROM [mem 0x01400000-0x0140ffff pref]: assigned
> [    2.009054] pci 0000:01:00.0: BAR 0 [mem 0x01000000-0x011fffff 64bit]: assigned
> [    2.017526] mmcblk0boot1: mmc0:0001 Q2J54A 16.0 MiB
> [    2.022015] pci 0000:01:00.0: ROM [mem 0x01300000-0x0130ffff pref]: assigned
> [    2.032133] mmcblk0rpmb: mmc0:0001 Q2J54A 512 KiB, chardev (242:0)
> [    2.036347] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> [    2.036370] pci 0000:00:00.0:   bridge window [mem 0x01000000-0x011fffff]
> [    2.036384] pci 0000:00:00.0:   bridge window [mem 0x01300000-0x013fffff pref]
> [    2.042552] pps pps0: new PPS source ptp0
> [    2.048338] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
> [    2.083626] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
> [    2.089972] pci_bus 0000:01: resource 1 [mem 0x01000000-0x011fffff]
> [    2.093461] fec 2188000.ethernet eth0: registered PHC device 0
> [    2.096283] pci_bus 0000:01: resource 2 [mem 0x01300000-0x013fffff pref]
> [    2.096352] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/resource0'
> [    2.096365] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
> [    2.096381] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.096391] Workqueue: async async_run_entry_fn
> [    2.103025] imx_thermal 20c8000.anatop:tempmon: Industrial CPU temperature grade - max:105C critical:100C passive:95C
> [    2.108932]
> [    2.108940] Call trace:
> [    2.108950]  unwind_backtrace from show_stack+0x10/0x14
> [    2.121391] clk: Disabling unused clocks
> [    2.127423]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.127451]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.134265] PM: genpd: Disabling unused power domains
> [    2.138525]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.138547]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
> [    2.149242] ALSA device list:
> [    2.150647]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
> [    2.153183]   No soundcards found.
> [    2.158407]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
> [    2.211699]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.217930]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.223806]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.229682]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.235559]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.241010]  platform_probe from really_probe+0xd0/0x3a4
> [    2.246364]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.252321]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.258803]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
> [    2.265892]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
> [    2.272980]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.279115]  process_one_work from worker_thread+0x250/0x3f0
> [    2.284811]  worker_thread from kthread+0x110/0x12c
> [    2.289726]  kthread from ret_from_fork+0x14/0x28
> [    2.294461] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.299535] dfa0:                                     00000000 00000000 00000000 00000000
> [    2.307740] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> [    2.315942] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [    2.323156] pcieport 0000:00:00.0: PME: Signaling with IRQ 293
> [    2.329645] pcieport 0000:00:00.0: AER: enabled with IRQ 293
> [    2.335553] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/resource0'
> [    2.347794] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
> [    2.355229] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.361779] Workqueue: async async_run_entry_fn
> [    2.366349] Call trace:
> [    2.366362]  unwind_backtrace from show_stack+0x10/0x14
> [    2.374183]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.379273]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.384706]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.391183]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
> [    2.398270]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
> [    2.405360]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
> [    2.412113]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.418334]  pci_bus_add_devices from pci_bus_add_devices+0x60/0x70
> [    2.424642]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.430511]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.436384]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.442258]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.447704]  platform_probe from really_probe+0xd0/0x3a4
> [    2.453057]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.459014]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.465493]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
> [    2.472580]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
> [    2.479666]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.485800]  process_one_work from worker_thread+0x250/0x3f0
> [    2.491494]  worker_thread from kthread+0x110/0x12c
> [    2.496408]  kthread from ret_from_fork+0x14/0x28
> [    2.501140] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.506214] dfa0:                                     00000000 00000000 00000000 00000000
> [    2.514418] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> [    2.522620] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
>
> This was not the case with kernel v6.11, do you have an idea where this
> comes from? I did not dig into more detail yet but it looks a bit like a
> regression. The driver still works, it just prints this duplicate
> filename warning.
>
> Regards,
> Stefan
Fabio Estevam Oct. 11, 2024, 5:53 p.m. UTC | #6
On Fri, Oct 11, 2024 at 12:36 PM Frank Li <Frank.li@nxp.com> wrote:

> I have not met this problem at arm64 platform. what's your .config?

Stefan reported the problem on imx6, so imx_v6_v7_defconfig.
Manivannan Sadhasivam Oct. 12, 2024, 4:13 a.m. UTC | #7
On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> 
> The suspend/resume support is broken on the i.MX6QDL platform. This

You mean the 'system suspend/resume'?

> patch resets the link upon resuming to recover functionality. It shares
> most of the sequences with other i.MX devices but does not touch the
> critical registers, which might break PCIe. This patch addresses the
> same issue as the following downstream commit:
> https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> In comparison this patch will also reset the device if possible. Without
> this patch suspend/resume will not work if a PCIe device is connected.
> The kernel will hang on resume and print an error:
> ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible

Looks like the device is turned off during suspend.

> 8<--- cut here ---
> Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> 
> Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> ---
> v1 -> v2: Share most code with other i.MX platforms and set suspend
> 	  support flag for i.MX6QDL. Version 1 can be found here:
> 	  https://lore.kernel.org/all/20240819090428.17349-1-eichest@gmail.com/
> 
>  drivers/pci/controller/dwc/pci-imx6.c | 44 +++++++++++++++++++++++++--
>  1 file changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 808d1f1054173..f33bef0aa1071 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -1238,8 +1238,23 @@ static int imx_pcie_suspend_noirq(struct device *dev)
>  
>  	imx_pcie_msi_save_restore(imx_pcie, true);
>  	imx_pcie_pm_turnoff(imx_pcie);
> -	imx_pcie_stop_link(imx_pcie->pci);
> -	imx_pcie_host_exit(pp);
> +	/*
> +	 * Do not turn off the PCIe controller because of ERR003756, ERR004490, ERR005188,
> +	 * they all document issues with LLTSSM and the PCIe controller which

LTSSM

But LTSSM is for the PCIe link state, not sure how it impacts controller state.
Can you share the link to those erratums?

> +	 * does not come out of reset properly. Therefore, try to keep the controller enabled
> +	 * and only reset the link. However, the reference clock still needs to be turned off,

You are resetting the *device* below, not the link.

> +	 * else the controller will freeze on resume.
> +	 */

Please use 80 columns for comments. Exception is for the code.

> +	if (imx_pcie->drvdata->variant == IMX6Q) {
> +		/* Reset the PCIe device */
> +		gpiod_set_value_cansleep(imx_pcie->reset_gpiod, 1);
> +
> +		if (imx_pcie->drvdata->enable_ref_clk)
> +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, false);
> +	} else {
> +		imx_pcie_stop_link(imx_pcie->pci);
> +		imx_pcie_host_exit(pp);
> +	}
>  
>  	return 0;
>  }
> @@ -1253,6 +1268,28 @@ static int imx_pcie_resume_noirq(struct device *dev)
>  	if (!(imx_pcie->drvdata->flags & IMX_PCIE_FLAG_SUPPORTS_SUSPEND))
>  		return 0;
>  
> +	/*
> +	 * Even though the i.MX6Q does not support proper suspend/resume, we
> +	 * need to reset the link after resume or the memory mapped PCIe I/O
> +	 * space will be inaccessible. This will cause the system to freeze.
> +	 */

This comment is not really needed.

> +	if (imx_pcie->drvdata->variant == IMX6Q) {
> +		if (imx_pcie->drvdata->enable_ref_clk)
> +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, true);
> +
> +		imx_pcie_deassert_core_reset(imx_pcie);

There is no corresponding imx_pcie_assert_core_reset() in suspend.

> +
> +		/*
> +		 * Setup the root complex again and enable msi. Without this PCIe will
> +		 * not work in msi mode and drivers will crash if they try to access
> +		 * the device memory area
> +		 */

This indicates that the controller state is not preserved. I think we need a bit
more understanding on what is going on during system suspend on this platform.

- Mani
Manivannan Sadhasivam Oct. 12, 2024, 6:03 a.m. UTC | #8
On Fri, Oct 11, 2024 at 04:36:21PM +0200, Stefan Eichenberger wrote:

[...]

> However, what I figured out with kernel v6.12-rc1 I get the following
> warning when booting on an i.MX6QDL even without my patch applied:
> [    1.901199] PCI: bus0: Fast back to back transfers disabled
> [    1.904754] mmc1: SDHCI controller on 2190000.mmc [2190000.mmc] using ADMA
> [    1.904914] mmc2: SDHCI controller on 2194000.mmc [2194000.mmc] using ADMA
> [    1.910686] pci 0000:01:00.0: [168c:003c] type 00 class 0x028000 PCIe Endpoint
> [    1.918390] NET: Registered PF_PACKET protocol family
> [    1.918573] mmc0: SDHCI controller on 2198000.mmc [2198000.mmc] using ADMA
> [    1.924322] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x001fffff 64bit]
> [    1.931635] Key type dns_resolver registered
> [    1.936764] pci 0000:01:00.0: ROM [mem 0x00000000-0x0000ffff pref]
> [    1.961043] pci 0000:01:00.0: supports D1 D2
> [    1.961526] Registering SWP/SWPB emulation handler
> [    1.965601] mmc0: new DDR MMC card at address 0001
> [    1.976575] mmcblk0: mmc0:0001 Q2J54A 3.59 GiB
> [    1.979794] Loading compiled-in X.509 certificates
> [    1.985036] PCI: bus1: Fast back to back transfers disabled
> [    1.991531] pci 0000:00:00.0: bridge window [mem 0x01000000-0x011fffff]: assigned
> [    1.998742]  mmcblk0: p1 p2 p3
> [    1.999163] pci 0000:00:00.0: BAR 0 [mem 0x01200000-0x012fffff]: assigned
> [    2.003947] mmcblk0boot0: mmc0:0001 Q2J54A 16.0 MiB
> [    2.008990] pci 0000:00:00.0: bridge window [mem 0x01300000-0x013fffff pref]: assigned
> [    2.009023] pci 0000:00:00.0: ROM [mem 0x01400000-0x0140ffff pref]: assigned
> [    2.009054] pci 0000:01:00.0: BAR 0 [mem 0x01000000-0x011fffff 64bit]: assigned
> [    2.017526] mmcblk0boot1: mmc0:0001 Q2J54A 16.0 MiB
> [    2.022015] pci 0000:01:00.0: ROM [mem 0x01300000-0x0130ffff pref]: assigned
> [    2.032133] mmcblk0rpmb: mmc0:0001 Q2J54A 512 KiB, chardev (242:0)
> [    2.036347] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> [    2.036370] pci 0000:00:00.0:   bridge window [mem 0x01000000-0x011fffff]
> [    2.036384] pci 0000:00:00.0:   bridge window [mem 0x01300000-0x013fffff pref]
> [    2.042552] pps pps0: new PPS source ptp0
> [    2.048338] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
> [    2.083626] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
> [    2.089972] pci_bus 0000:01: resource 1 [mem 0x01000000-0x011fffff]
> [    2.093461] fec 2188000.ethernet eth0: registered PHC device 0
> [    2.096283] pci_bus 0000:01: resource 2 [mem 0x01300000-0x013fffff pref]
> [    2.096352] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/resource0'
> [    2.096365] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
> [    2.096381] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.096391] Workqueue: async async_run_entry_fn
> [    2.103025] imx_thermal 20c8000.anatop:tempmon: Industrial CPU temperature grade - max:105C critical:100C passive:95C
> [    2.108932]
> [    2.108940] Call trace:
> [    2.108950]  unwind_backtrace from show_stack+0x10/0x14
> [    2.121391] clk: Disabling unused clocks
> [    2.127423]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.127451]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.134265] PM: genpd: Disabling unused power domains
> [    2.138525]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.138547]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
> [    2.149242] ALSA device list:
> [    2.150647]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
> [    2.153183]   No soundcards found.
> [    2.158407]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
> [    2.211699]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.217930]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.223806]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.229682]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.235559]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.241010]  platform_probe from really_probe+0xd0/0x3a4
> [    2.246364]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.252321]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.258803]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
> [    2.265892]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
> [    2.272980]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.279115]  process_one_work from worker_thread+0x250/0x3f0
> [    2.284811]  worker_thread from kthread+0x110/0x12c
> [    2.289726]  kthread from ret_from_fork+0x14/0x28
> [    2.294461] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.299535] dfa0:                                     00000000 00000000 00000000 00000000
> [    2.307740] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> [    2.315942] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [    2.323156] pcieport 0000:00:00.0: PME: Signaling with IRQ 293
> [    2.329645] pcieport 0000:00:00.0: AER: enabled with IRQ 293
> [    2.335553] sysfs: cannot create duplicate filename '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/resource0'
> [    2.347794] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted 6.12.0-rc1 #54
> [    2.355229] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.361779] Workqueue: async async_run_entry_fn
> [    2.366349] Call trace:
> [    2.366362]  unwind_backtrace from show_stack+0x10/0x14
> [    2.374183]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.379273]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.384706]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.391183]  sysfs_add_bin_file_mode_ns from sysfs_create_bin_file+0xac/0xb4
> [    2.398270]  sysfs_create_bin_file from pci_create_resource_files+0x84/0x148
> [    2.405360]  pci_create_resource_files from pci_bus_add_device+0x24/0xe4
> [    2.412113]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.418334]  pci_bus_add_devices from pci_bus_add_devices+0x60/0x70
> [    2.424642]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.430511]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.436384]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.442258]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.447704]  platform_probe from really_probe+0xd0/0x3a4
> [    2.453057]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.459014]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.465493]  driver_probe_device from __driver_attach_async_helper+0x50/0xd8
> [    2.472580]  __driver_attach_async_helper from async_run_entry_fn+0x30/0x144
> [    2.479666]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.485800]  process_one_work from worker_thread+0x250/0x3f0
> [    2.491494]  worker_thread from kthread+0x110/0x12c
> [    2.496408]  kthread from ret_from_fork+0x14/0x28
> [    2.501140] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.506214] dfa0:                                     00000000 00000000 00000000 00000000
> [    2.514418] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
> [    2.522620] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> 
> This was not the case with kernel v6.11, do you have an idea where this
> comes from? I did not dig into more detail yet but it looks a bit like a
> regression. The driver still works, it just prints this duplicate
> filename warning.

Could you please do bisect to find the offending commit?

- Mani
Richard Zhu Oct. 12, 2024, 9:02 a.m. UTC | #9
> -----Original Message-----
> From: Stefan Eichenberger <eichest@gmail.com>
> Sent: 2024年10月11日 22:36
> To: Frank Li <frank.li@nxp.com>
> Cc: Francesco Dolcini <francesco@dolcini.it>; Hongxing Zhu
> <hongxing.zhu@nxp.com>; l.stach@pengutronix.de; lpieralisi@kernel.org;
> kw@linux.com; manivannan.sadhasivam@linaro.org; robh@kernel.org;
> bhelgaas@google.com; shawnguo@kernel.org; s.hauer@pengutronix.de;
> kernel@pengutronix.de; festevam@gmail.com; Francesco Dolcini
> <francesco.dolcini@toradex.com>; linux-pci@vger.kernel.org;
> linux-arm-kernel@lists.infradead.org; imx@lists.linux.dev;
> linux-kernel@vger.kernel.org; Stefan Eichenberger
> <stefan.eichenberger@toradex.com>
> Subject: Re: [PATCH v2] PCI: imx6: Add suspend/resume support for i.MX6QDL
> 
> Hi Frank,
> 
> On Thu, Oct 10, 2024 at 06:45:17PM -0400, Frank Li wrote:
> > On Thu, Oct 10, 2024 at 10:11:21PM +0200, Francesco Dolcini wrote:
> > > Hello Frank,
> > >
> > > On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> > > > On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > > > > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > > >
> > > > > The suspend/resume support is broken on the i.MX6QDL platform.
> > > > > This patch resets the link upon resuming to recover
> > > > > functionality. It shares most of the sequences with other i.MX
> > > > > devices but does not touch the critical registers, which might
> > > > > break PCIe. This patch addresses the same issue as the following
> downstream commit:
> > > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%
> > > > >
> 2Fgithub.com%2Fnxp-imx%2Flinux-imx%2Fcommit%2F4e92355e1f79d225ea
> > > > >
> 842511fcfd42b343b32995&data=05%7C02%7Chongxing.zhu%40nxp.com%7C4
> > > > >
> 9cdf8aaeee54cec71c508dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c3016
> > > > >
> 35%7C0%7C0%7C638642541899874406%7CUnknown%7CTWFpbGZsb3d8eyJWI
> joi
> > > > >
> MC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7
> > > > >
> C%7C%7C&sdata=DQGwcXYy0soCov1Uf4ycLQiisP8qlbBzOslyX3FY5Cs%3D&res
> > > > > erved=0 In comparison this patch will also reset the device if
> > > > > possible. Without this patch suspend/resume will not work if a
> > > > > PCIe device is connected.
> > > > > The kernel will hang on resume and print an error:
> > > > > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot
> > > > > to D0, device inaccessible
> > > > > 8<--- cut here ---
> > > > > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > > > >
> > > > > Signed-off-by: Stefan Eichenberger
> > > > > <stefan.eichenberger@toradex.com>
> > > > > ---
> > > >
> > > > Thank you for your patch.
> > > >
> > > > But it may conflict with another suspend/resume patch
> > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2F
> > > > lore.kernel.org%2Fimx%2F1727245477-15961-8-git-send-email-hongxing
> > > > .zhu%40nxp.com%2F&data=05%7C02%7Chongxing.zhu%40nxp.com%7C4
> 9cdf8aa
> > > >
> eee54cec71c508dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%
> 7C
> > > >
> 0%7C638642541899892894%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjA
> wMDA
> > > >
> iLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
> > > >
> =xPBKoiWL8IceLlv%2F6lWoqkPWosh%2BRvG8jyA8NjKSsOI%3D&reserved=0
> > >
> > > Thanks for the head-up.
> > >
> > > Do you see any issue with this patch apart that? Because this patch
> > > is fixing a crash, so I would expect this to be merged, once ready,
> > > and such a series rebased afterward.
> > >
> > > I am writing this explicitly since you wrote a similar comment on
> > > the
> > > v1
> > > (https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fl
> > > ore.kernel.org%2Fall%2FZsNXDq%252FkidZdyhvD%40lizhi-Precision-Tower-
> > >
> 5810%2F&data=05%7C02%7Chongxing.zhu%40nxp.com%7C49cdf8aaeee54cec
> 71c5
> > >
> 08dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63864
> 25418
> > >
> 99904634%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2l
> uMzI
> > >
> iLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=x2OVvZsi4YIxSg
> KGD
> > > 7ZzqbRQjiTFK4%2BzMdgDv2qSdig%3D&reserved=0)
> > > and I would like to prevent to have this fix starving for long just
> > > because multiple people is working on the same driver.
> >
> > My key comment for this patch is use flags IMX_PCIE_FLAG_SKIP_TURN_OFF
> > in suspend()/resume(), it is up to kw to pick which one firstly.
> 
> I will try to implement it as you proposed with the new flag.
> 
> However, what I figured out with kernel v6.12-rc1 I get the following warning
> when booting on an i.MX6QDL even without my patch applied:
> [    1.901199] PCI: bus0: Fast back to back transfers disabled
> [    1.904754] mmc1: SDHCI controller on 2190000.mmc [2190000.mmc]
> using ADMA
> [    1.904914] mmc2: SDHCI controller on 2194000.mmc [2194000.mmc]
> using ADMA
> [    1.910686] pci 0000:01:00.0: [168c:003c] type 00 class 0x028000 PCIe
> Endpoint
> [    1.918390] NET: Registered PF_PACKET protocol family
> [    1.918573] mmc0: SDHCI controller on 2198000.mmc [2198000.mmc]
> using ADMA
> [    1.924322] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x001fffff 64bit]
> [    1.931635] Key type dns_resolver registered
> [    1.936764] pci 0000:01:00.0: ROM [mem 0x00000000-0x0000ffff pref]
> [    1.961043] pci 0000:01:00.0: supports D1 D2
> [    1.961526] Registering SWP/SWPB emulation handler
> [    1.965601] mmc0: new DDR MMC card at address 0001
> [    1.976575] mmcblk0: mmc0:0001 Q2J54A 3.59 GiB
> [    1.979794] Loading compiled-in X.509 certificates
> [    1.985036] PCI: bus1: Fast back to back transfers disabled
> [    1.991531] pci 0000:00:00.0: bridge window [mem 0x01000000-0x011fffff]:
> assigned
> [    1.998742]  mmcblk0: p1 p2 p3
> [    1.999163] pci 0000:00:00.0: BAR 0 [mem 0x01200000-0x012fffff]:
> assigned
> [    2.003947] mmcblk0boot0: mmc0:0001 Q2J54A 16.0 MiB
> [    2.008990] pci 0000:00:00.0: bridge window [mem 0x01300000-0x013fffff
> pref]: assigned
> [    2.009023] pci 0000:00:00.0: ROM [mem 0x01400000-0x0140ffff pref]:
> assigned
> [    2.009054] pci 0000:01:00.0: BAR 0 [mem 0x01000000-0x011fffff 64bit]:
> assigned
> [    2.017526] mmcblk0boot1: mmc0:0001 Q2J54A 16.0 MiB
> [    2.022015] pci 0000:01:00.0: ROM [mem 0x01300000-0x0130ffff pref]:
> assigned
> [    2.032133] mmcblk0rpmb: mmc0:0001 Q2J54A 512 KiB, chardev (242:0)
> [    2.036347] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> [    2.036370] pci 0000:00:00.0:   bridge window [mem
> 0x01000000-0x011fffff]
> [    2.036384] pci 0000:00:00.0:   bridge window [mem
> 0x01300000-0x013fffff pref]
> [    2.042552] pps pps0: new PPS source ptp0
> [    2.048338] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
> [    2.083626] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
> [    2.089972] pci_bus 0000:01: resource 1 [mem 0x01000000-0x011fffff]
> [    2.093461] fec 2188000.ethernet eth0: registered PHC device 0
> [    2.096283] pci_bus 0000:01: resource 2 [mem 0x01300000-0x013fffff pref]
> [    2.096352] sysfs: cannot create duplicate filename
> '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/resource0'
> [    2.096365] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted
> 6.12.0-rc1 #54
> [    2.096381] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.096391] Workqueue: async async_run_entry_fn
> [    2.103025] imx_thermal 20c8000.anatop:tempmon: Industrial CPU
> temperature grade - max:105C critical:100C passive:95C
> [    2.108932]
> [    2.108940] Call trace:
> [    2.108950]  unwind_backtrace from show_stack+0x10/0x14
> [    2.121391] clk: Disabling unused clocks
> [    2.127423]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.127451]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.134265] PM: genpd: Disabling unused power domains
> [    2.138525]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.138547]  sysfs_add_bin_file_mode_ns from
> sysfs_create_bin_file+0xac/0xb4
> [    2.149242] ALSA device list:
> [    2.150647]  sysfs_create_bin_file from
> pci_create_resource_files+0x84/0x148
> [    2.153183]   No soundcards found.
> [    2.158407]  pci_create_resource_files from
> pci_bus_add_device+0x24/0xe4
> [    2.211699]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.217930]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.223806]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.229682]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.235559]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.241010]  platform_probe from really_probe+0xd0/0x3a4
> [    2.246364]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.252321]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.258803]  driver_probe_device from
> __driver_attach_async_helper+0x50/0xd8
> [    2.265892]  __driver_attach_async_helper from
> async_run_entry_fn+0x30/0x144
> [    2.272980]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.279115]  process_one_work from worker_thread+0x250/0x3f0
> [    2.284811]  worker_thread from kthread+0x110/0x12c
> [    2.289726]  kthread from ret_from_fork+0x14/0x28
> [    2.294461] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.299535] dfa0:                                     00000000
> 00000000 00000000 00000000
> [    2.307740] dfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.315942] dfe0: 00000000 00000000 00000000 00000000 00000013
> 00000000
> [    2.323156] pcieport 0000:00:00.0: PME: Signaling with IRQ 293
> [    2.329645] pcieport 0000:00:00.0: AER: enabled with IRQ 293
> [    2.335553] sysfs: cannot create duplicate filename
> '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/res
> ource0'
> [    2.347794] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted
> 6.12.0-rc1 #54
> [    2.355229] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [    2.361779] Workqueue: async async_run_entry_fn
> [    2.366349] Call trace:
> [    2.366362]  unwind_backtrace from show_stack+0x10/0x14
> [    2.374183]  show_stack from dump_stack_lvl+0x54/0x68
> [    2.379273]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> [    2.384706]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> [    2.391183]  sysfs_add_bin_file_mode_ns from
> sysfs_create_bin_file+0xac/0xb4
> [    2.398270]  sysfs_create_bin_file from
> pci_create_resource_files+0x84/0x148
> [    2.405360]  pci_create_resource_files from
> pci_bus_add_device+0x24/0xe4
> [    2.412113]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> [    2.418334]  pci_bus_add_devices from pci_bus_add_devices+0x60/0x70
> [    2.424642]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> [    2.430511]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> [    2.436384]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> [    2.442258]  imx_pcie_probe from platform_probe+0x5c/0xb0
> [    2.447704]  platform_probe from really_probe+0xd0/0x3a4
> [    2.453057]  really_probe from __driver_probe_device+0x8c/0x1d4
> [    2.459014]  __driver_probe_device from driver_probe_device+0x30/0xc0
> [    2.465493]  driver_probe_device from
> __driver_attach_async_helper+0x50/0xd8
> [    2.472580]  __driver_attach_async_helper from
> async_run_entry_fn+0x30/0x144
> [    2.479666]  async_run_entry_fn from process_one_work+0x154/0x2dc
> [    2.485800]  process_one_work from worker_thread+0x250/0x3f0
> [    2.491494]  worker_thread from kthread+0x110/0x12c
> [    2.496408]  kthread from ret_from_fork+0x14/0x28
> [    2.501140] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> [    2.506214] dfa0:                                     00000000
> 00000000 00000000 00000000
> [    2.514418] dfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [    2.522620] dfe0: 00000000 00000000 00000000 00000000 00000013
> 00000000
> 
> This was not the case with kernel v6.11, do you have an idea where this comes
> from? I did not dig into more detail yet but it looks a bit like a regression. The
> driver still works, it just prints this duplicate filename warning.
Hi Stefan:
I use my i.MX6QP SabreSD board to make a quick boot test when both the
 v6.12-rc2 and v6.12-rc1 are used.
Since I don't have the i.MX6QDL board at my hand now.
There is no dump, kernel boots successfully, and PCIe works well too.
Do I miss something?
BTW, the imx_v6_v7_defconfig is used in my tests.

root@imx6qpdlsolox:~# lspci
00:00.0 PCI bridge: Synopsys, Inc. DWC_usb3 / PCIe bridge (rev 01)
01:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
root@imx6qpdlsolox:~# uname -a
Linux imx6qpdlsolox 6.12.0-rc2 #11 SMP Sat Oct 12 16:56:57 CST 2024 armv7l GNU/Linux
root@imx6qpdlsolox:~#
root@imx6qpdlsolox:~#
root@imx6qpdlsolox:~# dmesg | grep pci
[    0.442344] imx6q-pcie 1ffc000.pcie: host bridge /soc/pcie@1ffc000 ranges:
[    0.442397] imx6q-pcie 1ffc000.pcie:       IO 0x0001f80000..0x0001f8ffff -> 0x0000000000
[    0.442427] imx6q-pcie 1ffc000.pcie:      MEM 0x0001000000..0x0001efffff -> 0x0001000000
[    1.659104] imx6q-pcie 1ffc000.pcie: iATU: unroll F, 4 ob, 4 ib, align 64K, limit 4G
[    1.765277] imx6q-pcie 1ffc000.pcie: PCIe Gen.1 x1 link up
[    1.770809] imx6q-pcie 1ffc000.pcie: Link: Only Gen1 is enabled
[    1.770820] imx6q-pcie 1ffc000.pcie: Link up, Gen1
[    1.784066] imx6q-pcie 1ffc000.pcie: PCIe Gen.1 x1 link up
[    1.796446] imx6q-pcie 1ffc000.pcie: PCI host bridge to bus 0000:00
[    1.809095] pci_bus 0000:00: root bus resource [bus 00-ff]
[    1.809113] pci_bus 0000:00: root bus resource [io  0x0000-0xffff]
[    1.809125] pci_bus 0000:00: root bus resource [mem 0x01000000-0x01efffff]
[    1.822423] pci 0000:00:00.0: [16c3:abcd] type 01 class 0x060400 PCIe Root Port
[    1.822451] pci 0000:00:00.0: BAR 0 [mem 0x00000000-0x000fffff]
[    1.834148] pci 0000:00:00.0: ROM [mem 0x00000000-0x0000ffff pref]
[    1.849760] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
[    1.890183] pci 0000:00:00.0:   bridge window [io  0x0000-0x0fff]
[    1.896293] pci 0000:00:00.0:   bridge window [mem 0x00000000-0x000fffff]
[    1.903109] pci 0000:00:00.0:   bridge window [mem 0x00000000-0x000fffff pref]
[    1.910386] pci 0000:00:00.0: Limiting cfg_size to 512
[    1.915588] pci 0000:00:00.0: supports D1
[    1.919638] pci 0000:00:00.0: PME# supported from D0 D1 D3hot D3cold
[    1.944036] pci 0000:01:00.0: [8086:10d3] type 00 class 0x020000 PCIe Endpoint
[    1.951427] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x0001ffff]
[    1.961171] pci 0000:01:00.0: BAR 1 [mem 0x00000000-0x0007ffff]
[    1.967149] pci 0000:01:00.0: BAR 2 [io  0x0000-0x001f]
[    1.972456] pci 0000:01:00.0: BAR 3 [mem 0x00000000-0x00003fff]
[    1.978528] pci 0000:01:00.0: ROM [mem 0x00000000-0x0003ffff pref]
[    1.985185] pci 0000:01:00.0: PME# supported from D0 D3hot D3cold
[    2.020060] pci 0000:00:00.0: BAR 0 [mem 0x01000000-0x010fffff]: assigned
[    2.032893] pci 0000:00:00.0: bridge window [mem 0x01100000-0x011fffff]: assigned
[    2.046230] pci 0000:00:00.0: bridge window [mem 0x01200000-0x012fffff pref]: assigned
[    2.059918] pci 0000:00:00.0: ROM [mem 0x01300000-0x0130ffff pref]: assigned
[    2.072647] pci 0000:00:00.0: bridge window [io  0x1000-0x1fff]: assigned
[    2.085371] pci 0000:01:00.0: BAR 1 [mem 0x01100000-0x0117ffff]: assigned
[    2.097773] pci 0000:01:00.0: ROM [mem 0x01200000-0x0123ffff pref]: assigned
[    2.112229] pci 0000:01:00.0: BAR 0 [mem 0x01180000-0x0119ffff]: assigned
[    2.112266] pci 0000:01:00.0: BAR 3 [mem 0x011a0000-0x011a3fff]: assigned
[    2.124738] pci 0000:01:00.0: BAR 2 [io  0x1000-0x101f]: assigned
[    2.137379] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
[    2.149485] pci 0000:00:00.0:   bridge window [io  0x1000-0x1fff]
[    2.149502] pci 0000:00:00.0:   bridge window [mem 0x01100000-0x011fffff]
[    2.160835] pci 0000:00:00.0:   bridge window [mem 0x01200000-0x012fffff pref]
[    2.173722] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
[    2.193124] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
[    2.193139] pci_bus 0000:01: resource 0 [io  0x1000-0x1fff]
[    2.222093] pci_bus 0000:01: resource 1 [mem 0x01100000-0x011fffff]
[    2.228389] pci_bus 0000:01: resource 2 [mem 0x01200000-0x012fffff pref]

Best Regards
Richard Zhu
> 
> Regards,
> Stefan
Stefan Eichenberger Oct. 14, 2024, 8:10 a.m. UTC | #10
Hi Richard and Frank,

On Sat, Oct 12, 2024 at 09:02:28AM +0000, Hongxing Zhu wrote:
> > -----Original Message-----
> > From: Stefan Eichenberger <eichest@gmail.com>
> > Sent: 2024年10月11日 22:36
> > To: Frank Li <frank.li@nxp.com>
> > Cc: Francesco Dolcini <francesco@dolcini.it>; Hongxing Zhu
> > <hongxing.zhu@nxp.com>; l.stach@pengutronix.de; lpieralisi@kernel.org;
> > kw@linux.com; manivannan.sadhasivam@linaro.org; robh@kernel.org;
> > bhelgaas@google.com; shawnguo@kernel.org; s.hauer@pengutronix.de;
> > kernel@pengutronix.de; festevam@gmail.com; Francesco Dolcini
> > <francesco.dolcini@toradex.com>; linux-pci@vger.kernel.org;
> > linux-arm-kernel@lists.infradead.org; imx@lists.linux.dev;
> > linux-kernel@vger.kernel.org; Stefan Eichenberger
> > <stefan.eichenberger@toradex.com>
> > Subject: Re: [PATCH v2] PCI: imx6: Add suspend/resume support for i.MX6QDL
> > 
> > Hi Frank,
> > 
> > On Thu, Oct 10, 2024 at 06:45:17PM -0400, Frank Li wrote:
> > > On Thu, Oct 10, 2024 at 10:11:21PM +0200, Francesco Dolcini wrote:
> > > > Hello Frank,
> > > >
> > > > On Thu, Oct 10, 2024 at 04:01:21PM -0400, Frank Li wrote:
> > > > > On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > > > > > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > > > > >
> > > > > > The suspend/resume support is broken on the i.MX6QDL platform.
> > > > > > This patch resets the link upon resuming to recover
> > > > > > functionality. It shares most of the sequences with other i.MX
> > > > > > devices but does not touch the critical registers, which might
> > > > > > break PCIe. This patch addresses the same issue as the following
> > downstream commit:
> > > > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%
> > > > > >
> > 2Fgithub.com%2Fnxp-imx%2Flinux-imx%2Fcommit%2F4e92355e1f79d225ea
> > > > > >
> > 842511fcfd42b343b32995&data=05%7C02%7Chongxing.zhu%40nxp.com%7C4
> > > > > >
> > 9cdf8aaeee54cec71c508dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c3016
> > > > > >
> > 35%7C0%7C0%7C638642541899874406%7CUnknown%7CTWFpbGZsb3d8eyJWI
> > joi
> > > > > >
> > MC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7
> > > > > >
> > C%7C%7C&sdata=DQGwcXYy0soCov1Uf4ycLQiisP8qlbBzOslyX3FY5Cs%3D&res
> > > > > > erved=0 In comparison this patch will also reset the device if
> > > > > > possible. Without this patch suspend/resume will not work if a
> > > > > > PCIe device is connected.
> > > > > > The kernel will hang on resume and print an error:
> > > > > > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot
> > > > > > to D0, device inaccessible
> > > > > > 8<--- cut here ---
> > > > > > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > > > > >
> > > > > > Signed-off-by: Stefan Eichenberger
> > > > > > <stefan.eichenberger@toradex.com>
> > > > > > ---
> > > > >
> > > > > Thank you for your patch.
> > > > >
> > > > > But it may conflict with another suspend/resume patch
> > > > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2F
> > > > > lore.kernel.org%2Fimx%2F1727245477-15961-8-git-send-email-hongxing
> > > > > .zhu%40nxp.com%2F&data=05%7C02%7Chongxing.zhu%40nxp.com%7C4
> > 9cdf8aa
> > > > >
> > eee54cec71c508dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%
> > 7C
> > > > >
> > 0%7C638642541899892894%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjA
> > wMDA
> > > > >
> > iLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
> > > > >
> > =xPBKoiWL8IceLlv%2F6lWoqkPWosh%2BRvG8jyA8NjKSsOI%3D&reserved=0
> > > >
> > > > Thanks for the head-up.
> > > >
> > > > Do you see any issue with this patch apart that? Because this patch
> > > > is fixing a crash, so I would expect this to be merged, once ready,
> > > > and such a series rebased afterward.
> > > >
> > > > I am writing this explicitly since you wrote a similar comment on
> > > > the
> > > > v1
> > > > (https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fl
> > > > ore.kernel.org%2Fall%2FZsNXDq%252FkidZdyhvD%40lizhi-Precision-Tower-
> > > >
> > 5810%2F&data=05%7C02%7Chongxing.zhu%40nxp.com%7C49cdf8aaeee54cec
> > 71c5
> > > >
> > 08dcea0215b1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63864
> > 25418
> > > >
> > 99904634%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2l
> > uMzI
> > > >
> > iLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=x2OVvZsi4YIxSg
> > KGD
> > > > 7ZzqbRQjiTFK4%2BzMdgDv2qSdig%3D&reserved=0)
> > > > and I would like to prevent to have this fix starving for long just
> > > > because multiple people is working on the same driver.
> > >
> > > My key comment for this patch is use flags IMX_PCIE_FLAG_SKIP_TURN_OFF
> > > in suspend()/resume(), it is up to kw to pick which one firstly.
> > 
> > I will try to implement it as you proposed with the new flag.
> > 
> > However, what I figured out with kernel v6.12-rc1 I get the following warning
> > when booting on an i.MX6QDL even without my patch applied:
> > [    1.901199] PCI: bus0: Fast back to back transfers disabled
> > [    1.904754] mmc1: SDHCI controller on 2190000.mmc [2190000.mmc]
> > using ADMA
> > [    1.904914] mmc2: SDHCI controller on 2194000.mmc [2194000.mmc]
> > using ADMA
> > [    1.910686] pci 0000:01:00.0: [168c:003c] type 00 class 0x028000 PCIe
> > Endpoint
> > [    1.918390] NET: Registered PF_PACKET protocol family
> > [    1.918573] mmc0: SDHCI controller on 2198000.mmc [2198000.mmc]
> > using ADMA
> > [    1.924322] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x001fffff 64bit]
> > [    1.931635] Key type dns_resolver registered
> > [    1.936764] pci 0000:01:00.0: ROM [mem 0x00000000-0x0000ffff pref]
> > [    1.961043] pci 0000:01:00.0: supports D1 D2
> > [    1.961526] Registering SWP/SWPB emulation handler
> > [    1.965601] mmc0: new DDR MMC card at address 0001
> > [    1.976575] mmcblk0: mmc0:0001 Q2J54A 3.59 GiB
> > [    1.979794] Loading compiled-in X.509 certificates
> > [    1.985036] PCI: bus1: Fast back to back transfers disabled
> > [    1.991531] pci 0000:00:00.0: bridge window [mem 0x01000000-0x011fffff]:
> > assigned
> > [    1.998742]  mmcblk0: p1 p2 p3
> > [    1.999163] pci 0000:00:00.0: BAR 0 [mem 0x01200000-0x012fffff]:
> > assigned
> > [    2.003947] mmcblk0boot0: mmc0:0001 Q2J54A 16.0 MiB
> > [    2.008990] pci 0000:00:00.0: bridge window [mem 0x01300000-0x013fffff
> > pref]: assigned
> > [    2.009023] pci 0000:00:00.0: ROM [mem 0x01400000-0x0140ffff pref]:
> > assigned
> > [    2.009054] pci 0000:01:00.0: BAR 0 [mem 0x01000000-0x011fffff 64bit]:
> > assigned
> > [    2.017526] mmcblk0boot1: mmc0:0001 Q2J54A 16.0 MiB
> > [    2.022015] pci 0000:01:00.0: ROM [mem 0x01300000-0x0130ffff pref]:
> > assigned
> > [    2.032133] mmcblk0rpmb: mmc0:0001 Q2J54A 512 KiB, chardev (242:0)
> > [    2.036347] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> > [    2.036370] pci 0000:00:00.0:   bridge window [mem
> > 0x01000000-0x011fffff]
> > [    2.036384] pci 0000:00:00.0:   bridge window [mem
> > 0x01300000-0x013fffff pref]
> > [    2.042552] pps pps0: new PPS source ptp0
> > [    2.048338] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
> > [    2.083626] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
> > [    2.089972] pci_bus 0000:01: resource 1 [mem 0x01000000-0x011fffff]
> > [    2.093461] fec 2188000.ethernet eth0: registered PHC device 0
> > [    2.096283] pci_bus 0000:01: resource 2 [mem 0x01300000-0x013fffff pref]
> > [    2.096352] sysfs: cannot create duplicate filename
> > '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/resource0'
> > [    2.096365] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted
> > 6.12.0-rc1 #54
> > [    2.096381] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> > [    2.096391] Workqueue: async async_run_entry_fn
> > [    2.103025] imx_thermal 20c8000.anatop:tempmon: Industrial CPU
> > temperature grade - max:105C critical:100C passive:95C
> > [    2.108932]
> > [    2.108940] Call trace:
> > [    2.108950]  unwind_backtrace from show_stack+0x10/0x14
> > [    2.121391] clk: Disabling unused clocks
> > [    2.127423]  show_stack from dump_stack_lvl+0x54/0x68
> > [    2.127451]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> > [    2.134265] PM: genpd: Disabling unused power domains
> > [    2.138525]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> > [    2.138547]  sysfs_add_bin_file_mode_ns from
> > sysfs_create_bin_file+0xac/0xb4
> > [    2.149242] ALSA device list:
> > [    2.150647]  sysfs_create_bin_file from
> > pci_create_resource_files+0x84/0x148
> > [    2.153183]   No soundcards found.
> > [    2.158407]  pci_create_resource_files from
> > pci_bus_add_device+0x24/0xe4
> > [    2.211699]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> > [    2.217930]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> > [    2.223806]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> > [    2.229682]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> > [    2.235559]  imx_pcie_probe from platform_probe+0x5c/0xb0
> > [    2.241010]  platform_probe from really_probe+0xd0/0x3a4
> > [    2.246364]  really_probe from __driver_probe_device+0x8c/0x1d4
> > [    2.252321]  __driver_probe_device from driver_probe_device+0x30/0xc0
> > [    2.258803]  driver_probe_device from
> > __driver_attach_async_helper+0x50/0xd8
> > [    2.265892]  __driver_attach_async_helper from
> > async_run_entry_fn+0x30/0x144
> > [    2.272980]  async_run_entry_fn from process_one_work+0x154/0x2dc
> > [    2.279115]  process_one_work from worker_thread+0x250/0x3f0
> > [    2.284811]  worker_thread from kthread+0x110/0x12c
> > [    2.289726]  kthread from ret_from_fork+0x14/0x28
> > [    2.294461] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> > [    2.299535] dfa0:                                     00000000
> > 00000000 00000000 00000000
> > [    2.307740] dfc0: 00000000 00000000 00000000 00000000 00000000
> > 00000000 00000000 00000000
> > [    2.315942] dfe0: 00000000 00000000 00000000 00000000 00000013
> > 00000000
> > [    2.323156] pcieport 0000:00:00.0: PME: Signaling with IRQ 293
> > [    2.329645] pcieport 0000:00:00.0: AER: enabled with IRQ 293
> > [    2.335553] sysfs: cannot create duplicate filename
> > '/devices/platform/soc/1ffc000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/res
> > ource0'
> > [    2.347794] CPU: 3 UID: 0 PID: 52 Comm: kworker/u19:2 Not tainted
> > 6.12.0-rc1 #54
> > [    2.355229] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> > [    2.361779] Workqueue: async async_run_entry_fn
> > [    2.366349] Call trace:
> > [    2.366362]  unwind_backtrace from show_stack+0x10/0x14
> > [    2.374183]  show_stack from dump_stack_lvl+0x54/0x68
> > [    2.379273]  dump_stack_lvl from sysfs_warn_dup+0x58/0x64
> > [    2.384706]  sysfs_warn_dup from sysfs_add_bin_file_mode_ns+0xbc/0xcc
> > [    2.391183]  sysfs_add_bin_file_mode_ns from
> > sysfs_create_bin_file+0xac/0xb4
> > [    2.398270]  sysfs_create_bin_file from
> > pci_create_resource_files+0x84/0x148
> > [    2.405360]  pci_create_resource_files from
> > pci_bus_add_device+0x24/0xe4
> > [    2.412113]  pci_bus_add_device from pci_bus_add_devices+0x2c/0x70
> > [    2.418334]  pci_bus_add_devices from pci_bus_add_devices+0x60/0x70
> > [    2.424642]  pci_bus_add_devices from pci_host_probe+0x7c/0xa4
> > [    2.430511]  pci_host_probe from dw_pcie_host_init+0x4ec/0x71c
> > [    2.436384]  dw_pcie_host_init from imx_pcie_probe+0x3a8/0x75c
> > [    2.442258]  imx_pcie_probe from platform_probe+0x5c/0xb0
> > [    2.447704]  platform_probe from really_probe+0xd0/0x3a4
> > [    2.453057]  really_probe from __driver_probe_device+0x8c/0x1d4
> > [    2.459014]  __driver_probe_device from driver_probe_device+0x30/0xc0
> > [    2.465493]  driver_probe_device from
> > __driver_attach_async_helper+0x50/0xd8
> > [    2.472580]  __driver_attach_async_helper from
> > async_run_entry_fn+0x30/0x144
> > [    2.479666]  async_run_entry_fn from process_one_work+0x154/0x2dc
> > [    2.485800]  process_one_work from worker_thread+0x250/0x3f0
> > [    2.491494]  worker_thread from kthread+0x110/0x12c
> > [    2.496408]  kthread from ret_from_fork+0x14/0x28
> > [    2.501140] Exception stack(0xe6a0dfb0 to 0xe6a0dff8)
> > [    2.506214] dfa0:                                     00000000
> > 00000000 00000000 00000000
> > [    2.514418] dfc0: 00000000 00000000 00000000 00000000 00000000
> > 00000000 00000000 00000000
> > [    2.522620] dfe0: 00000000 00000000 00000000 00000000 00000013
> > 00000000
> > 
> > This was not the case with kernel v6.11, do you have an idea where this comes
> > from? I did not dig into more detail yet but it looks a bit like a regression. The
> > driver still works, it just prints this duplicate filename warning.
> Hi Stefan:
> I use my i.MX6QP SabreSD board to make a quick boot test when both the
>  v6.12-rc2 and v6.12-rc1 are used.
> Since I don't have the i.MX6QDL board at my hand now.
> There is no dump, kernel boots successfully, and PCIe works well too.
> Do I miss something?
> BTW, the imx_v6_v7_defconfig is used in my tests.
> 
> root@imx6qpdlsolox:~# lspci
> 00:00.0 PCI bridge: Synopsys, Inc. DWC_usb3 / PCIe bridge (rev 01)
> 01:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
> root@imx6qpdlsolox:~# uname -a
> Linux imx6qpdlsolox 6.12.0-rc2 #11 SMP Sat Oct 12 16:56:57 CST 2024 armv7l GNU/Linux
> root@imx6qpdlsolox:~#
> root@imx6qpdlsolox:~#
> root@imx6qpdlsolox:~# dmesg | grep pci
> [    0.442344] imx6q-pcie 1ffc000.pcie: host bridge /soc/pcie@1ffc000 ranges:
> [    0.442397] imx6q-pcie 1ffc000.pcie:       IO 0x0001f80000..0x0001f8ffff -> 0x0000000000
> [    0.442427] imx6q-pcie 1ffc000.pcie:      MEM 0x0001000000..0x0001efffff -> 0x0001000000
> [    1.659104] imx6q-pcie 1ffc000.pcie: iATU: unroll F, 4 ob, 4 ib, align 64K, limit 4G
> [    1.765277] imx6q-pcie 1ffc000.pcie: PCIe Gen.1 x1 link up
> [    1.770809] imx6q-pcie 1ffc000.pcie: Link: Only Gen1 is enabled
> [    1.770820] imx6q-pcie 1ffc000.pcie: Link up, Gen1
> [    1.784066] imx6q-pcie 1ffc000.pcie: PCIe Gen.1 x1 link up
> [    1.796446] imx6q-pcie 1ffc000.pcie: PCI host bridge to bus 0000:00
> [    1.809095] pci_bus 0000:00: root bus resource [bus 00-ff]
> [    1.809113] pci_bus 0000:00: root bus resource [io  0x0000-0xffff]
> [    1.809125] pci_bus 0000:00: root bus resource [mem 0x01000000-0x01efffff]
> [    1.822423] pci 0000:00:00.0: [16c3:abcd] type 01 class 0x060400 PCIe Root Port
> [    1.822451] pci 0000:00:00.0: BAR 0 [mem 0x00000000-0x000fffff]
> [    1.834148] pci 0000:00:00.0: ROM [mem 0x00000000-0x0000ffff pref]
> [    1.849760] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> [    1.890183] pci 0000:00:00.0:   bridge window [io  0x0000-0x0fff]
> [    1.896293] pci 0000:00:00.0:   bridge window [mem 0x00000000-0x000fffff]
> [    1.903109] pci 0000:00:00.0:   bridge window [mem 0x00000000-0x000fffff pref]
> [    1.910386] pci 0000:00:00.0: Limiting cfg_size to 512
> [    1.915588] pci 0000:00:00.0: supports D1
> [    1.919638] pci 0000:00:00.0: PME# supported from D0 D1 D3hot D3cold
> [    1.944036] pci 0000:01:00.0: [8086:10d3] type 00 class 0x020000 PCIe Endpoint
> [    1.951427] pci 0000:01:00.0: BAR 0 [mem 0x00000000-0x0001ffff]
> [    1.961171] pci 0000:01:00.0: BAR 1 [mem 0x00000000-0x0007ffff]
> [    1.967149] pci 0000:01:00.0: BAR 2 [io  0x0000-0x001f]
> [    1.972456] pci 0000:01:00.0: BAR 3 [mem 0x00000000-0x00003fff]
> [    1.978528] pci 0000:01:00.0: ROM [mem 0x00000000-0x0003ffff pref]
> [    1.985185] pci 0000:01:00.0: PME# supported from D0 D3hot D3cold
> [    2.020060] pci 0000:00:00.0: BAR 0 [mem 0x01000000-0x010fffff]: assigned
> [    2.032893] pci 0000:00:00.0: bridge window [mem 0x01100000-0x011fffff]: assigned
> [    2.046230] pci 0000:00:00.0: bridge window [mem 0x01200000-0x012fffff pref]: assigned
> [    2.059918] pci 0000:00:00.0: ROM [mem 0x01300000-0x0130ffff pref]: assigned
> [    2.072647] pci 0000:00:00.0: bridge window [io  0x1000-0x1fff]: assigned
> [    2.085371] pci 0000:01:00.0: BAR 1 [mem 0x01100000-0x0117ffff]: assigned
> [    2.097773] pci 0000:01:00.0: ROM [mem 0x01200000-0x0123ffff pref]: assigned
> [    2.112229] pci 0000:01:00.0: BAR 0 [mem 0x01180000-0x0119ffff]: assigned
> [    2.112266] pci 0000:01:00.0: BAR 3 [mem 0x011a0000-0x011a3fff]: assigned
> [    2.124738] pci 0000:01:00.0: BAR 2 [io  0x1000-0x101f]: assigned
> [    2.137379] pci 0000:00:00.0: PCI bridge to [bus 01-ff]
> [    2.149485] pci 0000:00:00.0:   bridge window [io  0x1000-0x1fff]
> [    2.149502] pci 0000:00:00.0:   bridge window [mem 0x01100000-0x011fffff]
> [    2.160835] pci 0000:00:00.0:   bridge window [mem 0x01200000-0x012fffff pref]
> [    2.173722] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
> [    2.193124] pci_bus 0000:00: resource 5 [mem 0x01000000-0x01efffff]
> [    2.193139] pci_bus 0000:01: resource 0 [io  0x1000-0x1fff]
> [    2.222093] pci_bus 0000:01: resource 1 [mem 0x01100000-0x011fffff]
> [    2.228389] pci_bus 0000:01: resource 2 [mem 0x01200000-0x012fffff pref]

Thanks a lot for testing on your side.

I think I was too early to assume it is a regression. I did some more
tests today and I can't see the issue with most configurations. I
uploaded the one that generates the issue on my system with kernel
v6.12-rc1.
https://drive.google.com/file/d/1ZHpg6tG82QjmiXylq0W2HSLnz97AgyN8

However, even if I enhance the pci-imx6.c driver with a simple pr_info
the trace disappears:
diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 808d1f1054173..047f4ae0a2ed3 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -1282,6 +1282,8 @@ static int imx_pcie_probe(struct platform_device *pdev)
        u16 val;
        int i;

+       pr_info("%s - %s:%d\n", __func__, __FILE__, __LINE__);
+
        imx_pcie = devm_kzalloc(dev, sizeof(*imx_pcie), GFP_KERNEL);
        if (!imx_pcie)
                return -ENOMEM;

So most likely it is some sequence/timing issue. I have to check if I
find the time to investigate it further, but I guess it would be on my
side to figure out what happens.

Sorry for the (kind of) false alarm.

Regards,
Stefan
Stefan Eichenberger Oct. 14, 2024, 8:48 a.m. UTC | #11
Hi Mani,

Thanks a lot for the comments.

On Sat, Oct 12, 2024 at 09:43:15AM +0530, Manivannan Sadhasivam wrote:
> On Wed, Oct 09, 2024 at 03:14:05PM +0200, Stefan Eichenberger wrote:
> > From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > 
> > The suspend/resume support is broken on the i.MX6QDL platform. This
> 
> You mean the 'system suspend/resume'?
> 
> > patch resets the link upon resuming to recover functionality. It shares
> > most of the sequences with other i.MX devices but does not touch the
> > critical registers, which might break PCIe. This patch addresses the
> > same issue as the following downstream commit:
> > https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
> > In comparison this patch will also reset the device if possible. Without
> > this patch suspend/resume will not work if a PCIe device is connected.
> > The kernel will hang on resume and print an error:
> > ath10k_pci 0000:01:00.0: Unable to change power state from D3hot to D0, device inaccessible
> 
> Looks like the device is turned off during suspend.

Yes, I don't have a deep understanding about PCIe to be honest. However,
my understanding is that the PCIe devices will always try to suspend and
there is no way from the PCIe host controller to prevent this. Or am I
wrong? Currently suspend is not implemented for the i.MX6Dual/Quad
variant in the pci-imx6 driver and the device will still be turned off
by the PCI subsystem. On the other side I don't think it will fix
anything if I can prevent suspend for the devices because the
communication will still fail after resume.

> 
> > 8<--- cut here ---
> > Unhandled fault: imprecise external abort (0x1406) at 0x0106f944
> > 
> > Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
> > ---
> > v1 -> v2: Share most code with other i.MX platforms and set suspend
> > 	  support flag for i.MX6QDL. Version 1 can be found here:
> > 	  https://lore.kernel.org/all/20240819090428.17349-1-eichest@gmail.com/
> > 
> >  drivers/pci/controller/dwc/pci-imx6.c | 44 +++++++++++++++++++++++++--
> >  1 file changed, 41 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> > index 808d1f1054173..f33bef0aa1071 100644
> > --- a/drivers/pci/controller/dwc/pci-imx6.c
> > +++ b/drivers/pci/controller/dwc/pci-imx6.c
> > @@ -1238,8 +1238,23 @@ static int imx_pcie_suspend_noirq(struct device *dev)
> >  
> >  	imx_pcie_msi_save_restore(imx_pcie, true);
> >  	imx_pcie_pm_turnoff(imx_pcie);
> > -	imx_pcie_stop_link(imx_pcie->pci);
> > -	imx_pcie_host_exit(pp);
> > +	/*
> > +	 * Do not turn off the PCIe controller because of ERR003756, ERR004490, ERR005188,
> > +	 * they all document issues with LLTSSM and the PCIe controller which
> 
> LTSSM
> 
> But LTSSM is for the PCIe link state, not sure how it impacts controller state.
> Can you share the link to those erratums?

The erratas are all from this document:
https://www.nxp.com/docs/en/errata/IMX6DQCE.pdf

Only the i.MX 6Dual/6Quad variant is affected but not the newer Plus
variants.

> 
> > +	 * does not come out of reset properly. Therefore, try to keep the controller enabled
> > +	 * and only reset the link. However, the reference clock still needs to be turned off,
> 
> You are resetting the *device* below, not the link.

Thanks, I will fix this comment.

> > +	 * else the controller will freeze on resume.
> > +	 */
> 
> Please use 80 columns for comments. Exception is for the code.

Thanks, I will fix this.

> > +	if (imx_pcie->drvdata->variant == IMX6Q) {
> > +		/* Reset the PCIe device */
> > +		gpiod_set_value_cansleep(imx_pcie->reset_gpiod, 1);
> > +
> > +		if (imx_pcie->drvdata->enable_ref_clk)
> > +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, false);
> > +	} else {
> > +		imx_pcie_stop_link(imx_pcie->pci);
> > +		imx_pcie_host_exit(pp);
> > +	}
> >  
> >  	return 0;
> >  }
> > @@ -1253,6 +1268,28 @@ static int imx_pcie_resume_noirq(struct device *dev)
> >  	if (!(imx_pcie->drvdata->flags & IMX_PCIE_FLAG_SUPPORTS_SUSPEND))
> >  		return 0;
> >  
> > +	/*
> > +	 * Even though the i.MX6Q does not support proper suspend/resume, we
> > +	 * need to reset the link after resume or the memory mapped PCIe I/O
> > +	 * space will be inaccessible. This will cause the system to freeze.
> > +	 */

Thanks, I will fix this.

> 
> > +	if (imx_pcie->drvdata->variant == IMX6Q) {
> > +		if (imx_pcie->drvdata->enable_ref_clk)
> > +			imx_pcie->drvdata->enable_ref_clk(imx_pcie, true);
> > +
> > +		imx_pcie_deassert_core_reset(imx_pcie);
> 
> There is no corresponding imx_pcie_assert_core_reset() in suspend.

Thanks, I will try to fix this similar to what they do in the host_init.

> > +
> > +		/*
> > +		 * Setup the root complex again and enable msi. Without this PCIe will
> > +		 * not work in msi mode and drivers will crash if they try to access
> > +		 * the device memory area
> > +		 */
> 
> This indicates that the controller state is not preserved. I think we need a bit
> more understanding on what is going on during system suspend on this platform.

Yes I fully agree, but unfortunately I don't really know how to fix it
properly. I just tried to fix the driver by searching for an absolute
minimum that is required to make suspend/resume work when a PCIe device
is connected. As an alternative the downstream patch would do something
similar but I thought also resetting the device would be a better
solution (to stay closer to what the other variants do):
https://github.com/nxp-imx/linux-imx/commit/4e92355e1f79d225ea842511fcfd42b343b32995
I just now saw that I didn't mention ERR005723 at all, sorry for that.
If this would be a more acceptable solution I would do some more tests
with this patch in our setup.

Regards,
Stefan
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 808d1f1054173..f33bef0aa1071 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -1238,8 +1238,23 @@  static int imx_pcie_suspend_noirq(struct device *dev)
 
 	imx_pcie_msi_save_restore(imx_pcie, true);
 	imx_pcie_pm_turnoff(imx_pcie);
-	imx_pcie_stop_link(imx_pcie->pci);
-	imx_pcie_host_exit(pp);
+	/*
+	 * Do not turn off the PCIe controller because of ERR003756, ERR004490, ERR005188,
+	 * they all document issues with LLTSSM and the PCIe controller which
+	 * does not come out of reset properly. Therefore, try to keep the controller enabled
+	 * and only reset the link. However, the reference clock still needs to be turned off,
+	 * else the controller will freeze on resume.
+	 */
+	if (imx_pcie->drvdata->variant == IMX6Q) {
+		/* Reset the PCIe device */
+		gpiod_set_value_cansleep(imx_pcie->reset_gpiod, 1);
+
+		if (imx_pcie->drvdata->enable_ref_clk)
+			imx_pcie->drvdata->enable_ref_clk(imx_pcie, false);
+	} else {
+		imx_pcie_stop_link(imx_pcie->pci);
+		imx_pcie_host_exit(pp);
+	}
 
 	return 0;
 }
@@ -1253,6 +1268,28 @@  static int imx_pcie_resume_noirq(struct device *dev)
 	if (!(imx_pcie->drvdata->flags & IMX_PCIE_FLAG_SUPPORTS_SUSPEND))
 		return 0;
 
+	/*
+	 * Even though the i.MX6Q does not support proper suspend/resume, we
+	 * need to reset the link after resume or the memory mapped PCIe I/O
+	 * space will be inaccessible. This will cause the system to freeze.
+	 */
+	if (imx_pcie->drvdata->variant == IMX6Q) {
+		if (imx_pcie->drvdata->enable_ref_clk)
+			imx_pcie->drvdata->enable_ref_clk(imx_pcie, true);
+
+		imx_pcie_deassert_core_reset(imx_pcie);
+
+		/*
+		 * Setup the root complex again and enable msi. Without this PCIe will
+		 * not work in msi mode and drivers will crash if they try to access
+		 * the device memory area
+		 */
+		dw_pcie_setup_rc(&imx_pcie->pci->pp);
+		imx_pcie_msi_save_restore(imx_pcie, false);
+
+		return 0;
+	}
+
 	ret = imx_pcie_host_init(pp);
 	if (ret)
 		return ret;
@@ -1485,7 +1522,8 @@  static const struct imx_pcie_drvdata drvdata[] = {
 	[IMX6Q] = {
 		.variant = IMX6Q,
 		.flags = IMX_PCIE_FLAG_IMX_PHY |
-			 IMX_PCIE_FLAG_IMX_SPEED_CHANGE,
+			 IMX_PCIE_FLAG_IMX_SPEED_CHANGE |
+			 IMX_PCIE_FLAG_SUPPORTS_SUSPEND,
 		.dbi_length = 0x200,
 		.gpr = "fsl,imx6q-iomuxc-gpr",
 		.clk_names = imx6q_clks,