diff mbox series

[v6,3/5] PCI: brcmstb: Set higher value for internal bus timeout

Message ID 20230623144100.34196-4-james.quinlan@broadcom.com (mailing list archive)
State New, archived
Headers show
Series PCI: brcmstb: Configure appropriate HW CLKREQ# mode | expand

Commit Message

Jim Quinlan June 23, 2023, 2:40 p.m. UTC
During long periods of the PCIe RC HW being in an L1SS sleep state, there
may be a timeout on an internal bus access, even though there may not be
any PCIe access involved.  Such a timeout will cause a subsequent CPU
abort.

So, when "brcm,enable-l1ss" is observed, we increase the timeout value to
four seconds instead of using its HW default.

Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Lorenzo Pieralisi July 28, 2023, 8:43 a.m. UTC | #1
On Fri, Jun 23, 2023 at 10:40:56AM -0400, Jim Quinlan wrote:
> During long periods of the PCIe RC HW being in an L1SS sleep state, there
> may be a timeout on an internal bus access, even though there may not be
> any PCIe access involved.  Such a timeout will cause a subsequent CPU
> abort.
> 
> So, when "brcm,enable-l1ss" is observed, we increase the timeout value to
> four seconds instead of using its HW default.
> 
> Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
> Tested-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
>  drivers/pci/controller/pcie-brcmstb.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
> index d30636a725d7..fe0415a98c63 100644
> --- a/drivers/pci/controller/pcie-brcmstb.c
> +++ b/drivers/pci/controller/pcie-brcmstb.c
> @@ -1034,6 +1034,21 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
>  	return 0;
>  }
>  
> +/*
> + * This extends the timeout period for an access to an internal bus.  This
> + * access timeout may occur during L1SS sleep periods even without the
> + * presence of a PCIe access.
> + */
> +static void brcm_extend_rbus_timeout(struct brcm_pcie *pcie)
> +{
> +	/* TIMEOUT register is two registers before RGR1_SW_INIT_1 */
> +	const unsigned int REG_OFFSET = PCIE_RGR1_SW_INIT_1(pcie) - 8;

Nit: you could define an offset for the TIMEOUT register, if that makes
it any cleaner, up to you.

> +	u32 timeout_us = 4000000; /* 4 seconds, our setting for L1SS */

It would be useful to describe why this has to be 4 seconds in case
someone in the future will have to change it.

Thanks,
Lorenzo

> +	/* Each unit in timeout register is 1/216,000,000 seconds */
> +	writel(216 * timeout_us, pcie->base + REG_OFFSET);
> +}
> +
>  static void brcm_config_clkreq(struct brcm_pcie *pcie)
>  {
>  	bool l1ss = of_property_read_bool(pcie->np, "brcm,enable-l1ss");
> @@ -1059,6 +1074,7 @@ static void brcm_config_clkreq(struct brcm_pcie *pcie)
>  		 * of 400ns, as specified in 3.2.5.2.2 of the PCI Express
>  		 * Mini CEM 2.0 specification.
>  		 */
> +		brcm_extend_rbus_timeout(pcie);
>  		clkreq_set |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_L1SS_ENABLE_MASK;
>  		dev_info(pcie->dev, "bi-dir CLKREQ# for L1SS power savings");
>  	} else {
> -- 
> 2.17.1
>
Jim Quinlan Aug. 14, 2023, 7:30 p.m. UTC | #2
On Fri, Jul 28, 2023 at 12:15 PM Jim Quinlan <james.quinlan@broadcom.com> wrote:
>
>
>
> On Thu, Jul 27, 2023, 10:44 PM Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
>>
>> On Fri, Jun 23, 2023 at 10:40:56AM -0400, Jim Quinlan wrote:
>> > During long periods of the PCIe RC HW being in an L1SS sleep state, there
>> > may be a timeout on an internal bus access, even though there may not be
>> > any PCIe access involved.  Such a timeout will cause a subsequent CPU
>> > abort.
>> >
>> > So, when "brcm,enable-l1ss" is observed, we increase the timeout value to
>> > four seconds instead of using its HW default.
>> >
>> > Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
>> > Tested-by: Florian Fainelli <f.fainelli@gmail.com>
>> > ---
>> >  drivers/pci/controller/pcie-brcmstb.c | 16 ++++++++++++++++
>> >  1 file changed, 16 insertions(+)
>> >
>> > diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
>> > index d30636a725d7..fe0415a98c63 100644
>> > --- a/drivers/pci/controller/pcie-brcmstb.c
>> > +++ b/drivers/pci/controller/pcie-brcmstb.c
>> > @@ -1034,6 +1034,21 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
>> >       return 0;
>> >  }
>> >
>> > +/*
>> > + * This extends the timeout period for an access to an internal bus.  This
>> > + * access timeout may occur during L1SS sleep periods even without the
>> > + * presence of a PCIe access.
>> > + */
>> > +static void brcm_extend_rbus_timeout(struct brcm_pcie *pcie)
>> > +{
>> > +     /* TIMEOUT register is two registers before RGR1_SW_INIT_1 */
>> > +     const unsigned int REG_OFFSET = PCIE_RGR1_SW_INIT_1(pcie) - 8;
>>
>> Nit: you could define an offset for the TIMEOUT register, if that makes
>> it any cleaner, up to you.
>>
>> > +     u32 timeout_us = 4000000; /* 4 seconds, our setting for L1SS */
>>
>> It would be useful to describe why this has to be 4 seconds in case
>> someone in the future will have to change it.
>
>
> Hello,
> IIRC our customer requested 2s and we doubled it.  Bjorn, can you please add this comment or a paraphrase of it before applying -- I'm currently on vacation.

Hello Bjorn,

Is the above request okay with you?  What is the status of these
commits -- will they be applied to pci-next in the near future?

Regards,
Jim Quinlan
Broadcom STB

>
> Regards,
> Jim Quinlan
>
>>
>> Thanks,
>> Lorenzo
>>
>> > +     /* Each unit in timeout register is 1/216,000,000 seconds */
>> > +     writel(216 * timeout_us, pcie->base + REG_OFFSET);
>> > +}
>> > +
>> >  static void brcm_config_clkreq(struct brcm_pcie *pcie)
>> >  {
>> >       bool l1ss = of_property_read_bool(pcie->np, "brcm,enable-l1ss");
>> > @@ -1059,6 +1074,7 @@ static void brcm_config_clkreq(struct brcm_pcie *pcie)
>> >                * of 400ns, as specified in 3.2.5.2.2 of the PCI Express
>> >                * Mini CEM 2.0 specification.
>> >                */
>> > +             brcm_extend_rbus_timeout(pcie);
>> >               clkreq_set |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_L1SS_ENABLE_MASK;
>> >               dev_info(pcie->dev, "bi-dir CLKREQ# for L1SS power savings");
>> >       } else {
>> > --
>> > 2.17.1
>> >
>>
>>
Bjorn Helgaas Aug. 14, 2023, 10:06 p.m. UTC | #3
On Mon, Aug 14, 2023 at 03:30:07PM -0400, Jim Quinlan wrote:
> On Fri, Jul 28, 2023 at 12:15 PM Jim Quinlan <james.quinlan@broadcom.com> wrote:
> > On Thu, Jul 27, 2023, 10:44 PM Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
> >> On Fri, Jun 23, 2023 at 10:40:56AM -0400, Jim Quinlan wrote:
> >> > During long periods of the PCIe RC HW being in an L1SS sleep state, there
> >> > may be a timeout on an internal bus access, even though there may not be
> >> > any PCIe access involved.  Such a timeout will cause a subsequent CPU
> >> > abort.
> >> >
> >> > So, when "brcm,enable-l1ss" is observed, we increase the timeout value to
> >> > four seconds instead of using its HW default.
> >> >
> >> > Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
> >> > Tested-by: Florian Fainelli <f.fainelli@gmail.com>
> >> > ---
> >> >  drivers/pci/controller/pcie-brcmstb.c | 16 ++++++++++++++++
> >> >  1 file changed, 16 insertions(+)
> >> >
> >> > diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
> >> > index d30636a725d7..fe0415a98c63 100644
> >> > --- a/drivers/pci/controller/pcie-brcmstb.c
> >> > +++ b/drivers/pci/controller/pcie-brcmstb.c
> >> > @@ -1034,6 +1034,21 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
> >> >       return 0;
> >> >  }
> >> >
> >> > +/*
> >> > + * This extends the timeout period for an access to an internal bus.  This
> >> > + * access timeout may occur during L1SS sleep periods even without the
> >> > + * presence of a PCIe access.
> >> > + */
> >> > +static void brcm_extend_rbus_timeout(struct brcm_pcie *pcie)
> >> > +{
> >> > +     /* TIMEOUT register is two registers before RGR1_SW_INIT_1 */
> >> > +     const unsigned int REG_OFFSET = PCIE_RGR1_SW_INIT_1(pcie) - 8;
> >>
> >> Nit: you could define an offset for the TIMEOUT register, if that makes
> >> it any cleaner, up to you.
> >>
> >> > +     u32 timeout_us = 4000000; /* 4 seconds, our setting for L1SS */
> >>
> >> It would be useful to describe why this has to be 4 seconds in case
> >> someone in the future will have to change it.
> >
> > IIRC our customer requested 2s and we doubled it.  Bjorn, can you
> > please add this comment or a paraphrase of it before applying --
> > I'm currently on vacation.
> 
> Is the above request okay with you?  What is the status of these
> commits -- will they be applied to pci-next in the near future?

The "brcm,enable-l1ss" DT property is either unnecessary or an
indication of a hardware defect in the controller.

Requiring the property is a terrible user experience, completely
antithetical to the PCI compatibility story, and per the conversation
at [1], there are no known problems that would occur if we ignored
"brcm,enable-l1ss" and always configured mode (c) ("Bidirectional
CLKREQ# for L1SS capable devices").

Even when configured as mode (c), L1SS is not *always* enabled.  It's
certainly not enabled before ASPM init, and users can always disable
L1SS whenever they desire via the sysfs interfaces or pcie_aspm=off,
so if there's some problem with running in mode (c) with L1SS
disabled, we're still likely to see it.

But if you want to require the DT property, I guess it's mainly an
issue for you and your customers.

So to answer your question, yes, I'm OK with this series.

Bjorn

[1] https://lore.kernel.org/r/20230428223500.23337-2-jim2101024@gmail.com

> >> > +     /* Each unit in timeout register is 1/216,000,000 seconds */
> >> > +     writel(216 * timeout_us, pcie->base + REG_OFFSET);
> >> > +}
> >> > +
> >> >  static void brcm_config_clkreq(struct brcm_pcie *pcie)
> >> >  {
> >> >       bool l1ss = of_property_read_bool(pcie->np, "brcm,enable-l1ss");
> >> > @@ -1059,6 +1074,7 @@ static void brcm_config_clkreq(struct brcm_pcie *pcie)
> >> >                * of 400ns, as specified in 3.2.5.2.2 of the PCI Express
> >> >                * Mini CEM 2.0 specification.
> >> >                */
> >> > +             brcm_extend_rbus_timeout(pcie);
> >> >               clkreq_set |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_L1SS_ENABLE_MASK;
> >> >               dev_info(pcie->dev, "bi-dir CLKREQ# for L1SS power savings");
> >> >       } else {
Jim Quinlan Aug. 15, 2023, 12:34 p.m. UTC | #4
On Mon, Aug 14, 2023 at 6:07 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> On Mon, Aug 14, 2023 at 03:30:07PM -0400, Jim Quinlan wrote:
> > On Fri, Jul 28, 2023 at 12:15 PM Jim Quinlan <james.quinlan@broadcom.com> wrote:
> > > On Thu, Jul 27, 2023, 10:44 PM Lorenzo Pieralisi <lpieralisi@kernel.org> wrote:
> > >> On Fri, Jun 23, 2023 at 10:40:56AM -0400, Jim Quinlan wrote:
> > >> > During long periods of the PCIe RC HW being in an L1SS sleep state, there
> > >> > may be a timeout on an internal bus access, even though there may not be
> > >> > any PCIe access involved.  Such a timeout will cause a subsequent CPU
> > >> > abort.
> > >> >
> > >> > So, when "brcm,enable-l1ss" is observed, we increase the timeout value to
> > >> > four seconds instead of using its HW default.
> > >> >
> > >> > Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
> > >> > Tested-by: Florian Fainelli <f.fainelli@gmail.com>
> > >> > ---
> > >> >  drivers/pci/controller/pcie-brcmstb.c | 16 ++++++++++++++++
> > >> >  1 file changed, 16 insertions(+)
> > >> >
> > >> > diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
> > >> > index d30636a725d7..fe0415a98c63 100644
> > >> > --- a/drivers/pci/controller/pcie-brcmstb.c
> > >> > +++ b/drivers/pci/controller/pcie-brcmstb.c
> > >> > @@ -1034,6 +1034,21 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
> > >> >       return 0;
> > >> >  }
> > >> >
> > >> > +/*
> > >> > + * This extends the timeout period for an access to an internal bus.  This
> > >> > + * access timeout may occur during L1SS sleep periods even without the
> > >> > + * presence of a PCIe access.
> > >> > + */
> > >> > +static void brcm_extend_rbus_timeout(struct brcm_pcie *pcie)
> > >> > +{
> > >> > +     /* TIMEOUT register is two registers before RGR1_SW_INIT_1 */
> > >> > +     const unsigned int REG_OFFSET = PCIE_RGR1_SW_INIT_1(pcie) - 8;
> > >>
> > >> Nit: you could define an offset for the TIMEOUT register, if that makes
> > >> it any cleaner, up to you.
> > >>
> > >> > +     u32 timeout_us = 4000000; /* 4 seconds, our setting for L1SS */
> > >>
> > >> It would be useful to describe why this has to be 4 seconds in case
> > >> someone in the future will have to change it.
> > >
> > > IIRC our customer requested 2s and we doubled it.  Bjorn, can you
> > > please add this comment or a paraphrase of it before applying --
> > > I'm currently on vacation.
> >
> > Is the above request okay with you?  What is the status of these
> > commits -- will they be applied to pci-next in the near future?
>
> The "brcm,enable-l1ss" DT property is either unnecessary or an
> indication of a hardware defect in the controller.

Agree.
>
> Requiring the property is a terrible user experience, completely
> antithetical to the PCI compatibility story, and per the conversation
> at [1], there are no known problems that would occur if we ignored
> "brcm,enable-l1ss" and always configured mode (c) ("Bidirectional
> CLKREQ# for L1SS capable devices").

Agree, but I don't believe this issue will be in the top five problems of RPi
folks getting their PCIe systems to work.

>
> Even when configured as mode (c), L1SS is not *always* enabled.  It's
> certainly not enabled before ASPM init, and users can always disable
> L1SS whenever they desire via the sysfs interfaces or pcie_aspm=off,
> so if there's some problem with running in mode (c) with L1SS
> disabled, we're still likely to see it.
>
> But if you want to require the DT property, I guess it's mainly an
> issue for you and your customers.

I believe this to be the best solution for the current HW.  As Cyril and
I have noted, it allows some platforms to work that were not working
previously.

So I am all for these commits being applied.

FWIW, I am currently advocating changing the PCIe HW core to seamlessly
handle all of the ASPM (sub)state transitions w/o awkward SW driver
intervention.  I am also advocating other changes.  So there is a
possibility things will be changed for the better in the future.

Regards,
Jim Quinlan
Broadcom STB

>
> So to answer your question, yes, I'm OK with this series.
>
> Bjorn
>
> [1] https://lore.kernel.org/r/20230428223500.23337-2-jim2101024@gmail.com
>
> > >> > +     /* Each unit in timeout register is 1/216,000,000 seconds */
> > >> > +     writel(216 * timeout_us, pcie->base + REG_OFFSET);
> > >> > +}
> > >> > +
> > >> >  static void brcm_config_clkreq(struct brcm_pcie *pcie)
> > >> >  {
> > >> >       bool l1ss = of_property_read_bool(pcie->np, "brcm,enable-l1ss");
> > >> > @@ -1059,6 +1074,7 @@ static void brcm_config_clkreq(struct brcm_pcie *pcie)
> > >> >                * of 400ns, as specified in 3.2.5.2.2 of the PCI Express
> > >> >                * Mini CEM 2.0 specification.
> > >> >                */
> > >> > +             brcm_extend_rbus_timeout(pcie);
> > >> >               clkreq_set |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_L1SS_ENABLE_MASK;
> > >> >               dev_info(pcie->dev, "bi-dir CLKREQ# for L1SS power savings");
> > >> >       } else {
diff mbox series

Patch

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index d30636a725d7..fe0415a98c63 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -1034,6 +1034,21 @@  static int brcm_pcie_setup(struct brcm_pcie *pcie)
 	return 0;
 }
 
+/*
+ * Extend the timeout period for an access to an internal bus.  Such an
+ * access timeout may occur during long L1SS sleep periods, even without a
+ * PCIe access being involved, and would cause a subsequent CPU abort.
+ */
+static void brcm_extend_rbus_timeout(struct brcm_pcie *pcie)
+{
+	/* TIMEOUT register is two registers (8 bytes) before RGR1_SW_INIT_1 */
+	const unsigned int REG_OFFSET = PCIE_RGR1_SW_INIT_1(pcie) - 8;
+	u32 timeout_us = 4000000; /* 4s for L1SS (2s requested, doubled) */
+
+	/* Register unit is 1/216,000,000 seconds, i.e. 216 units per us */
+	writel(216 * timeout_us, pcie->base + REG_OFFSET);
+}
+
 static void brcm_config_clkreq(struct brcm_pcie *pcie)
 {
 	bool l1ss = of_property_read_bool(pcie->np, "brcm,enable-l1ss");
@@ -1059,6 +1074,7 @@  static void brcm_config_clkreq(struct brcm_pcie *pcie)
 		 * of 400ns, as specified in 3.2.5.2.2 of the PCI Express
 		 * Mini CEM 2.0 specification.
 		 */
+		brcm_extend_rbus_timeout(pcie);
 		clkreq_set |= PCIE_MISC_HARD_PCIE_HARD_DEBUG_L1SS_ENABLE_MASK;
 		dev_info(pcie->dev, "bi-dir CLKREQ# for L1SS power savings");
 	} else {