diff mbox

[V2,3/4] PCI: tegra: Apply sw fixups to support ASPM-L1 Sub-States

Message ID 1509423769-10522-4-git-send-email-vidyas@nvidia.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Vidya Sagar Oct. 31, 2017, 4:22 a.m. UTC
Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
values to get them reflected in ASPM-L1 Sub-States capability registers
Also adjusts internal counter values according to 19.2 MHz clk_m value

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
---
V2:
* no change in this patch

 drivers/pci/host/pci-tegra.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

Comments

Bjorn Helgaas Nov. 7, 2017, 10:50 p.m. UTC | #1
Hi Vidya,

On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
> Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
> values to get them reflected in ASPM-L1 Sub-States capability registers
> Also adjusts internal counter values according to 19.2 MHz clk_m value
> 
> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> ---
> V2:
> * no change in this patch
> 
>  drivers/pci/host/pci-tegra.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 65 insertions(+)
> 
> diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
> index e1526cc5d381..08da67a82a2d 100644
> --- a/drivers/pci/host/pci-tegra.c
> +++ b/drivers/pci/host/pci-tegra.c
> @@ -40,6 +40,7 @@
>  #include <linux/of_pci.h>
>  #include <linux/of_platform.h>
>  #include <linux/pci.h>
> +#include <linux/pci-aspm.h>
>  #include <linux/phy/phy.h>
>  #include <linux/platform_device.h>
>  #include <linux/reset.h>
> @@ -191,6 +192,27 @@
>  #define RP_PRIV_XP_DL	0x494
>  #define  RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD	(0x1ff << 1)
>  
> +#define RP_L1_PM_SUBSTATES_CTL				0xC00
> +#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK		(0xFF << 8)
> +#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT		8
> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK		(0x3 << 16)
> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT		16
> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK		(0x1F << 19)
> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT		19
> +#define RP_L1_PM_SUBSTATES_CTL_HIDE_CAP			(0x1 << 24)
> +
> +#define RP_L1_PM_SUBSTATES_1_CTL			0xC04
> +#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK	0x1FFF
> +#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY		0x26
> +
> +#define RP_L1_PM_SUBSTATES_2_CTL			0xC08
> +#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK	0x1FFF
> +#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY		0x4D
> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK	(0xFF << 13)
> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND		(0x13 << 13)
> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK	(0xF << 21)
> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP	(0x2 << 21)
> +
>  #define RP_RX_HDR_LIMIT	0xe00
>  #define  RP_RX_HDR_LIMIT_PW_MASK	(0xff << 8)
>  #define  RP_RX_HDR_LIMIT_PW		(0x0e << 8)
> @@ -331,6 +353,7 @@ struct tegra_pcie_soc {
>  	bool program_deskew_time;
>  	bool updateFC_threshold;
>  	bool has_aspm_l1;
> +	bool has_aspm_l1ss;
>  };
>  
>  static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
> @@ -423,6 +446,12 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>  	return readl(pcie->pads + offset);
>  }
>  
> +u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
> +{
> +	/* LTR L1.2 Threshold = 55us for all ports */
> +	return ((0x37 << 16) | (0x02 << 29));

I know you've already worked through this, but let me think out loud
to try to figure this out myself.

ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
and has a longer exit delay than L1.1 [sec 5.5].

Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
[sec 6.18].  When LTR is enabled, a device periodically sends LTR
messages.

When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
always chooses L1.2 [sec 5.5.1].

I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
think Linux ever enables LTR.  Some BIOSes apparently enable it
(Google for "LTR enabled").

1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
   on the latency requirements of downstream devices.  How did you
   come up with 55us?

2) The spec requires [sec 5.5.4] that LTR_L1.2_THRESHOLD at both ends
   of a Link be the same, but as far as I can see, there's no
   requirement that it be the same across the whole system, so this
   interface doesn't seem like quite the right approach.

3) We must support kernels with multiple host bridge drivers compiled
   in, and the weak/strong symbol approach doesn't support using the
   correct version, e.g., if we merge this patch, every system
   containing the tegra driver would use this function, even if the
   hardware had a different host bridge.  Also, if another driver
   implemented its own version, we'd have duplicate symbols.

>  /*
>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> @@ -2262,6 +2291,37 @@ static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
>  		value |= RP_VEND_XP_UPDATE_FC_THRESHOLD_T210;
>  		writel(value, port->base + RP_VEND_XP);
>  	}
> +
> +	if (soc->has_aspm_l1ss) {
> +		/* Set Common Mode Restore Time to 30us */
> +		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
> +		value &= ~RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK;
> +		value |= (0x1E << RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT);
> +		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
> +
> +		/* set T_Power_On to 70us */
> +		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
> +		value &= ~(RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK |
> +			RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK);
> +		value |= (1 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT) |
> +			(7 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT);
> +		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
> +
> +		/* Following is based on clk_m being 19.2 MHz */
> +		value = readl(port->base + RP_L1_PM_SUBSTATES_1_CTL);
> +		value &= ~RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK;
> +		value |= RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY;
> +		writel(value, port->base + RP_L1_PM_SUBSTATES_1_CTL);
> +
> +		value = readl(port->base + RP_L1_PM_SUBSTATES_2_CTL);
> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK;
> +		value |= RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY;
> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK;
> +		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND;
> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK;
> +		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP;
> +		writel(value, port->base + RP_L1_PM_SUBSTATES_2_CTL);
> +	}
>  }
>  /*
>   * FIXME: If there are no PCIe cards attached, then calling this function
> @@ -2403,6 +2463,7 @@ static const struct tegra_pcie_soc tegra20_pcie = {
>  	.program_deskew_time = false,
>  	.updateFC_threshold = false,
>  	.has_aspm_l1 = false,
> +	.has_aspm_l1ss = false,
>  };
>  
>  static const struct tegra_pcie_soc tegra30_pcie = {
> @@ -2425,6 +2486,7 @@ static const struct tegra_pcie_soc tegra30_pcie = {
>  	.program_deskew_time = false,
>  	.updateFC_threshold = false,
>  	.has_aspm_l1 = true,
> +	.has_aspm_l1ss = false,
>  };
>  
>  static const struct tegra_pcie_soc tegra124_pcie = {
> @@ -2446,6 +2508,7 @@ static const struct tegra_pcie_soc tegra124_pcie = {
>  	.program_deskew_time = false,
>  	.updateFC_threshold = false,
>  	.has_aspm_l1 = true,
> +	.has_aspm_l1ss = false,
>  };
>  
>  static const struct tegra_pcie_soc tegra210_pcie = {
> @@ -2475,6 +2538,7 @@ static const struct tegra_pcie_soc tegra210_pcie = {
>  	.program_deskew_time = true,
>  	.updateFC_threshold = true,
>  	.has_aspm_l1 = true,
> +	.has_aspm_l1ss = true,
>  };
>  
>  static const struct tegra_pcie_soc tegra186_pcie = {
> @@ -2497,6 +2561,7 @@ static const struct tegra_pcie_soc tegra186_pcie = {
>  	.program_deskew_time = false,
>  	.updateFC_threshold = false,
>  	.has_aspm_l1 = true,
> +	.has_aspm_l1ss = true,
>  };
>  
>  static const struct of_device_id tegra_pcie_of_match[] = {
> -- 
> 2.7.4
>
Vidya Sagar Nov. 8, 2017, 8:45 a.m. UTC | #2
On Wednesday 08 November 2017 04:20 AM, Bjorn Helgaas wrote:
> Hi Vidya,
>
> On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
>> Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
>> values to get them reflected in ASPM-L1 Sub-States capability registers
>> Also adjusts internal counter values according to 19.2 MHz clk_m value
>>
>> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
>> ---
>> V2:
>> * no change in this patch
>>
>>   drivers/pci/host/pci-tegra.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 65 insertions(+)
>>
>> diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
>> index e1526cc5d381..08da67a82a2d 100644
>> --- a/drivers/pci/host/pci-tegra.c
>> +++ b/drivers/pci/host/pci-tegra.c
>> @@ -40,6 +40,7 @@
>>   #include <linux/of_pci.h>
>>   #include <linux/of_platform.h>
>>   #include <linux/pci.h>
>> +#include <linux/pci-aspm.h>
>>   #include <linux/phy/phy.h>
>>   #include <linux/platform_device.h>
>>   #include <linux/reset.h>
>> @@ -191,6 +192,27 @@
>>   #define RP_PRIV_XP_DL	0x494
>>   #define  RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD	(0x1ff << 1)
>>   
>> +#define RP_L1_PM_SUBSTATES_CTL				0xC00
>> +#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK		(0xFF << 8)
>> +#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT		8
>> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK		(0x3 << 16)
>> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT		16
>> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK		(0x1F << 19)
>> +#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT		19
>> +#define RP_L1_PM_SUBSTATES_CTL_HIDE_CAP			(0x1 << 24)
>> +
>> +#define RP_L1_PM_SUBSTATES_1_CTL			0xC04
>> +#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK	0x1FFF
>> +#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY		0x26
>> +
>> +#define RP_L1_PM_SUBSTATES_2_CTL			0xC08
>> +#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK	0x1FFF
>> +#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY		0x4D
>> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK	(0xFF << 13)
>> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND		(0x13 << 13)
>> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK	(0xF << 21)
>> +#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP	(0x2 << 21)
>> +
>>   #define RP_RX_HDR_LIMIT	0xe00
>>   #define  RP_RX_HDR_LIMIT_PW_MASK	(0xff << 8)
>>   #define  RP_RX_HDR_LIMIT_PW		(0x0e << 8)
>> @@ -331,6 +353,7 @@ struct tegra_pcie_soc {
>>   	bool program_deskew_time;
>>   	bool updateFC_threshold;
>>   	bool has_aspm_l1;
>> +	bool has_aspm_l1ss;
>>   };
>>   
>>   static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
>> @@ -423,6 +446,12 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>>   	return readl(pcie->pads + offset);
>>   }
>>   
>> +u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
>> +{
>> +	/* LTR L1.2 Threshold = 55us for all ports */
>> +	return ((0x37 << 16) | (0x02 << 29));
> I know you've already worked through this, but let me think out loud
> to try to figure this out myself.
>
> ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
> extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
> and has a longer exit delay than L1.1 [sec 5.5].
>
> Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
> [sec 6.18].  When LTR is enabled, a device periodically sends LTR
> messages.
>
> When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
> LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
> information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
> always chooses L1.2 [sec 5.5.1].
>
> I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
> think Linux ever enables LTR.  Some BIOSes apparently enable it
> (Google for "LTR enabled").
I think this needs to be done in aspm.c file. i.e. whenever sub-system 
enables L1.2, it should enable
LTR_EN also.
>
> 1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
>     on the latency requirements of downstream devices.  How did you
>     come up with 55us?
This is given by Tegra hardware folks.
>
> 2) The spec requires [sec 5.5.4] that LTR_L1.2_THRESHOLD at both ends
>     of a Link be the same, but as far as I can see, there's no
>     requirement that it be the same across the whole system, so this
>     interface doesn't seem like quite the right approach.
Agree that this patch would make 55us applicable to entire system
>
> 3) We must support kernels with multiple host bridge drivers compiled
>     in, and the weak/strong symbol approach doesn't support using the
>     correct version, e.g., if we merge this patch, every system
>     containing the tegra driver would use this function, even if the
>     hardware had a different host bridge.  Also, if another driver
>     implemented its own version, we'd have duplicate symbols.
Yes. Agree with this too.
How about using quirks framework for this?
Rajat / Thierry / Bjorn, any thoughts on this?
>
>>   /*
>>    * The configuration space mapping on Tegra is somewhat similar to the ECAM
>>    * defined by PCIe. However it deviates a bit in how the 4 bits for extended
>> @@ -2262,6 +2291,37 @@ static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
>>   		value |= RP_VEND_XP_UPDATE_FC_THRESHOLD_T210;
>>   		writel(value, port->base + RP_VEND_XP);
>>   	}
>> +
>> +	if (soc->has_aspm_l1ss) {
>> +		/* Set Common Mode Restore Time to 30us */
>> +		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
>> +		value &= ~RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK;
>> +		value |= (0x1E << RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT);
>> +		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
>> +
>> +		/* set T_Power_On to 70us */
>> +		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
>> +		value &= ~(RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK |
>> +			RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK);
>> +		value |= (1 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT) |
>> +			(7 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT);
>> +		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
>> +
>> +		/* Following is based on clk_m being 19.2 MHz */
>> +		value = readl(port->base + RP_L1_PM_SUBSTATES_1_CTL);
>> +		value &= ~RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK;
>> +		value |= RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY;
>> +		writel(value, port->base + RP_L1_PM_SUBSTATES_1_CTL);
>> +
>> +		value = readl(port->base + RP_L1_PM_SUBSTATES_2_CTL);
>> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK;
>> +		value |= RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY;
>> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK;
>> +		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND;
>> +		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK;
>> +		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP;
>> +		writel(value, port->base + RP_L1_PM_SUBSTATES_2_CTL);
>> +	}
>>   }
>>   /*
>>    * FIXME: If there are no PCIe cards attached, then calling this function
>> @@ -2403,6 +2463,7 @@ static const struct tegra_pcie_soc tegra20_pcie = {
>>   	.program_deskew_time = false,
>>   	.updateFC_threshold = false,
>>   	.has_aspm_l1 = false,
>> +	.has_aspm_l1ss = false,
>>   };
>>   
>>   static const struct tegra_pcie_soc tegra30_pcie = {
>> @@ -2425,6 +2486,7 @@ static const struct tegra_pcie_soc tegra30_pcie = {
>>   	.program_deskew_time = false,
>>   	.updateFC_threshold = false,
>>   	.has_aspm_l1 = true,
>> +	.has_aspm_l1ss = false,
>>   };
>>   
>>   static const struct tegra_pcie_soc tegra124_pcie = {
>> @@ -2446,6 +2508,7 @@ static const struct tegra_pcie_soc tegra124_pcie = {
>>   	.program_deskew_time = false,
>>   	.updateFC_threshold = false,
>>   	.has_aspm_l1 = true,
>> +	.has_aspm_l1ss = false,
>>   };
>>   
>>   static const struct tegra_pcie_soc tegra210_pcie = {
>> @@ -2475,6 +2538,7 @@ static const struct tegra_pcie_soc tegra210_pcie = {
>>   	.program_deskew_time = true,
>>   	.updateFC_threshold = true,
>>   	.has_aspm_l1 = true,
>> +	.has_aspm_l1ss = true,
>>   };
>>   
>>   static const struct tegra_pcie_soc tegra186_pcie = {
>> @@ -2497,6 +2561,7 @@ static const struct tegra_pcie_soc tegra186_pcie = {
>>   	.program_deskew_time = false,
>>   	.updateFC_threshold = false,
>>   	.has_aspm_l1 = true,
>> +	.has_aspm_l1ss = true,
>>   };
>>   
>>   static const struct of_device_id tegra_pcie_of_match[] = {
>> -- 
>> 2.7.4
>>
Bjorn Helgaas Nov. 8, 2017, 5:48 p.m. UTC | #3
On Wed, Nov 08, 2017 at 02:15:05PM +0530, Vidya Sagar wrote:
> On Wednesday 08 November 2017 04:20 AM, Bjorn Helgaas wrote:
> >On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
> >>Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
> >>values to get them reflected in ASPM-L1 Sub-States capability registers
> >>Also adjusts internal counter values according to 19.2 MHz clk_m value
> >>
> >>Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> ...

> >>+u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
> >>+{
> >>+	/* LTR L1.2 Threshold = 55us for all ports */
> >>+	return ((0x37 << 16) | (0x02 << 29));
>
> >I know you've already worked through this, but let me think out loud
> >to try to figure this out myself.
> >
> >ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
> >extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
> >and has a longer exit delay than L1.1 [sec 5.5].
> >
> >Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
> >[sec 6.18].  When LTR is enabled, a device periodically sends LTR
> >messages.
> >
> >When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
> >LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
> >information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
> >always chooses L1.2 [sec 5.5.1].
> >
> >I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
> >think Linux ever enables LTR.  Some BIOSes apparently enable it
> >(Google for "LTR enabled").
>
> I think this needs to be done in aspm.c file. i.e. whenever
> sub-system enables L1.2, it should enable
> LTR_EN also.

That probably makes sense.

> >1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
> >    on the latency requirements of downstream devices.  How did you
> >    come up with 55us?
>
> This is given by Tegra hardware folks.

I do not understand why this value should be dependent on the host
bridge.  Can your hardware folks give more insight into this and how
they derived 55us?

I'm repeating myself, but the threshold (in combination with LTR
information) affects whether we enter L1.1 or L1.2.  If I understand
correctly, this is all about the downstream devices and not at all
about the host bridge.

> ...
> >3) We must support kernels with multiple host bridge drivers compiled
> >    in, and the weak/strong symbol approach doesn't support using the
> >    correct version, e.g., if we merge this patch, every system
> >    containing the tegra driver would use this function, even if the
> >    hardware had a different host bridge.  Also, if another driver
> >    implemented its own version, we'd have duplicate symbols.
>
> Yes. Agree with this too.
> How about using quirks framework for this?

If my assumption that "the threshold should be based on (a) the
latency requirements of downstream devices and (b) perhaps some global
power vs performance tradeoff" is correct, this doesn't really fit
into any kind of quirks or static computation, including the current
LTR_L1_2_THRESHOLD_BITS.

What happens if you keep all the Tegra-specific parts of this series,
i.e., you program the T_cmrt, T_pwr_on, and CLKREQ values, and enable
advertising of ASPM L1 capability, but leave out the
pcie_aspm_get_ltr_l_1_2_threshold() parts?  (BTW, I think you should
reorder the series so you fix up all the delay values *before* you
advertise ASPM L1.)

I expect that to be functionally equivalent, but it would change the
L1.1 vs L1.2 tradeoff, so it might have some performance impact,
depending on what the downstream devices are.

Bjorn
Vidya Sagar Nov. 10, 2017, 10:07 a.m. UTC | #4
On Wednesday 08 November 2017 11:18 PM, Bjorn Helgaas wrote:
> On Wed, Nov 08, 2017 at 02:15:05PM +0530, Vidya Sagar wrote:
>> On Wednesday 08 November 2017 04:20 AM, Bjorn Helgaas wrote:
>>> On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
>>>> Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
>>>> values to get them reflected in ASPM-L1 Sub-States capability registers
>>>> Also adjusts internal counter values according to 19.2 MHz clk_m value
>>>>
>>>> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
>> ...
>>>> +u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
>>>> +{
>>>> +	/* LTR L1.2 Threshold = 55us for all ports */
>>>> +	return ((0x37 << 16) | (0x02 << 29));
>>> I know you've already worked through this, but let me think out loud
>>> to try to figure this out myself.
>>>
>>> ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
>>> extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
>>> and has a longer exit delay than L1.1 [sec 5.5].
>>>
>>> Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
>>> [sec 6.18].  When LTR is enabled, a device periodically sends LTR
>>> messages.
>>>
>>> When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
>>> LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
>>> information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
>>> always chooses L1.2 [sec 5.5.1].
>>>
>>> I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
>>> think Linux ever enables LTR.  Some BIOSes apparently enable it
>>> (Google for "LTR enabled").
>> I think this needs to be done in aspm.c file. i.e. whenever
>> sub-system enables L1.2, it should enable
>> LTR_EN also.
> That probably makes sense.
>
>>> 1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
>>>     on the latency requirements of downstream devices.  How did you
>>>     come up with 55us?
>> This is given by Tegra hardware folks.
> I do not understand why this value should be dependent on the host
> bridge.  Can your hardware folks give more insight into this and how
> they derived 55us?
>
> I'm repeating myself, but the threshold (in combination with LTR
> information) affects whether we enter L1.1 or L1.2.  If I understand
> correctly, this is all about the downstream devices and not at all
> about the host bridge.
LTR_L1.2_THRESHOLD time is for the device to enter into and exit from 
L1.2 state.
55us in case of Tegra is calculated based on the circuit design and 
different latencies
involved in putting link through L1.2 entry-exit cycle.
Spec says in 5.5.4 that , quote "When programming LTR_L1.2_THRESHOLD 
Value and Scale fields,
identical values must be programmed in both Ports" and my understanding 
behind spec saying it explicitly
is that the end point should be made aware of what is the latency 
requirement for  L1.2 from root port,
just like the way end point makes root port aware of its L1.2 latency 
requirement by sending an LTR message
upstream. By this, both root port and end point come to the same page 
while taking a decision
to keep the link in L1.2 (or L1.1). Otherwise it may so happen that Tx 
would be in L1.2 and Rx would be in L1.1.
Also, my understanding is that it is bound to be different on different 
platforms as it comes from the way hardware
is designed.
>> ...
>>> 3) We must support kernels with multiple host bridge drivers compiled
>>>     in, and the weak/strong symbol approach doesn't support using the
>>>     correct version, e.g., if we merge this patch, every system
>>>     containing the tegra driver would use this function, even if the
>>>     hardware had a different host bridge.  Also, if another driver
>>>     implemented its own version, we'd have duplicate symbols.
>> Yes. Agree with this too.
>> How about using quirks framework for this?
> If my assumption that "the threshold should be based on (a) the
> latency requirements of downstream devices and (b) perhaps some global
> power vs performance tradeoff" is correct, this doesn't really fit
> into any kind of quirks or static computation, including the current
> LTR_L1_2_THRESHOLD_BITS.
>
> What happens if you keep all the Tegra-specific parts of this series,
> i.e., you program the T_cmrt, T_pwr_on, and CLKREQ values, and enable
> advertising of ASPM L1 capability, but leave out the
> pcie_aspm_get_ltr_l_1_2_threshold() parts?  (BTW, I think you should
> reorder the series so you fix up all the delay values *before* you
> advertise ASPM L1.)
>
> I expect that to be functionally equivalent, but it would change the
> L1.1 vs L1.2 tradeoff, so it might have some performance impact,
> depending on what the downstream devices are.
>
> Bjorn
Bjorn Helgaas Nov. 10, 2017, 9:29 p.m. UTC | #5
On Fri, Nov 10, 2017 at 03:37:10PM +0530, Vidya Sagar wrote:
> 
> 
> On Wednesday 08 November 2017 11:18 PM, Bjorn Helgaas wrote:
> >On Wed, Nov 08, 2017 at 02:15:05PM +0530, Vidya Sagar wrote:
> >>On Wednesday 08 November 2017 04:20 AM, Bjorn Helgaas wrote:
> >>>On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
> >>>>Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
> >>>>values to get them reflected in ASPM-L1 Sub-States capability registers
> >>>>Also adjusts internal counter values according to 19.2 MHz clk_m value
> >>>>
> >>>>Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
> >>...
> >>>>+u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
> >>>>+{
> >>>>+	/* LTR L1.2 Threshold = 55us for all ports */
> >>>>+	return ((0x37 << 16) | (0x02 << 29));
> >>>I know you've already worked through this, but let me think out loud
> >>>to try to figure this out myself.
> >>>
> >>>ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
> >>>extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
> >>>and has a longer exit delay than L1.1 [sec 5.5].
> >>>
> >>>Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
> >>>[sec 6.18].  When LTR is enabled, a device periodically sends LTR
> >>>messages.
> >>>
> >>>When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
> >>>LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
> >>>information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
> >>>always chooses L1.2 [sec 5.5.1].
> >>>
> >>>I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
> >>>think Linux ever enables LTR.  Some BIOSes apparently enable it
> >>>(Google for "LTR enabled").
> >>I think this needs to be done in aspm.c file. i.e. whenever
> >>sub-system enables L1.2, it should enable
> >>LTR_EN also.
> >That probably makes sense.
> >
> >>>1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
> >>>    on the latency requirements of downstream devices.  How did you
> >>>    come up with 55us?
> >>This is given by Tegra hardware folks.
> >I do not understand why this value should be dependent on the host
> >bridge.  Can your hardware folks give more insight into this and how
> >they derived 55us?
> >
> >I'm repeating myself, but the threshold (in combination with LTR
> >information) affects whether we enter L1.1 or L1.2.  If I understand
> >correctly, this is all about the downstream devices and not at all
> >about the host bridge.
>
> LTR_L1.2_THRESHOLD time is for the device to enter into and exit
> from L1.2 state.

That doesn't feel right to me.  Device characteristics are normally
communicated via "capabilities" registers, not programmed by the OS in
"control" registers.  The OS needs to be able to configure arbitrary
plug-in devices without having device-specific details built into it.

Sec 7.33.3 says:

  LTR_L1.2_THRESHOLD_Value – Along with the LTR_L1.2_THRESHOLD_Scale,
  this field indicates the LTR threshold used to determine if entry
  into L1 results in L1.1 (if enabled) or L1.2 (if enabled).  The
  default value for this field is 00 0000 0000b.

which does not actually say anything about the L1.2 entry/exit time.

The L0s and L1 exit latencies are read-only fields in the Link
Capabilities register.  I don't completely understand how they work,
but I think corresponding L1.2 exit latency information is reported
via the "Port T(POWER_ON)" and "Port T(COMMON_MODE)" fields in the L1
PM Substates Capabilities register.  These are HwInit fields and sec
5.5.4 says they depend on circuit components and AC coupling
capacitors.

> 55us in case of Tegra is calculated based on the circuit design and
> different latencies involved in putting link through L1.2 entry-exit
> cycle.

What does Tegra report for Port T(POWER_ON) and Port T(COMMON_MODE)?
It looks like the current lspci should decode everything in the L1 PM
Substates capability:
https://git.kernel.org/pub/scm/utils/pciutils/pciutils.git/tree/ls-ecaps.c#n584
Can you collect that output?

I notice that the L1 PM SS Capabilities register contains "Port
T(POWER_ON)" and the Control 2 register contains the "T(POWER_ON)".
The spec says "Port T(POWER_ON)" is what this port requires of the
opposite end of the link, so apparently we should be configuring
"T(POWER_ON)".  We don't touch that, which looks like a hole in the
current code.

> Spec says in 5.5.4 that, quote "When programming LTR_L1.2_THRESHOLD
> Value and Scale fields, identical values must be programmed in both
> Ports" and my understanding behind spec saying it explicitly is that
> the end point should be made aware of what is the latency
> requirement for L1.2 from root port, just like the way end point
> makes root port aware of its L1.2 latency requirement by sending an
> LTR message upstream.  By this, both root port and end point come to
> the same page while taking a decision to keep the link in L1.2 (or
> L1.1).

I don't think the root port itself has a latency requirement.  The
*endpoints* provide functionality that may be latency sensitive.  The
OS needs to configure the path between the root port and the endpoint
in such a way that we can satisfy the endpoint's requirements.

> Otherwise it may so happen that Tx would be in L1.2 and Rx would be
> in L1.1.  Also, my understanding is that it is bound to be different
> on different platforms as it comes from the way hardware is
> designed.

> >What happens if you keep all the Tegra-specific parts of this series,
> >i.e., you program the T_cmrt, T_pwr_on, and CLKREQ values, and enable
> >advertising of ASPM L1 capability, but leave out the
> >pcie_aspm_get_ltr_l_1_2_threshold() parts?  (BTW, I think you should
> >reorder the series so you fix up all the delay values *before* you
> >advertise ASPM L1.)
> >
> >I expect that to be functionally equivalent, but it would change the
> >L1.1 vs L1.2 tradeoff, so it might have some performance impact,
> >depending on what the downstream devices are.

I'm still interested in this part of my question.  I'm willing
to apply patches 2, 3, 4 (in the order 4, 3, 2 and omitting the
pcie_aspm_get_ltr_l_1_2_threshold() part of patch 3) even if we
haven't figured out patch 1.

If we did this much, we should be able to use lspci to manually
compute what we think *should* happen and use setpci to set the
LTR_L1.2_THRESHOLD and see if it works as expected.

Bjorn
Vidya Sagar Nov. 12, 2017, 11:51 a.m. UTC | #6
On Saturday 11 November 2017 02:59 AM, Bjorn Helgaas wrote:
> On Fri, Nov 10, 2017 at 03:37:10PM +0530, Vidya Sagar wrote:
>>
>> On Wednesday 08 November 2017 11:18 PM, Bjorn Helgaas wrote:
>>> On Wed, Nov 08, 2017 at 02:15:05PM +0530, Vidya Sagar wrote:
>>>> On Wednesday 08 November 2017 04:20 AM, Bjorn Helgaas wrote:
>>>>> On Tue, Oct 31, 2017 at 09:52:48AM +0530, Vidya Sagar wrote:
>>>>>> Programs T_cmrt (Commmon Mode Restore Time) and T_pwr_on (Power On)
>>>>>> values to get them reflected in ASPM-L1 Sub-States capability registers
>>>>>> Also adjusts internal counter values according to 19.2 MHz clk_m value
>>>>>>
>>>>>> Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
>>>> ...
>>>>>> +u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
>>>>>> +{
>>>>>> +	/* LTR L1.2 Threshold = 55us for all ports */
>>>>>> +	return ((0x37 << 16) | (0x02 << 29));
>>>>> I know you've already worked through this, but let me think out loud
>>>>> to try to figure this out myself.
>>>>>
>>>>> ASPM defines Link power states L0, L0s, and L1.  L1 PM Substates
>>>>> extend that by adding L1.1 and L1.2.  L1.2 presumably uses less power
>>>>> and has a longer exit delay than L1.1 [sec 5.5].
>>>>>
>>>>> Ports that support L1.2 must support Latency Tolerance Reporting (LTR)
>>>>> [sec 6.18].  When LTR is enabled, a device periodically sends LTR
>>>>> messages.
>>>>>
>>>>> When ASPM puts a Link into L1, it chooses either L1.1 or L1.2 based on
>>>>> LTR_L1.2_THRESHOLD and recent LTR messages.  If there's no LTR
>>>>> information it looks like LTR_L1.2_THRESHOLD doesn't matter and it
>>>>> always chooses L1.2 [sec 5.5.1].
>>>>>
>>>>> I don't see anything that writes PCI_EXP_DEVCTL2_LTR_EN, so I don't
>>>>> think Linux ever enables LTR.  Some BIOSes apparently enable it
>>>>> (Google for "LTR enabled").
>>>> I think this needs to be done in aspm.c file. i.e. whenever
>>>> sub-system enables L1.2, it should enable
>>>> LTR_EN also.
>>> That probably makes sense.
>>>
>>>>> 1) It seems like the LTR_L1.2_THRESHOLD value should be computed based
>>>>>     on the latency requirements of downstream devices.  How did you
>>>>>     come up with 55us?
>>>> This is given by Tegra hardware folks.
>>> I do not understand why this value should be dependent on the host
>>> bridge.  Can your hardware folks give more insight into this and how
>>> they derived 55us?
>>>
>>> I'm repeating myself, but the threshold (in combination with LTR
>>> information) affects whether we enter L1.1 or L1.2.  If I understand
>>> correctly, this is all about the downstream devices and not at all
>>> about the host bridge.
>> LTR_L1.2_THRESHOLD time is for the device to enter into and exit
>> from L1.2 state.
> That doesn't feel right to me.  Device characteristics are normally
> communicated via "capabilities" registers, not programmed by the OS in
> "control" registers.  The OS needs to be able to configure arbitrary
> plug-in devices without having device-specific details built into it.
But there is no capability register/field for LTR_L1.2_THRESHOLD either for
root ports or end points
>
> Sec 7.33.3 says:
>
>    LTR_L1.2_THRESHOLD_Value – Along with the LTR_L1.2_THRESHOLD_Scale,
>    this field indicates the LTR threshold used to determine if entry
>    into L1 results in L1.1 (if enabled) or L1.2 (if enabled).  The
>    default value for this field is 00 0000 0000b.
>
> which does not actually say anything about the L1.2 entry/exit time.
>
> The L0s and L1 exit latencies are read-only fields in the Link
> Capabilities register.  I don't completely understand how they work,
> but I think corresponding L1.2 exit latency information is reported
> via the "Port T(POWER_ON)" and "Port T(COMMON_MODE)" fields in the L1
> PM Substates Capabilities register.  These are HwInit fields and sec
> 5.5.4 says they depend on circuit components and AC coupling
> capacitors.
IMHO, I don't think that is the case, because, otherwise, there is no 
need for
an end point to send LTR messages to control link's entry into L1.2 right?
and I have seen some WiFi chips sending different LTR messages depending
on whether WiFi is connected to any Access Point (AP) or not.
>
>> 55us in case of Tegra is calculated based on the circuit design and
>> different latencies involved in putting link through L1.2 entry-exit
>> cycle.
> What does Tegra report for Port T(POWER_ON) and Port T(COMMON_MODE)?
> It looks like the current lspci should decode everything in the L1 PM
> Substates capability:
> https://git.kernel.org/pub/scm/utils/pciutils/pciutils.git/tree/ls-ecaps.c#n584
> Can you collect that output?
Here is the L1 SS registers dumps for both root port and end point
00:01.0 PCI bridge: NVIDIA Corporation Device 0fae (rev a1) (prog-if 00 
[Normal decode])
     Capabilities: [140 v1] L1 PM Substates
         L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ 
L1_PM_Substates+
               PortCommonModeRestoreTime=30us PortTPowerOnTime=70us
         L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
                T_CommonMode=30us LTR1.2_Threshold=56320ns
         L1SubCtl2: T_PwrOn=70us

01:00.0 Non-Volatile memory controller: Sandisk Corp Device 5001 
(prog-if 02 [NVM Express])
     Capabilities: [2c0 v1] L1 PM Substates
         L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ 
L1_PM_Substates+
               PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
         L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
                T_CommonMode=0us LTR1.2_Threshold=56320ns
         L1SubCtl2: T_PwrOn=70us

> I notice that the L1 PM SS Capabilities register contains "Port
> T(POWER_ON)" and the Control 2 register contains the "T(POWER_ON)".
> The spec says "Port T(POWER_ON)" is what this port requires of the
> opposite end of the link, so apparently we should be configuring
> "T(POWER_ON)".  We don't touch that, which looks like a hole in the
> current code.
I see that T (POWER_ON) present in capability registers of both upstream 
and downstream ports
are compared ( 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/pcie/aspm.c?h=v4.14-rc8#n467 
)
and the maximum of these two is written to control registers of both 
upstream and downstream ports
( 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/pcie/aspm.c?h=v4.14-rc8#n650 
)
>> Spec says in 5.5.4 that, quote "When programming LTR_L1.2_THRESHOLD
>> Value and Scale fields, identical values must be programmed in both
>> Ports" and my understanding behind spec saying it explicitly is that
>> the end point should be made aware of what is the latency
>> requirement for L1.2 from root port, just like the way end point
>> makes root port aware of its L1.2 latency requirement by sending an
>> LTR message upstream.  By this, both root port and end point come to
>> the same page while taking a decision to keep the link in L1.2 (or
>> L1.1).
> I don't think the root port itself has a latency requirement.  The
> *endpoints* provide functionality that may be latency sensitive.  The
> OS needs to configure the path between the root port and the endpoint
> in such a way that we can satisfy the endpoint's requirements.
But, if that is the case, it doesn't make sense spec asking us to write
the same value to both ports. Or is there a different way to interpret it??
>
>> Otherwise it may so happen that Tx would be in L1.2 and Rx would be
>> in L1.1.  Also, my understanding is that it is bound to be different
>> on different platforms as it comes from the way hardware is
>> designed.
>>> What happens if you keep all the Tegra-specific parts of this series,
>>> i.e., you program the T_cmrt, T_pwr_on, and CLKREQ values, and enable
>>> advertising of ASPM L1 capability, but leave out the
>>> pcie_aspm_get_ltr_l_1_2_threshold() parts?  (BTW, I think you should
>>> reorder the series so you fix up all the delay values *before* you
>>> advertise ASPM L1.)
>>>
>>> I expect that to be functionally equivalent, but it would change the
>>> L1.1 vs L1.2 tradeoff, so it might have some performance impact,
>>> depending on what the downstream devices are.
> I'm still interested in this part of my question.  I'm willing
> to apply patches 2, 3, 4 (in the order 4, 3, 2 and omitting the
> pcie_aspm_get_ltr_l_1_2_threshold() part of patch 3) even if we
> haven't figured out patch 1.
>
> If we did this much, we should be able to use lspci to manually
> compute what we think *should* happen and use setpci to set the
> LTR_L1.2_THRESHOLD and see if it works as expected.
I'll work on this.
>
> Bjorn
Bjorn Helgaas Nov. 14, 2017, 11:13 p.m. UTC | #7
On Sun, Nov 12, 2017 at 05:21:47PM +0530, Vidya Sagar wrote:
> On Saturday 11 November 2017 02:59 AM, Bjorn Helgaas wrote:
> >On Fri, Nov 10, 2017 at 03:37:10PM +0530, Vidya Sagar wrote:

> >>LTR_L1.2_THRESHOLD time is for the device to enter into and exit
> >>from L1.2 state.
> >That doesn't feel right to me.  Device characteristics are normally
> >communicated via "capabilities" registers, not programmed by the OS in
> >"control" registers.  The OS needs to be able to configure arbitrary
> >plug-in devices without having device-specific details built into it.
> But there is no capability register/field for LTR_L1.2_THRESHOLD either for
> root ports or end points

Sorry, I didn't state that clearly.  I agree that LTR_L1.2_THRESHOLD
should be the time required to enter and exit the L1.2 state.

What doesn't make sense to me is that the OS would have to have
device-specific quirks built into it instead of being able to discover
what it needs via "capabilities" registers.  If it did, we would have
no way to configure a plug-in switch unless we added a quirk for every
device.  That would be a serious design error in the L1 Substates
feature.

Assume we have an Endpoint connected to a Root Port.  We should set
LTR_L1.2_THRESHOLD to the time it takes to transition the Link from L0
to L1.2 and back to L0.  Then we will enter L1.2 only if the Endpoint
has reported that it can tolerate at least that much latency.

From sec 5.5.3.3.1, Figures 5-16 and 5-17, it looks like the latency
to go from L1.0 to L1.2 and back to L0 should be at least the sum of:

  T(POWER_OFF)       max 2us (from Table 5-11)
  T(L1.2)            min 4us (from Table 5-11)
  T(POWER_ON)        from L1 PM Substates Control 2 register
  T(COMMONMODE)      from L1 PM Substates Control 1 register

This doesn't include (a) the time from L0 to L1.0 and (b) the gap
between T(POWER_ON) and T(COMMONMODE).  I don't know how to learn
those.  But I think we know how to compute T(POWER_ON) and
T(COMMONMODE) by taking the max of Port T_POWER_ON and Port
Common_Mode_Restore_Time for the two ends of the Link.

Your lspci output (below) shows:

  Root Port: Port T_POWER_ON = 70us
  Root Port: Port Common_Mode_Restore_Time = 30us
  Endpoint:  Port T_POWER_ON = 10us
  Endpoint:  Port Common_Mode_Restore_Time = 10us

That would make T(POWER_ON) = max(70us, 10us) = 70us
and T(COMMONMODE) = max(30us, 10us) = 30us.

So I would think the LTR_L1.2_THRESHOLD should be at least

  2us + 4us + 70us + 30us = 106us

Your hardware folks came up with 55us, but I don't know how.  I guess
they're using information not visible to the OS?  

Can you explain where I'm going wrong in my calculations?  I think we
should be able to relate the values from the L1 PM Substates
Capabilities register to the timing diagrams in sec 5.5.3.3.1 somehow.

> Here is the L1 SS registers dumps for both root port and end point
> 00:01.0 PCI bridge: NVIDIA Corporation Device 0fae (rev a1) (prog-if
> 00 [Normal decode])
>     Capabilities: [140 v1] L1 PM Substates
>         L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
> L1_PM_Substates+
>               PortCommonModeRestoreTime=30us PortTPowerOnTime=70us
>         L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>                T_CommonMode=30us LTR1.2_Threshold=56320ns
>         L1SubCtl2: T_PwrOn=70us
> 
> 01:00.0 Non-Volatile memory controller: Sandisk Corp Device 5001
> (prog-if 02 [NVM Express])
>     Capabilities: [2c0 v1] L1 PM Substates
>         L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
> L1_PM_Substates+
>               PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
>         L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>                T_CommonMode=0us LTR1.2_Threshold=56320ns
>         L1SubCtl2: T_PwrOn=70us

Bjorn
Vidya Sagar Nov. 17, 2017, 2:05 p.m. UTC | #8
On Wednesday 15 November 2017 04:43 AM, Bjorn Helgaas wrote:
> On Sun, Nov 12, 2017 at 05:21:47PM +0530, Vidya Sagar wrote:
>> On Saturday 11 November 2017 02:59 AM, Bjorn Helgaas wrote:
>>> On Fri, Nov 10, 2017 at 03:37:10PM +0530, Vidya Sagar wrote:
>>>> LTR_L1.2_THRESHOLD time is for the device to enter into and exit
>>> >from L1.2 state.
>>> That doesn't feel right to me.  Device characteristics are normally
>>> communicated via "capabilities" registers, not programmed by the OS in
>>> "control" registers.  The OS needs to be able to configure arbitrary
>>> plug-in devices without having device-specific details built into it.
>> But there is no capability register/field for LTR_L1.2_THRESHOLD either for
>> root ports or end points
> Sorry, I didn't state that clearly.  I agree that LTR_L1.2_THRESHOLD
> should be the time required to enter and exit the L1.2 state.
>
> What doesn't make sense to me is that the OS would have to have
> device-specific quirks built into it instead of being able to discover
> what it needs via "capabilities" registers.  If it did, we would have
> no way to configure a plug-in switch unless we added a quirk for every
> device.  That would be a serious design error in the L1 Substates
> feature.
>
> Assume we have an Endpoint connected to a Root Port.  We should set
> LTR_L1.2_THRESHOLD to the time it takes to transition the Link from L0
> to L1.2 and back to L0.  Then we will enter L1.2 only if the Endpoint
> has reported that it can tolerate at least that much latency.
>
>  From sec 5.5.3.3.1, Figures 5-16 and 5-17, it looks like the latency
> to go from L1.0 to L1.2 and back to L0 should be at least the sum of:
>
>    T(POWER_OFF)       max 2us (from Table 5-11)
>    T(L1.2)            min 4us (from Table 5-11)
>    T(POWER_ON)        from L1 PM Substates Control 2 register
>    T(COMMONMODE)      from L1 PM Substates Control 1 register
>
> This doesn't include (a) the time from L0 to L1.0 and (b) the gap
> between T(POWER_ON) and T(COMMONMODE).  I don't know how to learn
> those.  But I think we know how to compute T(POWER_ON) and
> T(COMMONMODE) by taking the max of Port T_POWER_ON and Port
> Common_Mode_Restore_Time for the two ends of the Link.
>
> Your lspci output (below) shows:
>
>    Root Port: Port T_POWER_ON = 70us
>    Root Port: Port Common_Mode_Restore_Time = 30us
>    Endpoint:  Port T_POWER_ON = 10us
>    Endpoint:  Port Common_Mode_Restore_Time = 10us
>
> That would make T(POWER_ON) = max(70us, 10us) = 70us
> and T(COMMONMODE) = max(30us, 10us) = 30us.
>
> So I would think the LTR_L1.2_THRESHOLD should be at least
>
>    2us + 4us + 70us + 30us = 106us
>
> Your hardware folks came up with 55us, but I don't know how.  I guess
> they're using information not visible to the OS?
>
> Can you explain where I'm going wrong in my calculations?  I think we
> should be able to relate the values from the L1 PM Substates
> Capabilities register to the timing diagrams in sec 5.5.3.3.1 somehow.

Your calculations are correct. This is in fact the value we used 
initially, but, later on Tegra hardware is tweaked
by which T_POWER_ON is brought down to 35us and T_cmrt to 14us (to 
achieve better power savings for a
specific endpoint) thereby arriving at 55us of LTR_L1.2_THRESHOLD value.
Its my mistake that I didn't update these new values for T_PowerOn and 
T_cmrt.
Also, I got the confirmation from our hardware folks that, it is better 
to go with 70us and 30us for T_PowerOn and
T_cmrt respectively and stick to 106us of LTR_L1.2_THRESHOLD to be able 
to work with variety of end points.
Now that, having a strong API implementation to supply 
LTR_L1.2_THRESHOLD from host controller driver (and weak
API implementation in aspm sub-system) doesn't seem correct, do you 
suggest to derive LTR_L1.2_THRESHOLD value
(like you mentioned above) and then update it in both RP and EP L1SS 
control registers? Though it doesn't include gaps (a) and (b) above,
I think it is still the best way to come up a suitable 
LTR_L1.2_THRESHOLD value without having to be supplied by host 
controller drivers.

>> Here is the L1 SS registers dumps for both root port and end point
>> 00:01.0 PCI bridge: NVIDIA Corporation Device 0fae (rev a1) (prog-if
>> 00 [Normal decode])
>>      Capabilities: [140 v1] L1 PM Substates
>>          L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>> L1_PM_Substates+
>>                PortCommonModeRestoreTime=30us PortTPowerOnTime=70us
>>          L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>>                 T_CommonMode=30us LTR1.2_Threshold=56320ns
>>          L1SubCtl2: T_PwrOn=70us
>>
>> 01:00.0 Non-Volatile memory controller: Sandisk Corp Device 5001
>> (prog-if 02 [NVM Express])
>>      Capabilities: [2c0 v1] L1 PM Substates
>>          L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>> L1_PM_Substates+
>>                PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
>>          L1SubCtl1: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+
>>                 T_CommonMode=0us LTR1.2_Threshold=56320ns
>>          L1SubCtl2: T_PwrOn=70us
> Bjorn
diff mbox

Patch

diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index e1526cc5d381..08da67a82a2d 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -40,6 +40,7 @@ 
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
+#include <linux/pci-aspm.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
@@ -191,6 +192,27 @@ 
 #define RP_PRIV_XP_DL	0x494
 #define  RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD	(0x1ff << 1)
 
+#define RP_L1_PM_SUBSTATES_CTL				0xC00
+#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK		(0xFF << 8)
+#define RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT		8
+#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK		(0x3 << 16)
+#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT		16
+#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK		(0x1F << 19)
+#define RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT		19
+#define RP_L1_PM_SUBSTATES_CTL_HIDE_CAP			(0x1 << 24)
+
+#define RP_L1_PM_SUBSTATES_1_CTL			0xC04
+#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK	0x1FFF
+#define RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY		0x26
+
+#define RP_L1_PM_SUBSTATES_2_CTL			0xC08
+#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK	0x1FFF
+#define RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY		0x4D
+#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK	(0xFF << 13)
+#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND		(0x13 << 13)
+#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK	(0xF << 21)
+#define RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP	(0x2 << 21)
+
 #define RP_RX_HDR_LIMIT	0xe00
 #define  RP_RX_HDR_LIMIT_PW_MASK	(0xff << 8)
 #define  RP_RX_HDR_LIMIT_PW		(0x0e << 8)
@@ -331,6 +353,7 @@  struct tegra_pcie_soc {
 	bool program_deskew_time;
 	bool updateFC_threshold;
 	bool has_aspm_l1;
+	bool has_aspm_l1ss;
 };
 
 static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
@@ -423,6 +446,12 @@  static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
 	return readl(pcie->pads + offset);
 }
 
+u32 pcie_aspm_get_ltr_l_1_2_threshold(void)
+{
+	/* LTR L1.2 Threshold = 55us for all ports */
+	return ((0x37 << 16) | (0x02 << 29));
+}
+
 /*
  * The configuration space mapping on Tegra is somewhat similar to the ECAM
  * defined by PCIe. However it deviates a bit in how the 4 bits for extended
@@ -2262,6 +2291,37 @@  static void tegra_pcie_apply_sw_fixup(struct tegra_pcie_port *port)
 		value |= RP_VEND_XP_UPDATE_FC_THRESHOLD_T210;
 		writel(value, port->base + RP_VEND_XP);
 	}
+
+	if (soc->has_aspm_l1ss) {
+		/* Set Common Mode Restore Time to 30us */
+		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
+		value &= ~RP_L1_PM_SUBSTATES_CTL_CM_RTIME_MASK;
+		value |= (0x1E << RP_L1_PM_SUBSTATES_CTL_CM_RTIME_SHIFT);
+		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
+
+		/* set T_Power_On to 70us */
+		value = readl(port->base + RP_L1_PM_SUBSTATES_CTL);
+		value &= ~(RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_MASK |
+			RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_MASK);
+		value |= (1 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_SCL_SHIFT) |
+			(7 << RP_L1_PM_SUBSTATES_CTL_T_PWRN_VAL_SHIFT);
+		writel(value, port->base + RP_L1_PM_SUBSTATES_CTL);
+
+		/* Following is based on clk_m being 19.2 MHz */
+		value = readl(port->base + RP_L1_PM_SUBSTATES_1_CTL);
+		value &= ~RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY_MASK;
+		value |= RP_L1_PM_SUBSTATES_1_CTL_PWR_OFF_DLY;
+		writel(value, port->base + RP_L1_PM_SUBSTATES_1_CTL);
+
+		value = readl(port->base + RP_L1_PM_SUBSTATES_2_CTL);
+		value &= ~RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY_MASK;
+		value |= RP_L1_PM_SUBSTATES_2_CTL_T_L1_2_DLY;
+		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_MASK;
+		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND;
+		value &= ~RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP_MASK;
+		value |= RP_L1_PM_SUBSTATES_2_CTL_MICROSECOND_COMP;
+		writel(value, port->base + RP_L1_PM_SUBSTATES_2_CTL);
+	}
 }
 /*
  * FIXME: If there are no PCIe cards attached, then calling this function
@@ -2403,6 +2463,7 @@  static const struct tegra_pcie_soc tegra20_pcie = {
 	.program_deskew_time = false,
 	.updateFC_threshold = false,
 	.has_aspm_l1 = false,
+	.has_aspm_l1ss = false,
 };
 
 static const struct tegra_pcie_soc tegra30_pcie = {
@@ -2425,6 +2486,7 @@  static const struct tegra_pcie_soc tegra30_pcie = {
 	.program_deskew_time = false,
 	.updateFC_threshold = false,
 	.has_aspm_l1 = true,
+	.has_aspm_l1ss = false,
 };
 
 static const struct tegra_pcie_soc tegra124_pcie = {
@@ -2446,6 +2508,7 @@  static const struct tegra_pcie_soc tegra124_pcie = {
 	.program_deskew_time = false,
 	.updateFC_threshold = false,
 	.has_aspm_l1 = true,
+	.has_aspm_l1ss = false,
 };
 
 static const struct tegra_pcie_soc tegra210_pcie = {
@@ -2475,6 +2538,7 @@  static const struct tegra_pcie_soc tegra210_pcie = {
 	.program_deskew_time = true,
 	.updateFC_threshold = true,
 	.has_aspm_l1 = true,
+	.has_aspm_l1ss = true,
 };
 
 static const struct tegra_pcie_soc tegra186_pcie = {
@@ -2497,6 +2561,7 @@  static const struct tegra_pcie_soc tegra186_pcie = {
 	.program_deskew_time = false,
 	.updateFC_threshold = false,
 	.has_aspm_l1 = true,
+	.has_aspm_l1ss = true,
 };
 
 static const struct of_device_id tegra_pcie_of_match[] = {