diff mbox

[V2] PCI: Enable PASID when End-to-End TLP is supported by all bridges

Message ID 1529460886-23722-1-git-send-email-okaya@codeaurora.org (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Sinan Kaya June 20, 2018, 2:14 a.m. UTC
A PCIe endpoint carries the process address space identifier (PASID) in
the TLP prefix as part of the memory read/write transaction. The address
information in the TLP is relevant only for a given PASID context.

An IOMMU takes PASID value and the address information from the
TLP to look up the physical address in the system.

If a bridge drops the TLP prefix, the translation agent can resolve the
address to an incorrect location and cause data corruption. Prevent
this condition by requiring End-to-End TLP prefix to be supported on the
entire data path between the endpoint and the root port.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/pci/ats.c             |  9 +++++++++
 drivers/pci/probe.c           | 17 +++++++++++++++++
 include/linux/pci.h           |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 4 files changed, 28 insertions(+)

Comments

Timur Tabi June 20, 2018, 3:02 a.m. UTC | #1
On 6/19/18 9:14 PM, Sinan Kaya wrote:
> +	if (!(cap & PCI_EXP_DEVCAP2_E2ETLP))
> +		return;
> +
> +	dev->eetlp_prefix = 1;

How about:

	if (cap & PCI_EXP_DEVCAP2_E2ETLP)
		dev->eetlp_prefix = 1;
Sinan Kaya June 20, 2018, 3:04 a.m. UTC | #2
On 6/19/2018 11:02 PM, Timur Tabi wrote:
> On 6/19/18 9:14 PM, Sinan Kaya wrote:
>> +    if (!(cap & PCI_EXP_DEVCAP2_E2ETLP))
>> +        return;
>> +
>> +    dev->eetlp_prefix = 1;
> 
> How about:
> 
>     if (cap & PCI_EXP_DEVCAP2_E2ETLP)
>         dev->eetlp_prefix = 1;
> 

Both works. I'll wait until I get more feedback.
Bjorn Helgaas June 30, 2018, 12:49 a.m. UTC | #3
On Tue, Jun 19, 2018 at 10:14:46PM -0400, Sinan Kaya wrote:
> A PCIe endpoint carries the process address space identifier (PASID) in
> the TLP prefix as part of the memory read/write transaction. The address
> information in the TLP is relevant only for a given PASID context.
> 
> An IOMMU takes PASID value and the address information from the
> TLP to look up the physical address in the system.
> 
> If a bridge drops the TLP prefix, the translation agent can resolve the
> address to an incorrect location and cause data corruption. Prevent
> this condition by requiring End-to-End TLP prefix to be supported on the
> entire data path between the endpoint and the root port.

PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20).  Sec 2.2.10.2 says

  It is an error to receive a TLP with an End-End TLP Prefix by a
  Receiver that does not support End-End TLP Prefixes. A TLP in
  violation of this rule is handled as a Malformed TLP. This is a
  reported error associated with the Receiving Port (see Section 6.2).

So I agree that we shouldn't enable PASID in an endpoint unless all
the switch ports leading to it support End-End prefixes.  But I don't
see how a bridge can drop a prefix and cause data corruption -- if it
doesn't support End-End prefixes, shouldn't the bridge raise a
Malformed TLP error instead of forwarding the TLP?

> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  drivers/pci/ats.c             |  9 +++++++++
>  drivers/pci/probe.c           | 17 +++++++++++++++++
>  include/linux/pci.h           |  1 +
>  include/uapi/linux/pci_regs.h |  1 +
>  4 files changed, 28 insertions(+)
> 
> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> index 4923a2a..e1b2e6d 100644
> --- a/drivers/pci/ats.c
> +++ b/drivers/pci/ats.c
> @@ -268,6 +268,7 @@ EXPORT_SYMBOL_GPL(pci_reset_pri);
>  int pci_enable_pasid(struct pci_dev *pdev, int features)
>  {
>  	u16 control, supported;
> +	struct pci_dev *bridge;
>  	int pos;
>  
>  	if (WARN_ON(pdev->pasid_enabled))
> @@ -277,6 +278,14 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
>  	if (!pos)
>  		return -EINVAL;
>  
> +	bridge = pci_upstream_bridge(pdev);
> +	while (bridge) {
> +		if (!bridge->eetlp_prefix)
> +			return -EINVAL;
> +
> +		bridge = pci_upstream_bridge(bridge);
> +	}

I was hoping to avoid even this loop by having the eetlp_prefix bit
indicate that "End-End TLP Prefixes are supported from the Root Port
to here".

>  	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
>  	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
>  
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ac876e3..a7f7ac1 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2042,6 +2042,22 @@ static void pci_configure_ltr(struct pci_dev *dev)
>  #endif
>  }
>  
> +static void pci_configure_eetlp_prefix(struct pci_dev *dev)
> +{
> +#ifdef CONFIG_PCI_PASID
> +	u32 cap;
> +
> +	if (!pci_is_pcie(dev))
> +		return;
> +
> +	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
> +	if (!(cap & PCI_EXP_DEVCAP2_E2ETLP))
> +		return;
> +
> +	dev->eetlp_prefix = 1;

I.e., here we would do:

  if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
    dev->eetlp_prefix_path = 1;
  else {
    bridge = pci_upstream_bridge(dev);
    if (bridge && bridge->eetlp_prefix_path)
      dev->eetlp_prefix_path = 1;
  }

> +#endif
> +}
> +
>  static void pci_configure_device(struct pci_dev *dev)
>  {
>  	struct hotplug_params hpp;
> @@ -2051,6 +2067,7 @@ static void pci_configure_device(struct pci_dev *dev)
>  	pci_configure_extended_tags(dev, NULL);
>  	pci_configure_relaxed_ordering(dev);
>  	pci_configure_ltr(dev);
> +	pci_configure_eetlp_prefix(dev);
>  
>  	memset(&hpp, 0, sizeof(hpp));
>  	ret = pci_get_hp_params(dev, &hpp);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 340029b..cf88d47 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -350,6 +350,7 @@ struct pci_dev {
>  	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
>  					   supported from root to here */
>  #endif
> +	unsigned int	eetlp_prefix:1;	/* End-to-End TLP Prefix */
>  
>  	pci_channel_state_t error_state;	/* Current connectivity state */
>  	struct device	dev;			/* Generic device interface */
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index 4da87e2..a617ab2 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -636,6 +636,7 @@
>  #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
>  #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
>  #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
> +#define PCI_EXP_DEVCAP2_E2ETLP		0x00200000 /* End-to-End TLP Prefix */

It looks like lspci doesn't decode this bit (and several others in
DevCap2).  Would you be interested in adding that?  The source is at
git://git.kernel.org/pub/scm/utils/pciutils/pciutils.git

>  #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
>  #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
>  #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
> -- 
> 2.7.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Sinan Kaya June 30, 2018, 2:45 p.m. UTC | #4
On 6/29/2018 8:49 PM, Bjorn Helgaas wrote:
> On Tue, Jun 19, 2018 at 10:14:46PM -0400, Sinan Kaya wrote:
>> A PCIe endpoint carries the process address space identifier (PASID) in
>> the TLP prefix as part of the memory read/write transaction. The address
>> information in the TLP is relevant only for a given PASID context.
>>
>> An IOMMU takes PASID value and the address information from the
>> TLP to look up the physical address in the system.
>>
>> If a bridge drops the TLP prefix, the translation agent can resolve the
>> address to an incorrect location and cause data corruption. Prevent
>> this condition by requiring End-to-End TLP prefix to be supported on the
>> entire data path between the endpoint and the root port.
> 
> PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20).  Sec 2.2.10.2 says
> 
>   It is an error to receive a TLP with an End-End TLP Prefix by a
>   Receiver that does not support End-End TLP Prefixes. A TLP in
>   violation of this rule is handled as a Malformed TLP. This is a
>   reported error associated with the Receiving Port (see Section 6.2).
> 
> So I agree that we shouldn't enable PASID in an endpoint unless all
> the switch ports leading to it support End-End prefixes.  But I don't
> see how a bridge can drop a prefix and cause data corruption -- if it
> doesn't support End-End prefixes, shouldn't the bridge raise a
> Malformed TLP error instead of forwarding the TLP?

It should under normal circumstances. 

I remember reading that most PCIe switches don't support TLP prefixes.
I don't know if it is because of buggy behavior or if it is just plain
unsupported while dropping the request as Malformed TLP.

I was trying to be proactive and not enable PASID if the entire path
is incapable.

> 
>> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
>> ---
>>  drivers/pci/ats.c             |  9 +++++++++
>>  drivers/pci/probe.c           | 17 +++++++++++++++++
>>  include/linux/pci.h           |  1 +
>>  include/uapi/linux/pci_regs.h |  1 +
>>  4 files changed, 28 insertions(+)
>>
>> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
>> index 4923a2a..e1b2e6d 100644
>> --- a/drivers/pci/ats.c
>> +++ b/drivers/pci/ats.c
>> @@ -268,6 +268,7 @@ EXPORT_SYMBOL_GPL(pci_reset_pri);
>>  int pci_enable_pasid(struct pci_dev *pdev, int features)
>>  {
>>  	u16 control, supported;
>> +	struct pci_dev *bridge;
>>  	int pos;
>>  
>>  	if (WARN_ON(pdev->pasid_enabled))
>> @@ -277,6 +278,14 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
>>  	if (!pos)
>>  		return -EINVAL;
>>  
>> +	bridge = pci_upstream_bridge(pdev);
>> +	while (bridge) {
>> +		if (!bridge->eetlp_prefix)
>> +			return -EINVAL;
>> +
>> +		bridge = pci_upstream_bridge(bridge);
>> +	}
> 
> I was hoping to avoid even this loop by having the eetlp_prefix bit
> indicate that "End-End TLP Prefixes are supported from the Root Port
> to here".
> 

I see.

>>  	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
>>  	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
>>  
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index ac876e3..a7f7ac1 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -2042,6 +2042,22 @@ static void pci_configure_ltr(struct pci_dev *dev)
>>  #endif
>>  }
>>  
>> +static void pci_configure_eetlp_prefix(struct pci_dev *dev)
>> +{
>> +#ifdef CONFIG_PCI_PASID
>> +	u32 cap;
>> +
>> +	if (!pci_is_pcie(dev))
>> +		return;
>> +
>> +	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
>> +	if (!(cap & PCI_EXP_DEVCAP2_E2ETLP))
>> +		return;
>> +
>> +	dev->eetlp_prefix = 1;
> 
> I.e., here we would do:
> 
>   if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
>     dev->eetlp_prefix_path = 1;
>   else {
>     bridge = pci_upstream_bridge(dev);
>     if (bridge && bridge->eetlp_prefix_path)
>       dev->eetlp_prefix_path = 1;
>   }

Sure, let me make the changes and post a new version.

> 
>> +#endif
>> +}
>> +
>>  static void pci_configure_device(struct pci_dev *dev)
>>  {
>>  	struct hotplug_params hpp;
>> @@ -2051,6 +2067,7 @@ static void pci_configure_device(struct pci_dev *dev)
>>  	pci_configure_extended_tags(dev, NULL);
>>  	pci_configure_relaxed_ordering(dev);
>>  	pci_configure_ltr(dev);
>> +	pci_configure_eetlp_prefix(dev);
>>  
>>  	memset(&hpp, 0, sizeof(hpp));
>>  	ret = pci_get_hp_params(dev, &hpp);
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 340029b..cf88d47 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -350,6 +350,7 @@ struct pci_dev {
>>  	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
>>  					   supported from root to here */
>>  #endif
>> +	unsigned int	eetlp_prefix:1;	/* End-to-End TLP Prefix */
>>  
>>  	pci_channel_state_t error_state;	/* Current connectivity state */
>>  	struct device	dev;			/* Generic device interface */
>> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
>> index 4da87e2..a617ab2 100644
>> --- a/include/uapi/linux/pci_regs.h
>> +++ b/include/uapi/linux/pci_regs.h
>> @@ -636,6 +636,7 @@
>>  #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
>>  #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
>>  #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
>> +#define PCI_EXP_DEVCAP2_E2ETLP		0x00200000 /* End-to-End TLP Prefix */
> 
> It looks like lspci doesn't decode this bit (and several others in
> DevCap2).  Would you be interested in adding that?  The source is at
> git://git.kernel.org/pub/scm/utils/pciutils/pciutils.git
> 
>>  #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
>>  #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
>>  #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
>> -- 
>> 2.7.4
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Bjorn Helgaas June 30, 2018, 7:19 p.m. UTC | #5
On Sat, Jun 30, 2018 at 10:45:21AM -0400, Sinan Kaya wrote:
> On 6/29/2018 8:49 PM, Bjorn Helgaas wrote:
> > On Tue, Jun 19, 2018 at 10:14:46PM -0400, Sinan Kaya wrote:
> >> A PCIe endpoint carries the process address space identifier (PASID) in
> >> the TLP prefix as part of the memory read/write transaction. The address
> >> information in the TLP is relevant only for a given PASID context.
> >>
> >> An IOMMU takes PASID value and the address information from the
> >> TLP to look up the physical address in the system.
> >>
> >> If a bridge drops the TLP prefix, the translation agent can resolve the
> >> address to an incorrect location and cause data corruption. Prevent
> >> this condition by requiring End-to-End TLP prefix to be supported on the
> >> entire data path between the endpoint and the root port.
> > 
> > PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20).  Sec 2.2.10.2 says
> > 
> >   It is an error to receive a TLP with an End-End TLP Prefix by a
> >   Receiver that does not support End-End TLP Prefixes. A TLP in
> >   violation of this rule is handled as a Malformed TLP. This is a
> >   reported error associated with the Receiving Port (see Section 6.2).
> > 
> > So I agree that we shouldn't enable PASID in an endpoint unless all
> > the switch ports leading to it support End-End prefixes.  But I don't
> > see how a bridge can drop a prefix and cause data corruption -- if it
> > doesn't support End-End prefixes, shouldn't the bridge raise a
> > Malformed TLP error instead of forwarding the TLP?
> 
> It should under normal circumstances. 
> 
> I remember reading that most PCIe switches don't support TLP prefixes.
> I don't know if it is because of buggy behavior or if it is just plain
> unsupported while dropping the request as Malformed TLP.
> 
> I was trying to be proactive and not enable PASID if the entire path
> is incapable.

Absolutely, that makes perfect sense.  Much better to fail to enable
PASID rather than enabling it and seeing Malformed TLP errors or data
corruption later.

I was trying to figure out if you can actually force data corruption
this way.  If you can, I'd say that sounds like a buggy switch that we
might want to be aware of.

Bjorn
diff mbox

Patch

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 4923a2a..e1b2e6d 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -268,6 +268,7 @@  EXPORT_SYMBOL_GPL(pci_reset_pri);
 int pci_enable_pasid(struct pci_dev *pdev, int features)
 {
 	u16 control, supported;
+	struct pci_dev *bridge;
 	int pos;
 
 	if (WARN_ON(pdev->pasid_enabled))
@@ -277,6 +278,14 @@  int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (!pos)
 		return -EINVAL;
 
+	bridge = pci_upstream_bridge(pdev);
+	while (bridge) {
+		if (!bridge->eetlp_prefix)
+			return -EINVAL;
+
+		bridge = pci_upstream_bridge(bridge);
+	}
+
 	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e3..a7f7ac1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2042,6 +2042,22 @@  static void pci_configure_ltr(struct pci_dev *dev)
 #endif
 }
 
+static void pci_configure_eetlp_prefix(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_PASID
+	u32 cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_E2ETLP))
+		return;
+
+	dev->eetlp_prefix = 1;
+#endif
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
 	struct hotplug_params hpp;
@@ -2051,6 +2067,7 @@  static void pci_configure_device(struct pci_dev *dev)
 	pci_configure_extended_tags(dev, NULL);
 	pci_configure_relaxed_ordering(dev);
 	pci_configure_ltr(dev);
+	pci_configure_eetlp_prefix(dev);
 
 	memset(&hpp, 0, sizeof(hpp));
 	ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b..cf88d47 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -350,6 +350,7 @@  struct pci_dev {
 	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
 					   supported from root to here */
 #endif
+	unsigned int	eetlp_prefix:1;	/* End-to-End TLP Prefix */
 
 	pci_channel_state_t error_state;	/* Current connectivity state */
 	struct device	dev;			/* Generic device interface */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4da87e2..a617ab2 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -636,6 +636,7 @@ 
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
+#define PCI_EXP_DEVCAP2_E2ETLP		0x00200000 /* End-to-End TLP Prefix */
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */