Message ID | 20241209222938.3219364-1-helgaas@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | perf/dwc_pcie: Qualify RAS DES VSEC Capability by Vendor, Revision | expand |
Hi, Bjorn, 在 2024/12/10 06:29, Bjorn Helgaas 写道: > From: Bjorn Helgaas <bhelgaas@google.com> > > PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. > Devices from different vendors may advertise a VSEC Capability with the DWC > RAS DES functionality, but the vendors may assign different VSEC IDs. > > Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev > chosen by the vendor. > > This does not fix a current problem because Alibaba, Ampere, and Qualcomm > all assigned the same VSEC ID and VSEC Rev for the DWC RAS DES Capability. > > The potential issue is that we may add support for a device from another > vendor, where the vendor has already assigned DWC_PCIE_VSEC_RAS_DES_ID > (0x02) for an unrelated VSEC. In that event, dwc_pcie_des_cap() would find > the unrelated VSEC and mistakenly assume it was a DWC RAS DES Capability. > > Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> > --- > Sample devices that advertise VSEC Capabilities with VSEC ID=0x02 that are > unrelated to the DWC RAS DES functionality: > > https://community.nxp.com/t5/S32G/S32G3-PCIe-compliance-mode-set-speed-to-5-8Gbit-s/m-p/1875346#M7024 > 00:00.0 PCI bridge: Freescale Semiconductor Inc Device 4300 (prog-if 00 [Normal decode]) > Capabilities: [70] Express (v2) Root Port (Slot-), MSI 00 > Capabilities: [158 v1] Vendor Specific Information: ID=0002 Rev=4 Len=100 <?> > > https://github.com/google-coral/edgetpu/issues/743 > 0000:00:00.0 PCI bridge: NVIDIA Corporation Device 1ad0 (rev a1) (prog-if 00 [Normal decode]) > Capabilities: [70] Express (v2) Root Port (Slot-), MSI 00 > Capabilities: [1d0 v1] Vendor Specific Information: ID=0002 Rev=4 Len=100 > > https://www.linuxquestions.org/questions/linux-kernel-70/differences-in-'lspci-v'-output-4175495550/ > 00:01.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI Express Root Port 1 (rev 13) (prog-if 00 [Normal decode]) > Capabilities: [90] Express Root Port (Slot+), MSI 00 > Capabilities: [160] Vendor Specific Information: ID=0002 Rev=0 Len=00c <?> > > https://www.reddit.com/r/linuxhardware/comments/187u87b/the_correct_way_to_identify_the_kernel_driver/ > 04:00.0 Network controller: Realtek Semiconductor Co., Ltd. Device c852 (rev 01) > Capabilities: [70] Express Endpoint, MSI 00 > Capabilities: [170] Vendor Specific Information: ID=0002 Rev=4 Len=100 <?> > --- > drivers/perf/dwc_pcie_pmu.c | 68 ++++++++++++++++++++----------------- > 1 file changed, 37 insertions(+), 31 deletions(-) > > diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c > index 9cbea9675e21..d022f498fa1a 100644 > --- a/drivers/perf/dwc_pcie_pmu.c > +++ b/drivers/perf/dwc_pcie_pmu.c > @@ -20,7 +20,6 @@ > #include <linux/sysfs.h> > #include <linux/types.h> > > -#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 > #define DWC_PCIE_EVENT_CNT_CTL 0x8 > > /* > @@ -100,14 +99,23 @@ struct dwc_pcie_dev_info { > struct list_head dev_node; > }; > > -struct dwc_pcie_vendor_id { > - int vendor_id; > +struct dwc_pcie_pmu_vsec_id { > + u16 vendor_id; > + u16 vsec_id; > + u8 vsec_rev; > }; > > -static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { > - {.vendor_id = PCI_VENDOR_ID_ALIBABA }, > - {.vendor_id = PCI_VENDOR_ID_AMPERE }, > - {.vendor_id = PCI_VENDOR_ID_QCOM }, > +/* > + * VSEC IDs are allocated by the vendor, so a given ID may mean different > + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. > + */ > +static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = { > + { .vendor_id = PCI_VENDOR_ID_ALIBABA, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > + { .vendor_id = PCI_VENDOR_ID_AMPERE, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > + { .vendor_id = PCI_VENDOR_ID_QCOM, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > {} /* terminator */ > }; > > @@ -519,31 +527,28 @@ static void dwc_pcie_unregister_pmu(void *data) > perf_pmu_unregister(&pcie_pmu->pmu); > } > > -static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) > +static u16 dwc_pcie_des_cap(struct pci_dev *pdev) > { > - const struct dwc_pcie_vendor_id *vid; > - u16 vsec = 0; > + const struct dwc_pcie_pmu_vsec_id *vid; > + u16 vsec; > u32 val; > > if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) > - return false; > + return 0; > > - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { > + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { How about checking the pdev->vendor with vid->vendor_id before search the vesc cap? + if (pdev->vendor != vid->vendor_id) + continue; > vsec = pci_find_vsec_capability(pdev, vid->vendor_id, > - DWC_PCIE_VSEC_RAS_DES_ID); > - if (vsec) > - break; > + vid->vsec_id); > + if (vsec) { > + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, > + &val); > + if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { > + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); > + return vsec; > + } > + } > } > - if (!vsec) > - return false; > - > - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); > - if (PCI_VNDR_HEADER_REV(val) != 0x04) > - return false; > - > - pci_dbg(pdev, > - "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); > - return true; > + return 0; > } Best Regards, Shuai
On Tue, Dec 10, 2024 at 08:04:17PM +0800, Shuai Xue wrote: > 在 2024/12/10 06:29, Bjorn Helgaas 写道: > > From: Bjorn Helgaas <bhelgaas@google.com> > > > > PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. > > Devices from different vendors may advertise a VSEC Capability with the DWC > > RAS DES functionality, but the vendors may assign different VSEC IDs. > > > > Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev > > chosen by the vendor. > > - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { > > + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { > > How about checking the pdev->vendor with vid->vendor_id before > search the vesc cap? > > + if (pdev->vendor != vid->vendor_id) > + continue; Every user of VSEC needs to specify the (Vendor ID, VSEC ID) and verify that the Vendor ID matches the device Vendor ID, so pci_find_vsec_capability() does this check internally, so I don't think we need to do it here. > > vsec = pci_find_vsec_capability(pdev, vid->vendor_id, > > - DWC_PCIE_VSEC_RAS_DES_ID); > > - if (vsec) > > - break; > > + vid->vsec_id);
On Mon, 9 Dec 2024, Bjorn Helgaas wrote: > From: Bjorn Helgaas <bhelgaas@google.com> > > PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. > Devices from different vendors may advertise a VSEC Capability with the DWC > RAS DES functionality, but the vendors may assign different VSEC IDs. > > Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev > chosen by the vendor. > > This does not fix a current problem because Alibaba, Ampere, and Qualcomm > all assigned the same VSEC ID and VSEC Rev for the DWC RAS DES Capability. > > The potential issue is that we may add support for a device from another > vendor, where the vendor has already assigned DWC_PCIE_VSEC_RAS_DES_ID > (0x02) for an unrelated VSEC. In that event, dwc_pcie_des_cap() would find > the unrelated VSEC and mistakenly assume it was a DWC RAS DES Capability. > > Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Hi Bjorn, Thanks for the patch. It looks good to me. Also, I quickly tested it on AmpereOne and everything seemed to be working as expected. Reviewed-and-tested-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> Cheers, Ilkka > --- > Sample devices that advertise VSEC Capabilities with VSEC ID=0x02 that are > unrelated to the DWC RAS DES functionality: > > https://community.nxp.com/t5/S32G/S32G3-PCIe-compliance-mode-set-speed-to-5-8Gbit-s/m-p/1875346#M7024 > 00:00.0 PCI bridge: Freescale Semiconductor Inc Device 4300 (prog-if 00 [Normal decode]) > Capabilities: [70] Express (v2) Root Port (Slot-), MSI 00 > Capabilities: [158 v1] Vendor Specific Information: ID=0002 Rev=4 Len=100 <?> > > https://github.com/google-coral/edgetpu/issues/743 > 0000:00:00.0 PCI bridge: NVIDIA Corporation Device 1ad0 (rev a1) (prog-if 00 [Normal decode]) > Capabilities: [70] Express (v2) Root Port (Slot-), MSI 00 > Capabilities: [1d0 v1] Vendor Specific Information: ID=0002 Rev=4 Len=100 > > https://www.linuxquestions.org/questions/linux-kernel-70/differences-in-'lspci-v'-output-4175495550/ > 00:01.0 PCI bridge: Intel Corporation 5520/5500/X58 I/O Hub PCI Express Root Port 1 (rev 13) (prog-if 00 [Normal decode]) > Capabilities: [90] Express Root Port (Slot+), MSI 00 > Capabilities: [160] Vendor Specific Information: ID=0002 Rev=0 Len=00c <?> > > https://www.reddit.com/r/linuxhardware/comments/187u87b/the_correct_way_to_identify_the_kernel_driver/ > 04:00.0 Network controller: Realtek Semiconductor Co., Ltd. Device c852 (rev 01) > Capabilities: [70] Express Endpoint, MSI 00 > Capabilities: [170] Vendor Specific Information: ID=0002 Rev=4 Len=100 <?> > --- > drivers/perf/dwc_pcie_pmu.c | 68 ++++++++++++++++++++----------------- > 1 file changed, 37 insertions(+), 31 deletions(-) > > diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c > index 9cbea9675e21..d022f498fa1a 100644 > --- a/drivers/perf/dwc_pcie_pmu.c > +++ b/drivers/perf/dwc_pcie_pmu.c > @@ -20,7 +20,6 @@ > #include <linux/sysfs.h> > #include <linux/types.h> > > -#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 > #define DWC_PCIE_EVENT_CNT_CTL 0x8 > > /* > @@ -100,14 +99,23 @@ struct dwc_pcie_dev_info { > struct list_head dev_node; > }; > > -struct dwc_pcie_vendor_id { > - int vendor_id; > +struct dwc_pcie_pmu_vsec_id { > + u16 vendor_id; > + u16 vsec_id; > + u8 vsec_rev; > }; > > -static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { > - {.vendor_id = PCI_VENDOR_ID_ALIBABA }, > - {.vendor_id = PCI_VENDOR_ID_AMPERE }, > - {.vendor_id = PCI_VENDOR_ID_QCOM }, > +/* > + * VSEC IDs are allocated by the vendor, so a given ID may mean different > + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. > + */ > +static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = { > + { .vendor_id = PCI_VENDOR_ID_ALIBABA, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > + { .vendor_id = PCI_VENDOR_ID_AMPERE, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > + { .vendor_id = PCI_VENDOR_ID_QCOM, > + .vsec_id = 0x02, .vsec_rev = 0x4 }, > {} /* terminator */ > }; > > @@ -519,31 +527,28 @@ static void dwc_pcie_unregister_pmu(void *data) > perf_pmu_unregister(&pcie_pmu->pmu); > } > > -static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) > +static u16 dwc_pcie_des_cap(struct pci_dev *pdev) > { > - const struct dwc_pcie_vendor_id *vid; > - u16 vsec = 0; > + const struct dwc_pcie_pmu_vsec_id *vid; > + u16 vsec; > u32 val; > > if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) > - return false; > + return 0; > > - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { > + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { > vsec = pci_find_vsec_capability(pdev, vid->vendor_id, > - DWC_PCIE_VSEC_RAS_DES_ID); > - if (vsec) > - break; > + vid->vsec_id); > + if (vsec) { > + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, > + &val); > + if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { > + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); > + return vsec; > + } > + } > } > - if (!vsec) > - return false; > - > - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); > - if (PCI_VNDR_HEADER_REV(val) != 0x04) > - return false; > - > - pci_dbg(pdev, > - "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); > - return true; > + return 0; > } > > static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) > @@ -587,7 +592,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb, > > switch (action) { > case BUS_NOTIFY_ADD_DEVICE: > - if (!dwc_pcie_match_des_cap(pdev)) > + if (!dwc_pcie_des_cap(pdev)) > return NOTIFY_DONE; > if (dwc_pcie_register_dev(pdev)) > return NOTIFY_BAD; > @@ -612,13 +617,14 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) > struct pci_dev *pdev = plat_dev->dev.platform_data; > struct dwc_pcie_pmu *pcie_pmu; > char *name; > - u32 sbdf, val; > + u32 sbdf; > u16 vsec; > int ret; > > - vsec = pci_find_vsec_capability(pdev, pdev->vendor, > - DWC_PCIE_VSEC_RAS_DES_ID); > - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); > + vsec = dwc_pcie_des_cap(pdev); > + if (!vsec) > + return -ENODEV; > + > sbdf = plat_dev->id; > name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); > if (!name) > @@ -730,7 +736,7 @@ static int __init dwc_pcie_pmu_init(void) > int ret; > > for_each_pci_dev(pdev) { > - if (!dwc_pcie_match_des_cap(pdev)) > + if (!dwc_pcie_des_cap(pdev)) > continue; > > ret = dwc_pcie_register_dev(pdev); > -- > 2.34.1 > >
在 2024/12/10 22:53, Bjorn Helgaas 写道: > On Tue, Dec 10, 2024 at 08:04:17PM +0800, Shuai Xue wrote: >> 在 2024/12/10 06:29, Bjorn Helgaas 写道: >>> From: Bjorn Helgaas <bhelgaas@google.com> >>> >>> PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. >>> Devices from different vendors may advertise a VSEC Capability with the DWC >>> RAS DES functionality, but the vendors may assign different VSEC IDs. >>> >>> Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev >>> chosen by the vendor. > >>> - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { >>> + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { >> >> How about checking the pdev->vendor with vid->vendor_id before >> search the vesc cap? >> >> + if (pdev->vendor != vid->vendor_id) >> + continue; > > Every user of VSEC needs to specify the (Vendor ID, VSEC ID) and > verify that the Vendor ID matches the device Vendor ID, so > pci_find_vsec_capability() does this check internally, so I don't > think we need to do it here. I see. LGTM. Also, I quickly tested it on Yitian 710 and it works as expected. Reviewed-and-tested-by: Shuai Xue <xueshuai@linux.alibaba.com> Thanks. Best Regards, Shuai
On Mon, 09 Dec 2024 16:29:38 -0600, Bjorn Helgaas wrote: > PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. > Devices from different vendors may advertise a VSEC Capability with the DWC > RAS DES functionality, but the vendors may assign different VSEC IDs. > > Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev > chosen by the vendor. > > [...] Thanks, Bjorn, for sorting this out and also to Shuai and Ilkka for giving it a whirl on their platforms. Applied to will (for-next/perf), thanks! [1/1] perf/dwc_pcie: Qualify RAS DES VSEC Capability by Vendor, Revision https://git.kernel.org/will/c/b34d605d120f Cheers,
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c index 9cbea9675e21..d022f498fa1a 100644 --- a/drivers/perf/dwc_pcie_pmu.c +++ b/drivers/perf/dwc_pcie_pmu.c @@ -20,7 +20,6 @@ #include <linux/sysfs.h> #include <linux/types.h> -#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 #define DWC_PCIE_EVENT_CNT_CTL 0x8 /* @@ -100,14 +99,23 @@ struct dwc_pcie_dev_info { struct list_head dev_node; }; -struct dwc_pcie_vendor_id { - int vendor_id; +struct dwc_pcie_pmu_vsec_id { + u16 vendor_id; + u16 vsec_id; + u8 vsec_rev; }; -static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { - {.vendor_id = PCI_VENDOR_ID_ALIBABA }, - {.vendor_id = PCI_VENDOR_ID_AMPERE }, - {.vendor_id = PCI_VENDOR_ID_QCOM }, +/* + * VSEC IDs are allocated by the vendor, so a given ID may mean different + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. + */ +static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = { + { .vendor_id = PCI_VENDOR_ID_ALIBABA, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_AMPERE, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_QCOM, + .vsec_id = 0x02, .vsec_rev = 0x4 }, {} /* terminator */ }; @@ -519,31 +527,28 @@ static void dwc_pcie_unregister_pmu(void *data) perf_pmu_unregister(&pcie_pmu->pmu); } -static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) +static u16 dwc_pcie_des_cap(struct pci_dev *pdev) { - const struct dwc_pcie_vendor_id *vid; - u16 vsec = 0; + const struct dwc_pcie_pmu_vsec_id *vid; + u16 vsec; u32 val; if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) - return false; + return 0; - for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { + for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { vsec = pci_find_vsec_capability(pdev, vid->vendor_id, - DWC_PCIE_VSEC_RAS_DES_ID); - if (vsec) - break; + vid->vsec_id); + if (vsec) { + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, + &val); + if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { + pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); + return vsec; + } + } } - if (!vsec) - return false; - - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); - if (PCI_VNDR_HEADER_REV(val) != 0x04) - return false; - - pci_dbg(pdev, - "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); - return true; + return 0; } static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) @@ -587,7 +592,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb, switch (action) { case BUS_NOTIFY_ADD_DEVICE: - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) return NOTIFY_DONE; if (dwc_pcie_register_dev(pdev)) return NOTIFY_BAD; @@ -612,13 +617,14 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) struct pci_dev *pdev = plat_dev->dev.platform_data; struct dwc_pcie_pmu *pcie_pmu; char *name; - u32 sbdf, val; + u32 sbdf; u16 vsec; int ret; - vsec = pci_find_vsec_capability(pdev, pdev->vendor, - DWC_PCIE_VSEC_RAS_DES_ID); - pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); + vsec = dwc_pcie_des_cap(pdev); + if (!vsec) + return -ENODEV; + sbdf = plat_dev->id; name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); if (!name) @@ -730,7 +736,7 @@ static int __init dwc_pcie_pmu_init(void) int ret; for_each_pci_dev(pdev) { - if (!dwc_pcie_match_des_cap(pdev)) + if (!dwc_pcie_des_cap(pdev)) continue; ret = dwc_pcie_register_dev(pdev);