Message ID | 20240618042941.96893-2-kobayashi.da-06@fujitsu.com |
---|---|
State | Superseded |
Headers | show |
Series | Export cxl1.1 device link status register value to pci device sysfs. | expand |
Kobayashi,Daisuke wrote: > Add rcd_pcie_cap and its initialization to cache the offset of cxl1.1 > device link status information. By caching it, avoid the walking > memory map area to find the offset when output the register value. > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > --- > drivers/cxl/core/core.h | 6 ++++ > drivers/cxl/core/regs.c | 61 +++++++++++++++++++++++++++++++++++++++++ > drivers/cxl/cxl.h | 9 ++++++ > drivers/cxl/pci.c | 8 ++++-- > 4 files changed, 82 insertions(+), 2 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 3b64fb1b9ed0..69006bde7ad5 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -74,6 +74,12 @@ resource_size_t __rcrb_to_component(struct device *dev, > struct cxl_rcrb_info *ri, > enum cxl_rcrb which); > u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); > +resource_size_t cxl_rcrb_to_linkcap(struct device *dev, struct cxl_dport *dport); Why is this function public? If it only has a caller within regs.c it can be marked static. > +#define PCI_RCRB_CAP_LIST_ID_MASK GENMASK(7, 0) > +#define PCI_RCRB_CAP_HDR_ID_MASK GENMASK(7, 0) > +#define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8) > +#define PCI_CAP_EXP_SIZEOF 0x3c > > extern struct rw_semaphore cxl_dpa_rwsem; > extern struct rw_semaphore cxl_region_rwsem; > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c > index 372786f80955..8a7cef1cf87d 100644 > --- a/drivers/cxl/core/regs.c > +++ b/drivers/cxl/core/regs.c > @@ -505,6 +505,67 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) > return offset; > } > > +resource_size_t cxl_rcrb_to_linkcap(struct device *dev, struct cxl_dport *dport) > +{ > + resource_size_t rcrb = dport->rcrb.base; > + void __iomem *addr; > + u32 cap_hdr; > + u16 offset; > + > + if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB")) > + return CXL_RESOURCE_NONE; > + > + addr = ioremap(rcrb, SZ_4K); > + if (!addr) { > + dev_err(dev, "Failed to map region %pr\n", addr); > + release_mem_region(rcrb, SZ_4K); > + return CXL_RESOURCE_NONE; > + } > + > + offset = FIELD_GET(PCI_RCRB_CAP_LIST_ID_MASK, readw(addr + PCI_CAPABILITY_LIST)); > + cap_hdr = readl(addr + offset); > + while ((FIELD_GET(PCI_RCRB_CAP_HDR_ID_MASK, cap_hdr)) != PCI_CAP_ID_EXP) { > + offset = FIELD_GET(PCI_RCRB_CAP_HDR_NEXT_MASK, cap_hdr); > + if (offset == 0 || offset > SZ_4K) { > + offset = 0; > + break; > + } > + cap_hdr = readl(addr + offset); I would feel better if this duplicated __pci_find_next_cap_ttl() as __rcrb_find_next_cap_ttl() and built an rcrb_find_capability() helper on top of that. > + } > + > + iounmap(addr); > + release_mem_region(rcrb, SZ_4K); > + if (!offset) > + return CXL_RESOURCE_NONE; > + > + return offset; > +} > + > +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev) > +{ > + void __iomem *dport_pcie_cap = NULL; > + resource_size_t rcd_pcie_offset; > + struct cxl_rcrb_info *ri; > + struct cxl_dport *dport; > + struct cxl_port *port; > + > + port = cxl_pci_find_port(pdev, &dport); > + if (!port) > + return -EPROBE_DEFER; There is a missing put_device() for this find. It also seems silly to find the port again after already finding it for cxl_rcrb_get_comp_regs(). > + > + ri = &dport->rcrb; > + rcd_pcie_offset = cxl_rcrb_to_linkcap(&pdev->dev, dport); > + if (rcd_pcie_offset > 0) This looks broken if cxl_rcrb_to_linkcap() returns CXL_RESOURCE_NONE. Also, lets s/rcd_pcie_offset/pos/ since @pos is already the common variable name for tracking PCI config register offsets. > + dport_pcie_cap = devm_cxl_iomap_block(&pdev->dev, > + ri->base + rcd_pcie_offset, > + PCI_CAP_EXP_SIZEOF); > + > + dport->regs.rcd_pcie_cap = dport_pcie_cap; > + > + return 0; > +} > +EXPORT_SYMBOL_NS_GPL(cxl_dport_map_rcd_linkcap, CXL); > + > resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, > enum cxl_rcrb which) > { > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index 003feebab79b..7e37ad6d84d6 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -230,6 +230,14 @@ struct cxl_regs { > struct_group_tagged(cxl_rch_regs, rch_regs, > void __iomem *dport_aer; > ); > + > + /* > + * RCD upstream port specific PCIe cap register > + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB > + */ > + struct_group_tagged(cxl_rcd_regs, rcd_regs, > + void __iomem *rcd_pcie_cap; > + ); > }; > > struct cxl_reg_map { > @@ -299,6 +307,7 @@ int cxl_setup_regs(struct cxl_register_map *map); > struct cxl_dport; > resource_size_t cxl_rcd_component_reg_phys(struct device *dev, > struct cxl_dport *dport); > +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev); > > #define CXL_RESOURCE_NONE ((resource_size_t) -1) > #define CXL_TARGET_STRLEN 20 > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 2ff361e756d6..bbc55732d6c1 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -512,11 +512,15 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, > * is an RCH and try to extract the Component Registers from > * an RCRB. > */ > - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) > + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { > rc = cxl_rcrb_get_comp_regs(pdev, map); > + if (rc) > + return rc; > > - if (rc) > + cxl_dport_map_rcd_linkcap(pdev); There is a chance that the port gets removed between finding it for cxl_rcrb_get_comp_regs() and finding it again for cxl_dport_map_rcd_linkcap(), so this misses the need to return EPROBE_DEFER if that latter race is lost. However, is also a chance that race can be lost *internal* to cxl_rcrb_get_comp_regs(). The only guarantee of cxl_pci_find_port() is that it returns a 'struct cxl_port' instance that will not be freed until put_device(&port->dev), but it makes no guarantee that the dport information is stable over that lifetime. I think this needs a leading fix patch that finds and locks the port and then passes that port to cxl_rcrb_get_comp_regs() and cxl_dport_map_rcd_linkcap(). Otherwise I think you could crash the kernel with a test that repeatedly reloads the cxl_acpi module while also reloading cxl_pci. Ugh, I was going to say copy what cxl_mem_probe() does around locking endpoint_parent before attaching further ports, but that also appears to not handle the same race. I.e. I think cxl_mem_probe() needs a fix to do this as well. I will copy you on a proposed patch for that.
Dan Williams wrote: > Kobayashi,Daisuke wrote: > > Add rcd_pcie_cap and its initialization to cache the offset of cxl1.1 > > device link status information. By caching it, avoid the walking > > memory map area to find the offset when output the register value. > > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > > --- > > drivers/cxl/core/core.h | 6 ++++ > > drivers/cxl/core/regs.c | 61 +++++++++++++++++++++++++++++++++++++++++ > > drivers/cxl/cxl.h | 9 ++++++ > > drivers/cxl/pci.c | 8 ++++-- > > 4 files changed, 82 insertions(+), 2 deletions(-) > > [..] > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > > index 2ff361e756d6..bbc55732d6c1 100644 > > --- a/drivers/cxl/pci.c > > +++ b/drivers/cxl/pci.c > > @@ -512,11 +512,15 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, > > * is an RCH and try to extract the Component Registers from > > * an RCRB. > > */ > > - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) > > + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { > > rc = cxl_rcrb_get_comp_regs(pdev, map); > > + if (rc) > > + return rc; > > > > - if (rc) > > + cxl_dport_map_rcd_linkcap(pdev); > [..] > Ugh, I was going to say copy what cxl_mem_probe() does around locking > endpoint_parent before attaching further ports, but that also appears to > not handle the same race. I.e. I think cxl_mem_probe() needs a fix to do > this as well. I will copy you on a proposed patch for that. I attempted to add the proper locking to keep cxl_dport live, but that runs into lockdep issues. So I think a better fix is rework dport lifetime to stay alive until the final put_device() of the port. In other words dport instances get added dynamically to the cxl_port, but only get destroyed after all port references are dropped. Then the @dport result from find_cxl_port() is not ephemeral. Given this is a latent bug that affects all current cxl_{mem,pci}_find_port() users, the planned fix is to just make dport lifetime longer, and that I will not have time to do that rework before v6.11 merge window, then I am ok for this lnkcap code to introduce another instance of the same bug. So, just make cxl_rcrb_get_comp_regs() and cxl_dport_map_rcd_linkcap() share the same port reference from one cxl_pci_find_port() call.
Dan Williams wrote: > Dan Williams wrote: > > Kobayashi,Daisuke wrote: > > > Add rcd_pcie_cap and its initialization to cache the offset of cxl1.1 > > > device link status information. By caching it, avoid the walking > > > memory map area to find the offset when output the register value. > > > > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > > > --- > > > drivers/cxl/core/core.h | 6 ++++ > > > drivers/cxl/core/regs.c | 61 > +++++++++++++++++++++++++++++++++++++++++ > > > drivers/cxl/cxl.h | 9 ++++++ > > > drivers/cxl/pci.c | 8 ++++-- > > > 4 files changed, 82 insertions(+), 2 deletions(-) > > > > [..] > > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > > > index 2ff361e756d6..bbc55732d6c1 100644 > > > --- a/drivers/cxl/pci.c > > > +++ b/drivers/cxl/pci.c > > > @@ -512,11 +512,15 @@ static int cxl_pci_setup_regs(struct pci_dev > *pdev, enum cxl_regloc_type type, > > > * is an RCH and try to extract the Component Registers from > > > * an RCRB. > > > */ > > > - if (rc && type == CXL_REGLOC_RBI_COMPONENT && > is_cxl_restricted(pdev)) > > > + if (rc && type == CXL_REGLOC_RBI_COMPONENT && > is_cxl_restricted(pdev)) { > > > rc = cxl_rcrb_get_comp_regs(pdev, map); > > > + if (rc) > > > + return rc; > > > > > > - if (rc) > > > + cxl_dport_map_rcd_linkcap(pdev); > > > [..] > > Ugh, I was going to say copy what cxl_mem_probe() does around locking > > endpoint_parent before attaching further ports, but that also appears to > > not handle the same race. I.e. I think cxl_mem_probe() needs a fix to do > > this as well. I will copy you on a proposed patch for that. > > I attempted to add the proper locking to keep cxl_dport live, but that > runs into lockdep issues. > > So I think a better fix is rework dport lifetime to stay alive until the > final put_device() of the port. In other words dport instances get added > dynamically to the cxl_port, but only get destroyed after all port > references are dropped. Then the @dport result from find_cxl_port() is > not ephemeral. > > Given this is a latent bug that affects all current > cxl_{mem,pci}_find_port() users, the planned fix is to just make dport > lifetime longer, and that I will not have time to do that rework before > v6.11 merge window, then I am ok for this lnkcap code to introduce > another instance of the same bug. > > So, just make cxl_rcrb_get_comp_regs() and cxl_dport_map_rcd_linkcap() > share the same port reference from one cxl_pci_find_port() call. Thanks for checking. I'd like to confirm my understanding of the comment. Are you suggesting that, due to time constraints with the current patch, cxl_rcrb_get_comp_regs() and cxl_dport_map_rcd_linkcap() should share the same dport reference as a temporary workaround for the bug regarding the dport lifetime? If that's what you mean, I think I can solve this problem by adding "struct cxl_dport *dport" to the arguments of the two functions to share the reference. In this implementation, I'm planning to run cxl_pci_find_port() in cxl_rcrb_get_comp_regs() and share the dport obtained there. You said that find requires a corresponding put_device(), but where is the correct place to run it in this case? Or is it better not to run it in this patch?
Daisuke Kobayashi (Fujitsu) wrote: > Dan Williams wrote: > > Dan Williams wrote: > > > Kobayashi,Daisuke wrote: > > > > Add rcd_pcie_cap and its initialization to cache the offset of cxl1.1 > > > > device link status information. By caching it, avoid the walking > > > > memory map area to find the offset when output the register value. > > > > > > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com> > > > > --- > > > > drivers/cxl/core/core.h | 6 ++++ > > > > drivers/cxl/core/regs.c | 61 > > +++++++++++++++++++++++++++++++++++++++++ > > > > drivers/cxl/cxl.h | 9 ++++++ > > > > drivers/cxl/pci.c | 8 ++++-- > > > > 4 files changed, 82 insertions(+), 2 deletions(-) > > > > > > [..] > > > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > > > > index 2ff361e756d6..bbc55732d6c1 100644 > > > > --- a/drivers/cxl/pci.c > > > > +++ b/drivers/cxl/pci.c > > > > @@ -512,11 +512,15 @@ static int cxl_pci_setup_regs(struct pci_dev > > *pdev, enum cxl_regloc_type type, > > > > * is an RCH and try to extract the Component Registers from > > > > * an RCRB. > > > > */ > > > > - if (rc && type == CXL_REGLOC_RBI_COMPONENT && > > is_cxl_restricted(pdev)) > > > > + if (rc && type == CXL_REGLOC_RBI_COMPONENT && > > is_cxl_restricted(pdev)) { > > > > rc = cxl_rcrb_get_comp_regs(pdev, map); > > > > + if (rc) > > > > + return rc; > > > > > > > > - if (rc) > > > > + cxl_dport_map_rcd_linkcap(pdev); > > > > > [..] > > > Ugh, I was going to say copy what cxl_mem_probe() does around locking > > > endpoint_parent before attaching further ports, but that also appears to > > > not handle the same race. I.e. I think cxl_mem_probe() needs a fix to do > > > this as well. I will copy you on a proposed patch for that. > > > > I attempted to add the proper locking to keep cxl_dport live, but that > > runs into lockdep issues. > > > > So I think a better fix is rework dport lifetime to stay alive until the > > final put_device() of the port. In other words dport instances get added > > dynamically to the cxl_port, but only get destroyed after all port > > references are dropped. Then the @dport result from find_cxl_port() is > > not ephemeral. > > > > Given this is a latent bug that affects all current > > cxl_{mem,pci}_find_port() users, the planned fix is to just make dport > > lifetime longer, and that I will not have time to do that rework before > > v6.11 merge window, then I am ok for this lnkcap code to introduce > > another instance of the same bug. > > > > So, just make cxl_rcrb_get_comp_regs() and cxl_dport_map_rcd_linkcap() > > share the same port reference from one cxl_pci_find_port() call. > > Thanks for checking. > > I'd like to confirm my understanding of the comment. Are you suggesting that, > due to time constraints with the current patch, cxl_rcrb_get_comp_regs() and > cxl_dport_map_rcd_linkcap() should share the same dport reference as a temporary > workaround for the bug regarding the dport lifetime? What I am saying is forget the bug for now, just trust that the @dport result from cxl_pci_find_port() is valid until the put_device() on the port. > If that's what you mean, I think I can solve this problem by adding > "struct cxl_dport *dport" to the arguments of the two functions to share the reference. Yes, that's what I want for this patch, but to be clear this does not fix the bug with cxl_pci_find_port(). That bug needs deeper work that you can ignore for now. Adding another cxl_pci_find_port() user just increases the urgency to get that bug fixed. To be clear it is definitely a use after-free issue, but it needs root to be bringing ports up and down during the "cxl_pci_find_port() -> put_device(@port->dev)" window. I expect you could trigger a crash by a "modprobe -r cxl_acpi; modprobe cxl_acpi" loop while accessing these sysfs files. > In this implementation, I'm planning to run cxl_pci_find_port() in > cxl_rcrb_get_comp_regs() and share the dport obtained there. You said > that find requires a corresponding put_device(), but where is the > correct place to run it in this case? Or is it better not to run it in > this patch? So the ordering I expect is something like: port = cxl_pci_find_port(...) if (!port) return -EPROBE_DEFER; rc = cxl_rcrb_get_comp_regs(dport, ...) if (rc) goto put; rc = cxl_dport_map_rcd_linkcap(dport, ...) if (rc) goto put; put: put_device(...) return rc;
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 3b64fb1b9ed0..69006bde7ad5 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -74,6 +74,12 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which); u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); +resource_size_t cxl_rcrb_to_linkcap(struct device *dev, struct cxl_dport *dport); + +#define PCI_RCRB_CAP_LIST_ID_MASK GENMASK(7, 0) +#define PCI_RCRB_CAP_HDR_ID_MASK GENMASK(7, 0) +#define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8) +#define PCI_CAP_EXP_SIZEOF 0x3c extern struct rw_semaphore cxl_dpa_rwsem; extern struct rw_semaphore cxl_region_rwsem; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 372786f80955..8a7cef1cf87d 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -505,6 +505,67 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) return offset; } +resource_size_t cxl_rcrb_to_linkcap(struct device *dev, struct cxl_dport *dport) +{ + resource_size_t rcrb = dport->rcrb.base; + void __iomem *addr; + u32 cap_hdr; + u16 offset; + + if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB")) + return CXL_RESOURCE_NONE; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) { + dev_err(dev, "Failed to map region %pr\n", addr); + release_mem_region(rcrb, SZ_4K); + return CXL_RESOURCE_NONE; + } + + offset = FIELD_GET(PCI_RCRB_CAP_LIST_ID_MASK, readw(addr + PCI_CAPABILITY_LIST)); + cap_hdr = readl(addr + offset); + while ((FIELD_GET(PCI_RCRB_CAP_HDR_ID_MASK, cap_hdr)) != PCI_CAP_ID_EXP) { + offset = FIELD_GET(PCI_RCRB_CAP_HDR_NEXT_MASK, cap_hdr); + if (offset == 0 || offset > SZ_4K) { + offset = 0; + break; + } + cap_hdr = readl(addr + offset); + } + + iounmap(addr); + release_mem_region(rcrb, SZ_4K); + if (!offset) + return CXL_RESOURCE_NONE; + + return offset; +} + +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev) +{ + void __iomem *dport_pcie_cap = NULL; + resource_size_t rcd_pcie_offset; + struct cxl_rcrb_info *ri; + struct cxl_dport *dport; + struct cxl_port *port; + + port = cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + ri = &dport->rcrb; + rcd_pcie_offset = cxl_rcrb_to_linkcap(&pdev->dev, dport); + if (rcd_pcie_offset > 0) + dport_pcie_cap = devm_cxl_iomap_block(&pdev->dev, + ri->base + rcd_pcie_offset, + PCI_CAP_EXP_SIZEOF); + + dport->regs.rcd_pcie_cap = dport_pcie_cap; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_map_rcd_linkcap, CXL); + resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 003feebab79b..7e37ad6d84d6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -230,6 +230,14 @@ struct cxl_regs { struct_group_tagged(cxl_rch_regs, rch_regs, void __iomem *dport_aer; ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); }; struct cxl_reg_map { @@ -299,6 +307,7 @@ int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; resource_size_t cxl_rcd_component_reg_phys(struct device *dev, struct cxl_dport *dport); +int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2ff361e756d6..bbc55732d6c1 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -512,11 +512,15 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, * is an RCH and try to extract the Component Registers from * an RCRB. */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { rc = cxl_rcrb_get_comp_regs(pdev, map); + if (rc) + return rc; - if (rc) + cxl_dport_map_rcd_linkcap(pdev); + } else if (rc) { return rc; + } return cxl_setup_regs(map); }