Message ID | 20220531152632.1397976-7-ira.weiny@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL: Read CDAT and DSMAS data | expand |
On Tue, 31 May 2022 08:26:29 -0700 ira.weiny@intel.com wrote: > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > The OS will need CDAT data from CXL devices to properly set up > interleave sets. Currently this is supported through a DOE mailbox > which supports CDAT. > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > to allow dumping of the CDAT. > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > The ability to dump this table will be very useful for emulation of real > devices once they become available as QEMU CXL type 3 device emulation will > be able to load this file in. > > This does not support table updates at runtime. It will always provide > whatever was there when first cached. Handling of table updates can be > implemented later. > > Finally create a complete list of DOE defines within cdat.h for code > wishing to decode the CDAT table. > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > Fun question of ownership inline... ... > +void read_cdat_data(struct cxl_port *port) > +{ > + struct device *dev = &port->dev; > + size_t cdat_length; > + int ret; > + > + if (cxl_cdat_get_length(port, &cdat_length)) > + return; > + > + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); boom. See below for why :) > + if (!port->cdat.table) { > + ret = -ENOMEM; > + goto error; > + } > + > + port->cdat.length = cdat_length; > + ret = cxl_cdat_read_table(port, &port->cdat); > + if (ret) { > + devm_kfree(dev, port->cdat.table); > + port->cdat.table = NULL; > + port->cdat.length = 0; > + ret = -EIO; > + goto error; > + } > + > + return; > +error: > + dev_err(dev, "CDAT data read error (%d)\n", ret); > +} > +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index 2e2bd65c1024..aa4229ddc1bc 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > @@ -320,7 +320,48 @@ static void cxl_port_release(struct device *dev) > kfree(port); > } > > +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, > + struct bin_attribute *bin_attr, char *buf, > + loff_t offset, size_t count) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct cxl_port *port = to_cxl_port(dev); > + > + if (!port->cdat.table) > + return 0; > + > + return memory_read_from_buffer(buf, count, &offset, > + port->cdat.table, > + port->cdat.length); > +} > + > +static BIN_ATTR_RO(cdat, 0); > + > +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, > + struct bin_attribute *attr, int i) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct cxl_port *port = to_cxl_port(dev); > + > + if ((attr == &bin_attr_cdat) && port->cdat.table) > + return 0400; > + > + return 0; > +} > + > +static struct bin_attribute *cxl_cdat_bin_attributes[] = { > + &bin_attr_cdat, > + NULL, > +}; > + > +static struct attribute_group cxl_cdat_attribute_group = { > + .name = "CDAT", > + .bin_attrs = cxl_cdat_bin_attributes, > + .is_bin_visible = cxl_port_bin_attr_is_visible, > +}; > + > static const struct attribute_group *cxl_port_attribute_groups[] = { > + &cxl_cdat_attribute_group,o > &cxl_base_attribute_group, > NULL, > }; > @@ -462,6 +503,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, > return port; > > cxl_find_cdat_mb(port); > + /* Cache the data early to ensure is_visible() works */ > + read_cdat_data(port); This uses port as the 'device' for devm_ calls. Unfortunately if the port driver isn't loaded, it still "successfully" runs. Then if the port driver is probed, you get both a bunch of errors due to devm_ allocations on a device before the driver is loaded. For extra fun it tries to probe the ports multiple times without freeing the index which is 'interesting'. We had this happen a while ago (unrelated to DOE) but this may be unrelated (or maybe related to the region stuff I'm carrying on my test tree) As to the question of what the correct fix is... Maybe move them into the port driver probe but then is_visible won't work. Or pass a pointer to the struct device *host down into read_cdat_data and __read_cdat_data calls to handle the allocation. (I tried this and it seems superficially fine). Jonathan > > dev = &port->dev; > if (is_cxl_memdev(uport)) > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index 0a86be589ffc..531b77d296c7 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -8,6 +8,7 @@ > #include <linux/bitfield.h> > #include <linux/bitops.h> > #include <linux/io.h> > +#include "cdat.h" > > /** > * DOC: cxl objects > @@ -268,6 +269,7 @@ struct cxl_nvdimm { > * @dead: last ep has been removed, force port re-creation > * @depth: How deep this port is relative to the root. depth 0 is the root. > * @cdat_mb: Mailbox which supports the CDAT protocol > + * @cdat: Cached CDAT data > */ > struct cxl_port { > struct device dev; > @@ -280,6 +282,7 @@ struct cxl_port { > bool dead; > unsigned int depth; > struct pci_doe_mb *cdat_mb; > + struct cxl_cdat cdat; > }; > > /** > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > index 366b21bd1a01..35f0d4892eaa 100644 > --- a/drivers/cxl/cxlpci.h > +++ b/drivers/cxl/cxlpci.h > @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); > struct cxl_dev_state; > int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); > void cxl_find_cdat_mb(struct cxl_port *port); > +void read_cdat_data(struct cxl_port *port); > #endif /* __CXL_PCI_H__ */
On Wed, 1 Jun 2022 16:35:40 +0100 Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > On Tue, 31 May 2022 08:26:29 -0700 > ira.weiny@intel.com wrote: > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > The OS will need CDAT data from CXL devices to properly set up > > interleave sets. Currently this is supported through a DOE mailbox > > which supports CDAT. > > > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > > to allow dumping of the CDAT. > > > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > > > The ability to dump this table will be very useful for emulation of real > > devices once they become available as QEMU CXL type 3 device emulation will > > be able to load this file in. > > > > This does not support table updates at runtime. It will always provide > > whatever was there when first cached. Handling of table updates can be > > implemented later. > > > > Finally create a complete list of DOE defines within cdat.h for code > > wishing to decode the CDAT table. > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > Fun question of ownership inline... And a follow up due to triggering a bug that predated this series... I'd send a fix, but I'm off on a long weekend shortly :) > > ... > > > +void read_cdat_data(struct cxl_port *port) > > +{ > > + struct device *dev = &port->dev; > > + size_t cdat_length; > > + int ret; > > + > > + if (cxl_cdat_get_length(port, &cdat_length)) > > + return; > > + > > + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); > > boom. See below for why :) > > > + if (!port->cdat.table) { > > + ret = -ENOMEM; > > + goto error; > > + } > > + > > + port->cdat.length = cdat_length; > > + ret = cxl_cdat_read_table(port, &port->cdat); > > + if (ret) { > > + devm_kfree(dev, port->cdat.table); > > + port->cdat.table = NULL; > > + port->cdat.length = 0; > > + ret = -EIO; > > + goto error; > > + } > > + > > + return; > > +error: > > + dev_err(dev, "CDAT data read error (%d)\n", ret); > > +} > > +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > index 2e2bd65c1024..aa4229ddc1bc 100644 > > --- a/drivers/cxl/core/port.c > > +++ b/drivers/cxl/core/port.c > > @@ -320,7 +320,48 @@ static void cxl_port_release(struct device *dev) > > kfree(port); > > } > > > > +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, > > + struct bin_attribute *bin_attr, char *buf, > > + loff_t offset, size_t count) > > +{ > > + struct device *dev = kobj_to_dev(kobj); > > + struct cxl_port *port = to_cxl_port(dev); > > + > > + if (!port->cdat.table) > > + return 0; > > + > > + return memory_read_from_buffer(buf, count, &offset, > > + port->cdat.table, > > + port->cdat.length); > > +} > > + > > +static BIN_ATTR_RO(cdat, 0); > > + > > +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, > > + struct bin_attribute *attr, int i) > > +{ > > + struct device *dev = kobj_to_dev(kobj); > > + struct cxl_port *port = to_cxl_port(dev); > > + > > + if ((attr == &bin_attr_cdat) && port->cdat.table) > > + return 0400; > > + > > + return 0; > > +} > > + > > +static struct bin_attribute *cxl_cdat_bin_attributes[] = { > > + &bin_attr_cdat, > > + NULL, > > +}; > > + > > +static struct attribute_group cxl_cdat_attribute_group = { > > + .name = "CDAT", > > + .bin_attrs = cxl_cdat_bin_attributes, > > + .is_bin_visible = cxl_port_bin_attr_is_visible, > > +}; > > + > > static const struct attribute_group *cxl_port_attribute_groups[] = { > > + &cxl_cdat_attribute_group,o > > &cxl_base_attribute_group, > > NULL, > > }; > > @@ -462,6 +503,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, > > return port; > > > > cxl_find_cdat_mb(port); > > + /* Cache the data early to ensure is_visible() works */ > > + read_cdat_data(port); > > This uses port as the 'device' for devm_ calls. > Unfortunately if the port driver isn't loaded, it still "successfully" runs. > Then if the port driver is probed, you get both a bunch of errors due to > devm_ allocations on a device before the driver is loaded. > > For extra fun it tries to probe the ports multiple times without freeing > the index which is 'interesting'. We had this happen a while ago (unrelated > to DOE) but this may be unrelated (or maybe related to the region stuff > I'm carrying on my test tree) So the extra devices are a result of the obvious initial path of cxl_mem_probe() from module probe and down failing, then bus_rescan_devices coming along from the pmem driver having another go... So next question is why don't the first set of endpointX devices get cleaned up if the create_endpoint() in cxl/mem.c fails. I think this is because the cxl_endpoint_autoremove() call hasn't yet happened when we detect the failure to bind a driver and error out of cxl_mem_probe(). So, fix is probably to do the cxl_endpoint_autoremove() unconditionally by moving it before the check on driver binding. https://elixir.bootlin.com/linux/latest/source/drivers/cxl/mem.c#L67 Move it up a few lines. > > As to the question of what the correct fix is... > Maybe move them into the port driver probe but then is_visible > won't work. Or pass a pointer to the struct device *host down > into read_cdat_data and __read_cdat_data calls to handle the > allocation. (I tried this and it seems superficially fine). > > Jonathan > > > > > > > dev = &port->dev; > > if (is_cxl_memdev(uport)) > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > > index 0a86be589ffc..531b77d296c7 100644 > > --- a/drivers/cxl/cxl.h > > +++ b/drivers/cxl/cxl.h > > @@ -8,6 +8,7 @@ > > #include <linux/bitfield.h> > > #include <linux/bitops.h> > > #include <linux/io.h> > > +#include "cdat.h" > > > > /** > > * DOC: cxl objects > > @@ -268,6 +269,7 @@ struct cxl_nvdimm { > > * @dead: last ep has been removed, force port re-creation > > * @depth: How deep this port is relative to the root. depth 0 is the root. > > * @cdat_mb: Mailbox which supports the CDAT protocol > > + * @cdat: Cached CDAT data > > */ > > struct cxl_port { > > struct device dev; > > @@ -280,6 +282,7 @@ struct cxl_port { > > bool dead; > > unsigned int depth; > > struct pci_doe_mb *cdat_mb; > > + struct cxl_cdat cdat; > > }; > > > > /** > > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > > index 366b21bd1a01..35f0d4892eaa 100644 > > --- a/drivers/cxl/cxlpci.h > > +++ b/drivers/cxl/cxlpci.h > > @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); > > struct cxl_dev_state; > > int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); > > void cxl_find_cdat_mb(struct cxl_port *port); > > +void read_cdat_data(struct cxl_port *port); > > #endif /* __CXL_PCI_H__ */ >
On Wed, Jun 01, 2022 at 05:31:13PM +0100, Jonathan Cameron wrote: > On Wed, 1 Jun 2022 16:35:40 +0100 > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > > > On Tue, 31 May 2022 08:26:29 -0700 > > ira.weiny@intel.com wrote: > > > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > > The OS will need CDAT data from CXL devices to properly set up > > > interleave sets. Currently this is supported through a DOE mailbox > > > which supports CDAT. > > > > > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > > > to allow dumping of the CDAT. > > > > > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > > > > > The ability to dump this table will be very useful for emulation of real > > > devices once they become available as QEMU CXL type 3 device emulation will > > > be able to load this file in. > > > > > > This does not support table updates at runtime. It will always provide > > > whatever was there when first cached. Handling of table updates can be > > > implemented later. > > > > > > Finally create a complete list of DOE defines within cdat.h for code > > > wishing to decode the CDAT table. > > > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > > > > Fun question of ownership inline... > > And a follow up due to triggering a bug that predated this series... > > I'd send a fix, but I'm off on a long weekend shortly :) NP I discussed with Dan and the use of dev_groups should allow me to move this to port probe where it belongs. I put it here for the sysfs stuff. > > > > > ... > > > > > +void read_cdat_data(struct cxl_port *port) > > > +{ > > > + struct device *dev = &port->dev; > > > + size_t cdat_length; > > > + int ret; > > > + > > > + if (cxl_cdat_get_length(port, &cdat_length)) > > > + return; > > > + > > > + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); > > > > boom. See below for why :) > > > > > + if (!port->cdat.table) { > > > + ret = -ENOMEM; > > > + goto error; > > > + } > > > + > > > + port->cdat.length = cdat_length; > > > + ret = cxl_cdat_read_table(port, &port->cdat); > > > + if (ret) { > > > + devm_kfree(dev, port->cdat.table); > > > + port->cdat.table = NULL; > > > + port->cdat.length = 0; > > > + ret = -EIO; > > > + goto error; > > > + } > > > + > > > + return; > > > +error: > > > + dev_err(dev, "CDAT data read error (%d)\n", ret); > > > +} > > > +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > > index 2e2bd65c1024..aa4229ddc1bc 100644 > > > --- a/drivers/cxl/core/port.c > > > +++ b/drivers/cxl/core/port.c > > > @@ -320,7 +320,48 @@ static void cxl_port_release(struct device *dev) > > > kfree(port); > > > } > > > > > > +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, > > > + struct bin_attribute *bin_attr, char *buf, > > > + loff_t offset, size_t count) > > > +{ > > > + struct device *dev = kobj_to_dev(kobj); > > > + struct cxl_port *port = to_cxl_port(dev); > > > + > > > + if (!port->cdat.table) > > > + return 0; > > > + > > > + return memory_read_from_buffer(buf, count, &offset, > > > + port->cdat.table, > > > + port->cdat.length); > > > +} > > > + > > > +static BIN_ATTR_RO(cdat, 0); > > > + > > > +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, > > > + struct bin_attribute *attr, int i) > > > +{ > > > + struct device *dev = kobj_to_dev(kobj); > > > + struct cxl_port *port = to_cxl_port(dev); > > > + > > > + if ((attr == &bin_attr_cdat) && port->cdat.table) > > > + return 0400; > > > + > > > + return 0; > > > +} > > > + > > > +static struct bin_attribute *cxl_cdat_bin_attributes[] = { > > > + &bin_attr_cdat, > > > + NULL, > > > +}; > > > + > > > +static struct attribute_group cxl_cdat_attribute_group = { > > > + .name = "CDAT", > > > + .bin_attrs = cxl_cdat_bin_attributes, > > > + .is_bin_visible = cxl_port_bin_attr_is_visible, > > > +}; > > > + > > > static const struct attribute_group *cxl_port_attribute_groups[] = { > > > + &cxl_cdat_attribute_group,o > > > &cxl_base_attribute_group, > > > NULL, > > > }; > > > @@ -462,6 +503,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, > > > return port; > > > > > > cxl_find_cdat_mb(port); > > > + /* Cache the data early to ensure is_visible() works */ > > > + read_cdat_data(port); > > > > This uses port as the 'device' for devm_ calls. > > Unfortunately if the port driver isn't loaded, it still "successfully" runs. > > Then if the port driver is probed, you get both a bunch of errors due to > > devm_ allocations on a device before the driver is loaded. > > > > For extra fun it tries to probe the ports multiple times without freeing > > the index which is 'interesting'. We had this happen a while ago (unrelated > > to DOE) but this may be unrelated (or maybe related to the region stuff > > I'm carrying on my test tree) > > So the extra devices are a result of the obvious initial path of > cxl_mem_probe() from module probe and down failing, then bus_rescan_devices > coming along from the pmem driver having another go... > > So next question is why don't the first set of endpointX devices get cleaned up > if the create_endpoint() in cxl/mem.c fails. > > I think this is because the cxl_endpoint_autoremove() call hasn't yet happened > when we detect the failure to bind a driver and error out of cxl_mem_probe(). > > So, fix is probably to do the cxl_endpoint_autoremove() unconditionally > by moving it before the check on driver binding. > > https://elixir.bootlin.com/linux/latest/source/drivers/cxl/mem.c#L67 > > Move it up a few lines. > I think I can make it work by moving into the port probe. Ira > > > > > > As to the question of what the correct fix is... > > Maybe move them into the port driver probe but then is_visible > > won't work. Or pass a pointer to the struct device *host down > > into read_cdat_data and __read_cdat_data calls to handle the > > allocation. (I tried this and it seems superficially fine). > > > > Jonathan > > > > > > > > > > > > dev = &port->dev; > > > if (is_cxl_memdev(uport)) > > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > > > index 0a86be589ffc..531b77d296c7 100644 > > > --- a/drivers/cxl/cxl.h > > > +++ b/drivers/cxl/cxl.h > > > @@ -8,6 +8,7 @@ > > > #include <linux/bitfield.h> > > > #include <linux/bitops.h> > > > #include <linux/io.h> > > > +#include "cdat.h" > > > > > > /** > > > * DOC: cxl objects > > > @@ -268,6 +269,7 @@ struct cxl_nvdimm { > > > * @dead: last ep has been removed, force port re-creation > > > * @depth: How deep this port is relative to the root. depth 0 is the root. > > > * @cdat_mb: Mailbox which supports the CDAT protocol > > > + * @cdat: Cached CDAT data > > > */ > > > struct cxl_port { > > > struct device dev; > > > @@ -280,6 +282,7 @@ struct cxl_port { > > > bool dead; > > > unsigned int depth; > > > struct pci_doe_mb *cdat_mb; > > > + struct cxl_cdat cdat; > > > }; > > > > > > /** > > > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > > > index 366b21bd1a01..35f0d4892eaa 100644 > > > --- a/drivers/cxl/cxlpci.h > > > +++ b/drivers/cxl/cxlpci.h > > > @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); > > > struct cxl_dev_state; > > > int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); > > > void cxl_find_cdat_mb(struct cxl_port *port); > > > +void read_cdat_data(struct cxl_port *port); > > > #endif /* __CXL_PCI_H__ */ > > >
On Thu, Jun 02, 2022 at 11:31:56AM -0700, Ira wrote: > On Wed, Jun 01, 2022 at 05:31:13PM +0100, Jonathan Cameron wrote: > > On Wed, 1 Jun 2022 16:35:40 +0100 > > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > > > > > On Tue, 31 May 2022 08:26:29 -0700 > > > ira.weiny@intel.com wrote: > > > > > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > > > > The OS will need CDAT data from CXL devices to properly set up > > > > interleave sets. Currently this is supported through a DOE mailbox > > > > which supports CDAT. > > > > > > > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > > > > to allow dumping of the CDAT. > > > > > > > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > > > > > > > The ability to dump this table will be very useful for emulation of real > > > > devices once they become available as QEMU CXL type 3 device emulation will > > > > be able to load this file in. > > > > > > > > This does not support table updates at runtime. It will always provide > > > > whatever was there when first cached. Handling of table updates can be > > > > implemented later. > > > > > > > > Finally create a complete list of DOE defines within cdat.h for code > > > > wishing to decode the CDAT table. > > > > > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > > > > > > > Fun question of ownership inline... > > > > And a follow up due to triggering a bug that predated this series... > > > > I'd send a fix, but I'm off on a long weekend shortly :) > > NP I discussed with Dan and the use of dev_groups should allow me to move this > to port probe where it belongs. I put it here for the sysfs stuff. Not to make a habit of replying to my own mails but this works. So I'm going to go forward with spinning this again. Ira
On Thu, 2 Jun 2022 15:47:37 -0700 Ira Weiny <ira.weiny@intel.com> wrote: > On Thu, Jun 02, 2022 at 11:31:56AM -0700, Ira wrote: > > On Wed, Jun 01, 2022 at 05:31:13PM +0100, Jonathan Cameron wrote: > > > On Wed, 1 Jun 2022 16:35:40 +0100 > > > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > > > > > > > On Tue, 31 May 2022 08:26:29 -0700 > > > > ira.weiny@intel.com wrote: > > > > > > > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > > > > > > The OS will need CDAT data from CXL devices to properly set up > > > > > interleave sets. Currently this is supported through a DOE mailbox > > > > > which supports CDAT. > > > > > > > > > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > > > > > to allow dumping of the CDAT. > > > > > > > > > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > > > > > > > > > The ability to dump this table will be very useful for emulation of real > > > > > devices once they become available as QEMU CXL type 3 device emulation will > > > > > be able to load this file in. > > > > > > > > > > This does not support table updates at runtime. It will always provide > > > > > whatever was there when first cached. Handling of table updates can be > > > > > implemented later. > > > > > > > > > > Finally create a complete list of DOE defines within cdat.h for code > > > > > wishing to decode the CDAT table. > > > > > > > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > > > > > > > > > > Fun question of ownership inline... > > > > > > And a follow up due to triggering a bug that predated this series... > > > > > > I'd send a fix, but I'm off on a long weekend shortly :) > > > > NP I discussed with Dan and the use of dev_groups should allow me to move this > > to port probe where it belongs. I put it here for the sysfs stuff. > > Not to make a habit of replying to my own mails but this works. > > So I'm going to go forward with spinning this again. It's the right solution - cleaner than moving the ownership. Jonathan > > Ira
diff --git a/drivers/cxl/cdat.h b/drivers/cxl/cdat.h new file mode 100644 index 000000000000..f5193a6a51fe --- /dev/null +++ b/drivers/cxl/cdat.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __CXL_CDAT_H__ +#define __CXL_CDAT_H__ + +/* + * Coherent Device Attribute table (CDAT) + * + * Specification available from UEFI.org + * + * Whilst CDAT is defined as a single table, the access via DOE maiboxes is + * done one entry at a time, where the first entry is the header. + */ + +#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff +#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0 +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00 +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0 +#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000 + +/* + * CDAT entries are little endian and are read from PCI config space which + * is also little endian. + * As such, on a big endian system these will have been reversed. + * This prevents us from making easy use of packed structures. + * Style form pci_regs.h + */ + +#define CDAT_HEADER_LENGTH_DW 4 +#define CDAT_HEADER_LENGTH_BYTES (CDAT_HEADER_LENGTH_DW * sizeof(u32)) +#define CDAT_HEADER_DW0_LENGTH 0xffffffff +#define CDAT_HEADER_DW1_REVISION 0x000000ff +#define CDAT_HEADER_DW1_CHECKSUM 0x0000ff00 +/* CDAT_HEADER_DW2_RESERVED */ +#define CDAT_HEADER_DW3_SEQUENCE 0xffffffff + +/* All structures have a common first DW */ +#define CDAT_STRUCTURE_DW0_TYPE 0x000000ff +#define CDAT_STRUCTURE_DW0_TYPE_DSMAS 0 +#define CDAT_STRUCTURE_DW0_TYPE_DSLBIS 1 +#define CDAT_STRUCTURE_DW0_TYPE_DSMSCIS 2 +#define CDAT_STRUCTURE_DW0_TYPE_DSIS 3 +#define CDAT_STRUCTURE_DW0_TYPE_DSEMTS 4 +#define CDAT_STRUCTURE_DW0_TYPE_SSLBIS 5 + +#define CDAT_STRUCTURE_DW0_LENGTH 0xffff0000 + +/* Device Scoped Memory Affinity Structure */ +#define CDAT_DSMAS_DW1_DSMAD_HANDLE 0x000000ff +#define CDAT_DSMAS_DW1_FLAGS 0x0000ff00 +#define CDAT_DSMAS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSMAS_DPA_LEN(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) +#define CDAT_DSMAS_NON_VOLATILE(flags) ((flags & 0x04) >> 2) + +/* Device Scoped Latency and Bandwidth Information Structure */ +#define CDAT_DSLBIS_DW1_HANDLE 0x000000ff +#define CDAT_DSLBIS_DW1_FLAGS 0x0000ff00 +#define CDAT_DSLBIS_DW1_DATA_TYPE 0x00ff0000 +#define CDAT_DSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSLBIS_DW4_ENTRY_0 0x0000ffff +#define CDAT_DSLBIS_DW4_ENTRY_1 0xffff0000 +#define CDAT_DSLBIS_DW5_ENTRY_2 0x0000ffff + +/* Device Scoped Memory Side Cache Information Structure */ +#define CDAT_DSMSCIS_DW1_HANDLE 0x000000ff +#define CDAT_DSMSCIS_MEMORY_SIDE_CACHE_SIZE(entry) \ + ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSMSCIS_DW4_MEMORY_SIDE_CACHE_ATTRS 0xffffffff + +/* Device Scoped Initiator Structure */ +#define CDAT_DSIS_DW1_FLAGS 0x000000ff +#define CDAT_DSIS_DW1_HANDLE 0x0000ff00 + +/* Device Scoped EFI Memory Type Structure */ +#define CDAT_DSEMTS_DW1_HANDLE 0x000000ff +#define CDAT_DSEMTS_DW1_EFI_MEMORY_TYPE_ATTR 0x0000ff00 +#define CDAT_DSEMTS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSEMTS_DPA_LENGTH(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) + +/* Switch Scoped Latency and Bandwidth Information Structure */ +#define CDAT_SSLBIS_DW1_DATA_TYPE 0x000000ff +#define CDAT_SSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_SSLBIS_ENTRY_PORT_X(entry, i) ((entry)[4 + (i) * 2] & 0x0000ffff) +#define CDAT_SSLBIS_ENTRY_PORT_Y(entry, i) (((entry)[4 + (i) * 2] & 0xffff0000) >> 16) +#define CDAT_SSLBIS_ENTRY_LAT_OR_BW(entry, i) ((entry)[4 + (i) * 2 + 1] & 0x0000ffff) + +#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 + +/** + * struct cxl_cdat - CXL CDAT data + * + * @table: cache of CDAT table + * @length: length of cached CDAT table + */ +struct cxl_cdat { + void *table; + size_t length; +}; + +#endif /* !__CXL_CDAT_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 5497a65bdcf3..4c25a7d7abfd 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -9,8 +9,7 @@ #include <cxlmem.h> #include <cxl.h> #include "core.h" - -#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 +#include "cdat.h" /** * DOC: cxl core pci @@ -486,3 +485,132 @@ void cxl_find_cdat_mb(struct cxl_port *port) } } EXPORT_SYMBOL_NS_GPL(cxl_find_cdat_mb, CXL); + +#define CDAT_DOE_REQ(entry_handle) \ + (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ + CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ + FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ + CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \ + FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle))) + +static void cxl_doe_task_complete(struct pci_doe_task *task) +{ + complete(task->private); +} + +static int cxl_cdat_get_length(struct cxl_port *port, size_t *length) +{ + u32 cdat_request_pl = CDAT_DOE_REQ(0); + u32 cdat_response_pl[32]; + DECLARE_COMPLETION_ONSTACK(c); + struct pci_doe_task task = { + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, + .request_pl = &cdat_request_pl, + .request_pl_sz = sizeof(cdat_request_pl), + .response_pl = cdat_response_pl, + .response_pl_sz = sizeof(cdat_response_pl), + .complete = cxl_doe_task_complete, + .private = &c, + }; + int rc = 0; + + if (!port->cdat_mb) + return -EIO; + + rc = pci_doe_submit_task(port->cdat_mb, &task); + if (rc < 0) { + dev_err(&port->dev, "DOE submit failed: %d", rc); + return rc; + } + wait_for_completion(&c); + + if (task.rv < 1) + return -EIO; + + *length = cdat_response_pl[1]; + dev_dbg(&port->dev, "CDAT length %zu\n", *length); + + return rc; +} + +static int cxl_cdat_read_table(struct cxl_port *port, + struct cxl_cdat *cdat) +{ + size_t length = cdat->length; + u32 *data = cdat->table; + int entry_handle = 0; + int rc = 0; + + if (!port->cdat_mb) + return -EIO; + + do { + u32 cdat_request_pl = CDAT_DOE_REQ(entry_handle); + u32 cdat_response_pl[32]; + DECLARE_COMPLETION_ONSTACK(c); + struct pci_doe_task task = { + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, + .request_pl = &cdat_request_pl, + .request_pl_sz = sizeof(cdat_request_pl), + .response_pl = cdat_response_pl, + .response_pl_sz = sizeof(cdat_response_pl), + .complete = cxl_doe_task_complete, + .private = &c, + }; + size_t entry_dw; + u32 *entry; + + rc = pci_doe_submit_task(port->cdat_mb, &task); + if (rc < 0) { + dev_err(&port->dev, "DOE submit failed: %d", rc); + return rc; + } + wait_for_completion(&c); + + entry = cdat_response_pl + 1; + entry_dw = task.rv / sizeof(u32); + /* Skip Header */ + entry_dw -= 1; + entry_dw = min(length / 4, entry_dw); + memcpy(data, entry, entry_dw * sizeof(u32)); + length -= entry_dw * sizeof(u32); + data += entry_dw; + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); + + } while (entry_handle != 0xFFFF); + + return rc; +} + +void read_cdat_data(struct cxl_port *port) +{ + struct device *dev = &port->dev; + size_t cdat_length; + int ret; + + if (cxl_cdat_get_length(port, &cdat_length)) + return; + + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); + if (!port->cdat.table) { + ret = -ENOMEM; + goto error; + } + + port->cdat.length = cdat_length; + ret = cxl_cdat_read_table(port, &port->cdat); + if (ret) { + devm_kfree(dev, port->cdat.table); + port->cdat.table = NULL; + port->cdat.length = 0; + ret = -EIO; + goto error; + } + + return; +error: + dev_err(dev, "CDAT data read error (%d)\n", ret); +} +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2e2bd65c1024..aa4229ddc1bc 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -320,7 +320,48 @@ static void cxl_port_release(struct device *dev) kfree(port); } +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t offset, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_port *port = to_cxl_port(dev); + + if (!port->cdat.table) + return 0; + + return memory_read_from_buffer(buf, count, &offset, + port->cdat.table, + port->cdat.length); +} + +static BIN_ATTR_RO(cdat, 0); + +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, + struct bin_attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_port *port = to_cxl_port(dev); + + if ((attr == &bin_attr_cdat) && port->cdat.table) + return 0400; + + return 0; +} + +static struct bin_attribute *cxl_cdat_bin_attributes[] = { + &bin_attr_cdat, + NULL, +}; + +static struct attribute_group cxl_cdat_attribute_group = { + .name = "CDAT", + .bin_attrs = cxl_cdat_bin_attributes, + .is_bin_visible = cxl_port_bin_attr_is_visible, +}; + static const struct attribute_group *cxl_port_attribute_groups[] = { + &cxl_cdat_attribute_group, &cxl_base_attribute_group, NULL, }; @@ -462,6 +503,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, return port; cxl_find_cdat_mb(port); + /* Cache the data early to ensure is_visible() works */ + read_cdat_data(port); dev = &port->dev; if (is_cxl_memdev(uport)) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0a86be589ffc..531b77d296c7 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/io.h> +#include "cdat.h" /** * DOC: cxl objects @@ -268,6 +269,7 @@ struct cxl_nvdimm { * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. * @cdat_mb: Mailbox which supports the CDAT protocol + * @cdat: Cached CDAT data */ struct cxl_port { struct device dev; @@ -280,6 +282,7 @@ struct cxl_port { bool dead; unsigned int depth; struct pci_doe_mb *cdat_mb; + struct cxl_cdat cdat; }; /** diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 366b21bd1a01..35f0d4892eaa 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); void cxl_find_cdat_mb(struct cxl_port *port); +void read_cdat_data(struct cxl_port *port); #endif /* __CXL_PCI_H__ */