Message ID | 20220605005049.2155874-7-ira.weiny@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | CXL: Read CDAT and DSMAS data | expand |
On 22-06-04 17:50:46, ira.weiny@intel.com wrote: > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > The OS will need CDAT data from CXL devices to properly set up > interleave sets. Currently this is supported through a DOE mailbox > which supports CDAT. > > Cache the CDAT data for later parsing. Provide a sysfs binary attribute > to allow dumping of the CDAT. > > Binary dumping is modeled on /sys/firmware/ACPI/tables/ > > The ability to dump this table will be very useful for emulation of real > devices once they become available as QEMU CXL type 3 device emulation will > be able to load this file in. > > This does not support table updates at runtime. It will always provide > whatever was there when first cached. Handling of table updates can be > implemented later. > > Finally create a complete list of DOE defines within cdat.h for code > wishing to decode the CDAT table. > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > --- > Changes from V9: > Add debugging output > Jonathan Cameron > Move read_cdat to port probe by using dev_groups for the > sysfs attributes. This avoids issues with using devm > before the driver is loaded while making sure the CDAT > binary is available. > > Changes from V8: > Fix length print format > Incorporate feedback from Jonathan > Move all this to cxl_port which can help support switches when > the time comes. > > Changes from V6: > Fix issue with devm use > Move cached cdat data to cxl_dev_state > Use new pci_doe_submit_task() > Ensure the aux driver is locked while processing tasks > Rebased on cxl-pending > > Changes from V5: > Add proper guards around cdat.h > Split out finding the CDAT DOE mailbox > Use cxl_cdat to group CDAT data together > Adjust to use auxiliary_find_device() to find the DOE device > which supplies the CDAT protocol. > Rebased to latest > Remove dev_dbg(length) > Remove unneeded DOE Table access defines > Move CXL_DOE_PROTOCOL_TABLE_ACCESS define into this patch where > it is used > > Changes from V4: > Split this into it's own patch > Rearchitect this such that the memdev driver calls into the DOE > driver via the cxl_mem state object. This allows CDAT data to > come from any type of cxl_mem object not just PCI DOE. > Rebase on new struct cxl_dev_state > --- > drivers/cxl/cdat.h | 99 ++++++++++++++++++++++++++++++ > drivers/cxl/core/pci.c | 136 ++++++++++++++++++++++++++++++++++++++++- > drivers/cxl/cxl.h | 3 + > drivers/cxl/cxlpci.h | 1 + > drivers/cxl/port.c | 50 +++++++++++++++ > 5 files changed, 287 insertions(+), 2 deletions(-) > create mode 100644 drivers/cxl/cdat.h > > diff --git a/drivers/cxl/cdat.h b/drivers/cxl/cdat.h > new file mode 100644 > index 000000000000..f5193a6a51fe > --- /dev/null > +++ b/drivers/cxl/cdat.h > @@ -0,0 +1,99 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __CXL_CDAT_H__ > +#define __CXL_CDAT_H__ > + > +/* > + * Coherent Device Attribute table (CDAT) > + * > + * Specification available from UEFI.org > + * > + * Whilst CDAT is defined as a single table, the access via DOE maiboxes is > + * done one entry at a time, where the first entry is the header. > + */ > + > +#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff > +#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0 > +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00 > +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0 > +#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000 > + > +/* > + * CDAT entries are little endian and are read from PCI config space which > + * is also little endian. > + * As such, on a big endian system these will have been reversed. > + * This prevents us from making easy use of packed structures. > + * Style form pci_regs.h > + */ > + > +#define CDAT_HEADER_LENGTH_DW 4 > +#define CDAT_HEADER_LENGTH_BYTES (CDAT_HEADER_LENGTH_DW * sizeof(u32)) > +#define CDAT_HEADER_DW0_LENGTH 0xffffffff > +#define CDAT_HEADER_DW1_REVISION 0x000000ff > +#define CDAT_HEADER_DW1_CHECKSUM 0x0000ff00 > +/* CDAT_HEADER_DW2_RESERVED */ > +#define CDAT_HEADER_DW3_SEQUENCE 0xffffffff > + > +/* All structures have a common first DW */ > +#define CDAT_STRUCTURE_DW0_TYPE 0x000000ff > +#define CDAT_STRUCTURE_DW0_TYPE_DSMAS 0 > +#define CDAT_STRUCTURE_DW0_TYPE_DSLBIS 1 > +#define CDAT_STRUCTURE_DW0_TYPE_DSMSCIS 2 > +#define CDAT_STRUCTURE_DW0_TYPE_DSIS 3 > +#define CDAT_STRUCTURE_DW0_TYPE_DSEMTS 4 > +#define CDAT_STRUCTURE_DW0_TYPE_SSLBIS 5 > + > +#define CDAT_STRUCTURE_DW0_LENGTH 0xffff0000 > + > +/* Device Scoped Memory Affinity Structure */ > +#define CDAT_DSMAS_DW1_DSMAD_HANDLE 0x000000ff > +#define CDAT_DSMAS_DW1_FLAGS 0x0000ff00 > +#define CDAT_DSMAS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) > +#define CDAT_DSMAS_DPA_LEN(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) > +#define CDAT_DSMAS_NON_VOLATILE(flags) ((flags & 0x04) >> 2) > + > +/* Device Scoped Latency and Bandwidth Information Structure */ > +#define CDAT_DSLBIS_DW1_HANDLE 0x000000ff > +#define CDAT_DSLBIS_DW1_FLAGS 0x0000ff00 > +#define CDAT_DSLBIS_DW1_DATA_TYPE 0x00ff0000 > +#define CDAT_DSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) > +#define CDAT_DSLBIS_DW4_ENTRY_0 0x0000ffff > +#define CDAT_DSLBIS_DW4_ENTRY_1 0xffff0000 > +#define CDAT_DSLBIS_DW5_ENTRY_2 0x0000ffff > + > +/* Device Scoped Memory Side Cache Information Structure */ > +#define CDAT_DSMSCIS_DW1_HANDLE 0x000000ff > +#define CDAT_DSMSCIS_MEMORY_SIDE_CACHE_SIZE(entry) \ > + ((u64)((entry)[3]) << 32 | (entry)[2]) > +#define CDAT_DSMSCIS_DW4_MEMORY_SIDE_CACHE_ATTRS 0xffffffff > + > +/* Device Scoped Initiator Structure */ > +#define CDAT_DSIS_DW1_FLAGS 0x000000ff > +#define CDAT_DSIS_DW1_HANDLE 0x0000ff00 > + > +/* Device Scoped EFI Memory Type Structure */ > +#define CDAT_DSEMTS_DW1_HANDLE 0x000000ff > +#define CDAT_DSEMTS_DW1_EFI_MEMORY_TYPE_ATTR 0x0000ff00 > +#define CDAT_DSEMTS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) > +#define CDAT_DSEMTS_DPA_LENGTH(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) > + > +/* Switch Scoped Latency and Bandwidth Information Structure */ > +#define CDAT_SSLBIS_DW1_DATA_TYPE 0x000000ff > +#define CDAT_SSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) > +#define CDAT_SSLBIS_ENTRY_PORT_X(entry, i) ((entry)[4 + (i) * 2] & 0x0000ffff) > +#define CDAT_SSLBIS_ENTRY_PORT_Y(entry, i) (((entry)[4 + (i) * 2] & 0xffff0000) >> 16) > +#define CDAT_SSLBIS_ENTRY_LAT_OR_BW(entry, i) ((entry)[4 + (i) * 2 + 1] & 0x0000ffff) > + > +#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 > + > +/** > + * struct cxl_cdat - CXL CDAT data > + * > + * @table: cache of CDAT table > + * @length: length of cached CDAT table > + */ > +struct cxl_cdat { > + void *table; > + size_t length; > +}; > + > +#endif /* !__CXL_CDAT_H__ */ > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index d814d8317975..76fa8382b3c7 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -9,8 +9,7 @@ > #include <cxlmem.h> > #include <cxl.h> > #include "core.h" > - > -#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 > +#include "cdat.h" > > /** > * DOC: cxl core pci > @@ -493,3 +492,136 @@ void cxl_cache_cdat_mb(struct cxl_port *port) > } > } > EXPORT_SYMBOL_NS_GPL(cxl_cache_cdat_mb, CXL); > + > +#define CDAT_DOE_REQ(entry_handle) \ > + (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ > + CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ > + FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ > + CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \ > + FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle))) > + > +static void cxl_doe_task_complete(struct pci_doe_task *task) > +{ > + complete(task->private); > +} > + > +static int cxl_cdat_get_length(struct cxl_port *port, size_t *length) > +{ > + u32 cdat_request_pl = CDAT_DOE_REQ(0); > + u32 cdat_response_pl[32]; > + DECLARE_COMPLETION_ONSTACK(c); > + struct pci_doe_task task = { > + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, > + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, > + .request_pl = &cdat_request_pl, > + .request_pl_sz = sizeof(cdat_request_pl), > + .response_pl = cdat_response_pl, > + .response_pl_sz = sizeof(cdat_response_pl), > + .complete = cxl_doe_task_complete, > + .private = &c, > + }; This is looking like something that could be nicely populated with a macro. > + int rc = 0; > + > + if (!port->cdat_mb) { > + dev_err(&port->dev, "No CDAT mailbox\n"); > + return -EIO; > + } AIUI, !port->cdat_mb isn't actually an error. Does it make sense to simply return 0 here? > + > + rc = pci_doe_submit_task(port->cdat_mb, &task); > + if (rc < 0) { > + dev_err(&port->dev, "DOE submit failed: %d", rc); > + return rc; > + } > + wait_for_completion(&c); > + > + if (task.rv < 1) > + return -EIO; > + > + *length = cdat_response_pl[1]; > + dev_dbg(&port->dev, "CDAT length %zu\n", *length); > + > + return rc; > +} > + > +static int cxl_cdat_read_table(struct cxl_port *port, > + struct cxl_cdat *cdat) > +{ > + size_t length = cdat->length; > + u32 *data = cdat->table; > + int entry_handle = 0; > + int rc = 0; > + > + if (!port->cdat_mb) { > + dev_err(&port->dev, "No CDAT mailbox\n"); > + return -EIO; > + } Similar to above, maybe just return 0? > + > + do { > + u32 cdat_request_pl = CDAT_DOE_REQ(entry_handle); > + u32 cdat_response_pl[32]; > + DECLARE_COMPLETION_ONSTACK(c); > + struct pci_doe_task task = { > + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, > + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, > + .request_pl = &cdat_request_pl, > + .request_pl_sz = sizeof(cdat_request_pl), > + .response_pl = cdat_response_pl, > + .response_pl_sz = sizeof(cdat_response_pl), > + .complete = cxl_doe_task_complete, > + .private = &c, > + }; > + size_t entry_dw; > + u32 *entry; > + > + rc = pci_doe_submit_task(port->cdat_mb, &task); > + if (rc < 0) { > + dev_err(&port->dev, "DOE submit failed: %d", rc); > + return rc; > + } > + wait_for_completion(&c); I'd use the timeout variant, but if you don't want to, see below. I can't quite tell if pci_doe_submit_task() is guaranteed to end with FLAG_DEAD at some point... > + > + entry = cdat_response_pl + 1; > + entry_dw = task.rv / sizeof(u32); > + /* Skip Header */ > + entry_dw -= 1; > + entry_dw = min(length / 4, entry_dw); > + memcpy(data, entry, entry_dw * sizeof(u32)); > + length -= entry_dw * sizeof(u32); > + data += entry_dw; > + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); [0] looks suspicious... > + > + } while (entry_handle != 0xFFFF); > + > + return rc; > +} > + > +void read_cdat_data(struct cxl_port *port) I think you need kdoc here, specifically because you've opted not to do a timed wait, which means its possible to wait forever. > +{ > + struct device *dev = &port->dev; > + size_t cdat_length; > + int ret; > + > + if (cxl_cdat_get_length(port, &cdat_length)) > + return; > + > + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); > + if (!port->cdat.table) { > + ret = -ENOMEM; > + goto error; > + } > + > + port->cdat.length = cdat_length; > + ret = cxl_cdat_read_table(port, &port->cdat); > + if (ret) { > + devm_kfree(dev, port->cdat.table); Usually, when I see devm_kfree, it's a sign that it might not be a good candidate for devm. You could consider plain kzalloc, and then putting the kfree in the port destructor. I don't see anything incorrect though, so it's up to you. > + port->cdat.table = NULL; > + port->cdat.length = 0; > + ret = -EIO; > + goto error; > + } > + > + return; > +error: > + dev_err(dev, "CDAT data read error (%d)\n", ret); > +} > +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index 0a86be589ffc..531b77d296c7 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -8,6 +8,7 @@ > #include <linux/bitfield.h> > #include <linux/bitops.h> > #include <linux/io.h> > +#include "cdat.h" > > /** > * DOC: cxl objects > @@ -268,6 +269,7 @@ struct cxl_nvdimm { > * @dead: last ep has been removed, force port re-creation > * @depth: How deep this port is relative to the root. depth 0 is the root. > * @cdat_mb: Mailbox which supports the CDAT protocol > + * @cdat: Cached CDAT data > */ > struct cxl_port { > struct device dev; > @@ -280,6 +282,7 @@ struct cxl_port { > bool dead; > unsigned int depth; > struct pci_doe_mb *cdat_mb; > + struct cxl_cdat cdat; > }; > > /** > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > index ddbb8b77752e..71009a167a92 100644 > --- a/drivers/cxl/cxlpci.h > +++ b/drivers/cxl/cxlpci.h > @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); > struct cxl_dev_state; > int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); > void cxl_cache_cdat_mb(struct cxl_port *port); > +void read_cdat_data(struct cxl_port *port); > #endif /* __CXL_PCI_H__ */ > diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c > index 04f3d1fc6e07..fdff20cf79e6 100644 > --- a/drivers/cxl/port.c > +++ b/drivers/cxl/port.c > @@ -50,6 +50,8 @@ static int cxl_port_probe(struct device *dev) > return PTR_ERR(cxlhdm); > > cxl_cache_cdat_mb(port); > + /* Cache the data early to ensure is_visible() works */ > + read_cdat_data(port); > > if (is_cxl_endpoint(port)) { > struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); > @@ -80,10 +82,58 @@ static int cxl_port_probe(struct device *dev) > return 0; > } > > +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, > + struct bin_attribute *bin_attr, char *buf, > + loff_t offset, size_t count) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct cxl_port *port = to_cxl_port(dev); > + > + if (!port->cdat.table) > + return 0; With visibility setup below, do you need this? > + > + return memory_read_from_buffer(buf, count, &offset, > + port->cdat.table, > + port->cdat.length); > +} > + > +static BIN_ATTR_RO(cdat, 0); > + > +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, > + struct bin_attribute *attr, int i) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct cxl_port *port = to_cxl_port(dev); > + > + if ((attr == &bin_attr_cdat) && port->cdat.table) > + return 0400; > + > + return 0; > +} > + > +static struct bin_attribute *cxl_cdat_bin_attributes[] = { > + &bin_attr_cdat, > + NULL, > +}; > + > +static struct attribute_group cxl_cdat_attribute_group = { > + .name = "CDAT", > + .bin_attrs = cxl_cdat_bin_attributes, > + .is_bin_visible = cxl_port_bin_attr_is_visible, > +}; > + > +static const struct attribute_group *cxl_port_attribute_groups[] = { > + &cxl_cdat_attribute_group, > + NULL, > +}; > + > static struct cxl_driver cxl_port_driver = { > .name = "cxl_port", > .probe = cxl_port_probe, > .id = CXL_DEVICE_PORT, > + .drv = { > + .dev_groups = cxl_port_attribute_groups, > + }, > }; > > module_cxl_driver(cxl_port_driver); > -- > 2.35.1 >
On Mon, Jun 06, 2022 at 11:15:41AM -0700, Ben Widawsky wrote: > On 22-06-04 17:50:46, ira.weiny@intel.com wrote: > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > [snip] > > + > > +static int cxl_cdat_get_length(struct cxl_port *port, size_t *length) > > +{ > > + u32 cdat_request_pl = CDAT_DOE_REQ(0); > > + u32 cdat_response_pl[32]; > > + DECLARE_COMPLETION_ONSTACK(c); > > + struct pci_doe_task task = { > > + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, > > + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, > > + .request_pl = &cdat_request_pl, > > + .request_pl_sz = sizeof(cdat_request_pl), > > + .response_pl = cdat_response_pl, > > + .response_pl_sz = sizeof(cdat_response_pl), > > + .complete = cxl_doe_task_complete, > > + .private = &c, > > + }; > > This is looking like something that could be nicely populated with a macro. Probably. But I'll leave that for another day. > > > + int rc = 0; > > + > > + if (!port->cdat_mb) { > > + dev_err(&port->dev, "No CDAT mailbox\n"); > > + return -EIO; > > + } > > AIUI, !port->cdat_mb isn't actually an error. It was when I was trying to get this to work... ;-) I change to dev_dbg(). > Does it make sense to simply > return 0 here? No because this is just a helper to the read_cdat below. 0 could be used to indicate 'no data' but easier to return an obvious error. > > > + > > + rc = pci_doe_submit_task(port->cdat_mb, &task); > > + if (rc < 0) { > > + dev_err(&port->dev, "DOE submit failed: %d", rc); > > + return rc; > > + } > > + wait_for_completion(&c); > > + > > + if (task.rv < 1) > > + return -EIO; > > + > > + *length = cdat_response_pl[1]; > > + dev_dbg(&port->dev, "CDAT length %zu\n", *length); > > + > > + return rc; > > +} > > + > > +static int cxl_cdat_read_table(struct cxl_port *port, > > + struct cxl_cdat *cdat) > > +{ > > + size_t length = cdat->length; > > + u32 *data = cdat->table; > > + int entry_handle = 0; > > + int rc = 0; > > + > > + if (!port->cdat_mb) { > > + dev_err(&port->dev, "No CDAT mailbox\n"); > > + return -EIO; > > + } > > Similar to above, maybe just return 0? Same response. But I'll change the messages to dev_dbg(). > > > + > > + do { > > + u32 cdat_request_pl = CDAT_DOE_REQ(entry_handle); > > + u32 cdat_response_pl[32]; > > + DECLARE_COMPLETION_ONSTACK(c); > > + struct pci_doe_task task = { > > + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, > > + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, > > + .request_pl = &cdat_request_pl, > > + .request_pl_sz = sizeof(cdat_request_pl), > > + .response_pl = cdat_response_pl, > > + .response_pl_sz = sizeof(cdat_response_pl), > > + .complete = cxl_doe_task_complete, > > + .private = &c, > > + }; > > + size_t entry_dw; > > + u32 *entry; > > + > > + rc = pci_doe_submit_task(port->cdat_mb, &task); > > + if (rc < 0) { > > + dev_err(&port->dev, "DOE submit failed: %d", rc); > > + return rc; > > + } > > + wait_for_completion(&c); > > I'd use the timeout variant, but if you don't want to, see below. I can't quite > tell if pci_doe_submit_task() is guaranteed to end with FLAG_DEAD at some > point... Yes it will if it goes south. The issue with a timeout here is what should this layer expect for that time? > > > + > > + entry = cdat_response_pl + 1; > > + entry_dw = task.rv / sizeof(u32); > > + /* Skip Header */ > > + entry_dw -= 1; > > + entry_dw = min(length / 4, entry_dw); > > + memcpy(data, entry, entry_dw * sizeof(u32)); > > + length -= entry_dw * sizeof(u32); > > + data += entry_dw; > > + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); > > [0] looks suspicious... Actually I have to claim ignorance on this one. I've carried this from Jonathan's original patches. I'm not as worried about the [0] as that is just the first dword. But I'm confused as to this entry handle now. Jonathan? Help? > > > + > > + } while (entry_handle != 0xFFFF); > > + > > + return rc; > > +} > > + > > +void read_cdat_data(struct cxl_port *port) > > I think you need kdoc here, specifically because you've opted not to do a > timed wait, which means its possible to wait forever. Sure but we are not going to wait forever due to the DOE spec. But I'll document that, sure. > > > +{ > > + struct device *dev = &port->dev; > > + size_t cdat_length; > > + int ret; > > + > > + if (cxl_cdat_get_length(port, &cdat_length)) > > + return; > > + > > + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); > > + if (!port->cdat.table) { > > + ret = -ENOMEM; > > + goto error; > > + } > > + > > + port->cdat.length = cdat_length; > > + ret = cxl_cdat_read_table(port, &port->cdat); > > + if (ret) { > > + devm_kfree(dev, port->cdat.table); > > Usually, when I see devm_kfree, it's a sign that it might not be a good > candidate for devm. You could consider plain kzalloc, and then putting the kfree > in the port destructor. I don't see anything incorrect though, so it's up to > you. I like it this way because we are really only doing this as an error condition. And it is less error prone to use devm. Technically devm_kfree() does not even need to be here except that then we could potentially have a lot of cdat tables floating around until the port goes away. I can put in a comment to indicate why this was an anti-pattern. [snip] > > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > > index ddbb8b77752e..71009a167a92 100644 > > --- a/drivers/cxl/cxlpci.h > > +++ b/drivers/cxl/cxlpci.h > > @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); > > struct cxl_dev_state; > > int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); > > void cxl_cache_cdat_mb(struct cxl_port *port); > > +void read_cdat_data(struct cxl_port *port); > > #endif /* __CXL_PCI_H__ */ > > diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c > > index 04f3d1fc6e07..fdff20cf79e6 100644 > > --- a/drivers/cxl/port.c > > +++ b/drivers/cxl/port.c > > @@ -50,6 +50,8 @@ static int cxl_port_probe(struct device *dev) > > return PTR_ERR(cxlhdm); > > > > cxl_cache_cdat_mb(port); > > + /* Cache the data early to ensure is_visible() works */ > > + read_cdat_data(port); > > > > if (is_cxl_endpoint(port)) { > > struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); > > @@ -80,10 +82,58 @@ static int cxl_port_probe(struct device *dev) > > return 0; > > } > > > > +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, > > + struct bin_attribute *bin_attr, char *buf, > > + loff_t offset, size_t count) > > +{ > > + struct device *dev = kobj_to_dev(kobj); > > + struct cxl_port *port = to_cxl_port(dev); > > + > > + if (!port->cdat.table) > > + return 0; > > With visibility setup below, do you need this? Not currently. I was envisioning a later dynamic state for cdat.table where on error this could have been set to NULL. Ira [snip]
On Wed, 8 Jun 2022 14:27:14 -0700 Ira Weiny <ira.weiny@intel.com> wrote: > On Mon, Jun 06, 2022 at 11:15:41AM -0700, Ben Widawsky wrote: > > On 22-06-04 17:50:46, ira.weiny@intel.com wrote: > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > [snip] > > > > > + > > > + entry = cdat_response_pl + 1; > > > + entry_dw = task.rv / sizeof(u32); > > > + /* Skip Header */ > > > + entry_dw -= 1; > > > + entry_dw = min(length / 4, entry_dw); > > > + memcpy(data, entry, entry_dw * sizeof(u32)); > > > + length -= entry_dw * sizeof(u32); > > > + data += entry_dw; > > > + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); > > > > [0] looks suspicious... > > Actually I have to claim ignorance on this one. I've carried this from > Jonathan's original patches. I'm not as worried about the [0] as that is just > the first dword. But I'm confused as to this entry handle now. > > Jonathan? Help? Looks right to me. The entryhandle is a field in the upper 16 bits of the first dword defined in Read Entry Response table in the CXL spec and also used in the request of the next entry (which is more or less a CDAT structure) Two magic values. 0 - CDAT header (request only - can't be returned) 0xFFFF - No more entries. As we are reading the whole table, we write 0 to first request and from there on use the value returned in the response for the next request until we see 0xFFFF and stop. Note IIRC the meaning of entry handle was clarified in a CXL 2.0 errata as was a bit ambiguous in the original spec (we had two QEMU implementations briefly and they did different things :) Jonathan > > > > > > + > > > + } while (entry_handle != 0xFFFF); > > > + > > > + return rc; > > > +} > > > +
On Thu, Jun 09, 2022 at 09:27:38AM +0100, Jonathan Cameron wrote: > On Wed, 8 Jun 2022 14:27:14 -0700 > Ira Weiny <ira.weiny@intel.com> wrote: > > > On Mon, Jun 06, 2022 at 11:15:41AM -0700, Ben Widawsky wrote: > > > On 22-06-04 17:50:46, ira.weiny@intel.com wrote: > > > > From: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > > > > [snip] > > > > > > > > + > > > > + entry = cdat_response_pl + 1; > > > > + entry_dw = task.rv / sizeof(u32); > > > > + /* Skip Header */ > > > > + entry_dw -= 1; > > > > + entry_dw = min(length / 4, entry_dw); > > > > + memcpy(data, entry, entry_dw * sizeof(u32)); > > > > + length -= entry_dw * sizeof(u32); > > > > + data += entry_dw; > > > > + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); > > > > > > [0] looks suspicious... > > > > Actually I have to claim ignorance on this one. I've carried this from > > Jonathan's original patches. I'm not as worried about the [0] as that is just > > the first dword. But I'm confused as to this entry handle now. > > > > Jonathan? Help? > > Looks right to me. The entryhandle is a field in the upper 16 bits of the > first dword defined in Read Entry Response table in the CXL spec and also > used in the request of the next entry (which is more or less a CDAT structure) > Two magic values. > 0 - CDAT header (request only - can't be returned) > 0xFFFF - No more entries. Thanks! > > As we are reading the whole table, we write 0 to first request and from there > on use the value returned in the response for the next request until we see > 0xFFFF and stop. > > Note IIRC the meaning of entry handle was clarified in a CXL 2.0 errata as > was a bit ambiguous in the original spec (we had two QEMU implementations > briefly and they did different things :) I see it now! I'm going to add a define for 0xFFFF as well. Ira > > Jonathan > > > > > > > > > > > + > > > > + } while (entry_handle != 0xFFFF); > > > > + > > > > + return rc; > > > > +} > > > > + > >
diff --git a/drivers/cxl/cdat.h b/drivers/cxl/cdat.h new file mode 100644 index 000000000000..f5193a6a51fe --- /dev/null +++ b/drivers/cxl/cdat.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __CXL_CDAT_H__ +#define __CXL_CDAT_H__ + +/* + * Coherent Device Attribute table (CDAT) + * + * Specification available from UEFI.org + * + * Whilst CDAT is defined as a single table, the access via DOE maiboxes is + * done one entry at a time, where the first entry is the header. + */ + +#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff +#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0 +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00 +#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0 +#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000 + +/* + * CDAT entries are little endian and are read from PCI config space which + * is also little endian. + * As such, on a big endian system these will have been reversed. + * This prevents us from making easy use of packed structures. + * Style form pci_regs.h + */ + +#define CDAT_HEADER_LENGTH_DW 4 +#define CDAT_HEADER_LENGTH_BYTES (CDAT_HEADER_LENGTH_DW * sizeof(u32)) +#define CDAT_HEADER_DW0_LENGTH 0xffffffff +#define CDAT_HEADER_DW1_REVISION 0x000000ff +#define CDAT_HEADER_DW1_CHECKSUM 0x0000ff00 +/* CDAT_HEADER_DW2_RESERVED */ +#define CDAT_HEADER_DW3_SEQUENCE 0xffffffff + +/* All structures have a common first DW */ +#define CDAT_STRUCTURE_DW0_TYPE 0x000000ff +#define CDAT_STRUCTURE_DW0_TYPE_DSMAS 0 +#define CDAT_STRUCTURE_DW0_TYPE_DSLBIS 1 +#define CDAT_STRUCTURE_DW0_TYPE_DSMSCIS 2 +#define CDAT_STRUCTURE_DW0_TYPE_DSIS 3 +#define CDAT_STRUCTURE_DW0_TYPE_DSEMTS 4 +#define CDAT_STRUCTURE_DW0_TYPE_SSLBIS 5 + +#define CDAT_STRUCTURE_DW0_LENGTH 0xffff0000 + +/* Device Scoped Memory Affinity Structure */ +#define CDAT_DSMAS_DW1_DSMAD_HANDLE 0x000000ff +#define CDAT_DSMAS_DW1_FLAGS 0x0000ff00 +#define CDAT_DSMAS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSMAS_DPA_LEN(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) +#define CDAT_DSMAS_NON_VOLATILE(flags) ((flags & 0x04) >> 2) + +/* Device Scoped Latency and Bandwidth Information Structure */ +#define CDAT_DSLBIS_DW1_HANDLE 0x000000ff +#define CDAT_DSLBIS_DW1_FLAGS 0x0000ff00 +#define CDAT_DSLBIS_DW1_DATA_TYPE 0x00ff0000 +#define CDAT_DSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSLBIS_DW4_ENTRY_0 0x0000ffff +#define CDAT_DSLBIS_DW4_ENTRY_1 0xffff0000 +#define CDAT_DSLBIS_DW5_ENTRY_2 0x0000ffff + +/* Device Scoped Memory Side Cache Information Structure */ +#define CDAT_DSMSCIS_DW1_HANDLE 0x000000ff +#define CDAT_DSMSCIS_MEMORY_SIDE_CACHE_SIZE(entry) \ + ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSMSCIS_DW4_MEMORY_SIDE_CACHE_ATTRS 0xffffffff + +/* Device Scoped Initiator Structure */ +#define CDAT_DSIS_DW1_FLAGS 0x000000ff +#define CDAT_DSIS_DW1_HANDLE 0x0000ff00 + +/* Device Scoped EFI Memory Type Structure */ +#define CDAT_DSEMTS_DW1_HANDLE 0x000000ff +#define CDAT_DSEMTS_DW1_EFI_MEMORY_TYPE_ATTR 0x0000ff00 +#define CDAT_DSEMTS_DPA_OFFSET(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_DSEMTS_DPA_LENGTH(entry) ((u64)((entry)[5]) << 32 | (entry)[4]) + +/* Switch Scoped Latency and Bandwidth Information Structure */ +#define CDAT_SSLBIS_DW1_DATA_TYPE 0x000000ff +#define CDAT_SSLBIS_BASE_UNIT(entry) ((u64)((entry)[3]) << 32 | (entry)[2]) +#define CDAT_SSLBIS_ENTRY_PORT_X(entry, i) ((entry)[4 + (i) * 2] & 0x0000ffff) +#define CDAT_SSLBIS_ENTRY_PORT_Y(entry, i) (((entry)[4 + (i) * 2] & 0xffff0000) >> 16) +#define CDAT_SSLBIS_ENTRY_LAT_OR_BW(entry, i) ((entry)[4 + (i) * 2 + 1] & 0x0000ffff) + +#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 + +/** + * struct cxl_cdat - CXL CDAT data + * + * @table: cache of CDAT table + * @length: length of cached CDAT table + */ +struct cxl_cdat { + void *table; + size_t length; +}; + +#endif /* !__CXL_CDAT_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d814d8317975..76fa8382b3c7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -9,8 +9,7 @@ #include <cxlmem.h> #include <cxl.h> #include "core.h" - -#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 +#include "cdat.h" /** * DOC: cxl core pci @@ -493,3 +492,136 @@ void cxl_cache_cdat_mb(struct cxl_port *port) } } EXPORT_SYMBOL_NS_GPL(cxl_cache_cdat_mb, CXL); + +#define CDAT_DOE_REQ(entry_handle) \ + (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ + CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ + FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ + CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \ + FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle))) + +static void cxl_doe_task_complete(struct pci_doe_task *task) +{ + complete(task->private); +} + +static int cxl_cdat_get_length(struct cxl_port *port, size_t *length) +{ + u32 cdat_request_pl = CDAT_DOE_REQ(0); + u32 cdat_response_pl[32]; + DECLARE_COMPLETION_ONSTACK(c); + struct pci_doe_task task = { + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, + .request_pl = &cdat_request_pl, + .request_pl_sz = sizeof(cdat_request_pl), + .response_pl = cdat_response_pl, + .response_pl_sz = sizeof(cdat_response_pl), + .complete = cxl_doe_task_complete, + .private = &c, + }; + int rc = 0; + + if (!port->cdat_mb) { + dev_err(&port->dev, "No CDAT mailbox\n"); + return -EIO; + } + + rc = pci_doe_submit_task(port->cdat_mb, &task); + if (rc < 0) { + dev_err(&port->dev, "DOE submit failed: %d", rc); + return rc; + } + wait_for_completion(&c); + + if (task.rv < 1) + return -EIO; + + *length = cdat_response_pl[1]; + dev_dbg(&port->dev, "CDAT length %zu\n", *length); + + return rc; +} + +static int cxl_cdat_read_table(struct cxl_port *port, + struct cxl_cdat *cdat) +{ + size_t length = cdat->length; + u32 *data = cdat->table; + int entry_handle = 0; + int rc = 0; + + if (!port->cdat_mb) { + dev_err(&port->dev, "No CDAT mailbox\n"); + return -EIO; + } + + do { + u32 cdat_request_pl = CDAT_DOE_REQ(entry_handle); + u32 cdat_response_pl[32]; + DECLARE_COMPLETION_ONSTACK(c); + struct pci_doe_task task = { + .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, + .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, + .request_pl = &cdat_request_pl, + .request_pl_sz = sizeof(cdat_request_pl), + .response_pl = cdat_response_pl, + .response_pl_sz = sizeof(cdat_response_pl), + .complete = cxl_doe_task_complete, + .private = &c, + }; + size_t entry_dw; + u32 *entry; + + rc = pci_doe_submit_task(port->cdat_mb, &task); + if (rc < 0) { + dev_err(&port->dev, "DOE submit failed: %d", rc); + return rc; + } + wait_for_completion(&c); + + entry = cdat_response_pl + 1; + entry_dw = task.rv / sizeof(u32); + /* Skip Header */ + entry_dw -= 1; + entry_dw = min(length / 4, entry_dw); + memcpy(data, entry, entry_dw * sizeof(u32)); + length -= entry_dw * sizeof(u32); + data += entry_dw; + entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, cdat_response_pl[0]); + + } while (entry_handle != 0xFFFF); + + return rc; +} + +void read_cdat_data(struct cxl_port *port) +{ + struct device *dev = &port->dev; + size_t cdat_length; + int ret; + + if (cxl_cdat_get_length(port, &cdat_length)) + return; + + port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); + if (!port->cdat.table) { + ret = -ENOMEM; + goto error; + } + + port->cdat.length = cdat_length; + ret = cxl_cdat_read_table(port, &port->cdat); + if (ret) { + devm_kfree(dev, port->cdat.table); + port->cdat.table = NULL; + port->cdat.length = 0; + ret = -EIO; + goto error; + } + + return; +error: + dev_err(dev, "CDAT data read error (%d)\n", ret); +} +EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0a86be589ffc..531b77d296c7 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/io.h> +#include "cdat.h" /** * DOC: cxl objects @@ -268,6 +269,7 @@ struct cxl_nvdimm { * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. * @cdat_mb: Mailbox which supports the CDAT protocol + * @cdat: Cached CDAT data */ struct cxl_port { struct device dev; @@ -280,6 +282,7 @@ struct cxl_port { bool dead; unsigned int depth; struct pci_doe_mb *cdat_mb; + struct cxl_cdat cdat; }; /** diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index ddbb8b77752e..71009a167a92 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -75,4 +75,5 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); void cxl_cache_cdat_mb(struct cxl_port *port); +void read_cdat_data(struct cxl_port *port); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 04f3d1fc6e07..fdff20cf79e6 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -50,6 +50,8 @@ static int cxl_port_probe(struct device *dev) return PTR_ERR(cxlhdm); cxl_cache_cdat_mb(port); + /* Cache the data early to ensure is_visible() works */ + read_cdat_data(port); if (is_cxl_endpoint(port)) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); @@ -80,10 +82,58 @@ static int cxl_port_probe(struct device *dev) return 0; } +static ssize_t cdat_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t offset, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_port *port = to_cxl_port(dev); + + if (!port->cdat.table) + return 0; + + return memory_read_from_buffer(buf, count, &offset, + port->cdat.table, + port->cdat.length); +} + +static BIN_ATTR_RO(cdat, 0); + +static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, + struct bin_attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_port *port = to_cxl_port(dev); + + if ((attr == &bin_attr_cdat) && port->cdat.table) + return 0400; + + return 0; +} + +static struct bin_attribute *cxl_cdat_bin_attributes[] = { + &bin_attr_cdat, + NULL, +}; + +static struct attribute_group cxl_cdat_attribute_group = { + .name = "CDAT", + .bin_attrs = cxl_cdat_bin_attributes, + .is_bin_visible = cxl_port_bin_attr_is_visible, +}; + +static const struct attribute_group *cxl_port_attribute_groups[] = { + &cxl_cdat_attribute_group, + NULL, +}; + static struct cxl_driver cxl_port_driver = { .name = "cxl_port", .probe = cxl_port_probe, .id = CXL_DEVICE_PORT, + .drv = { + .dev_groups = cxl_port_attribute_groups, + }, }; module_cxl_driver(cxl_port_driver);