Message ID | 20241007-dcd-type2-upstream-v4-19-c261ee6eeded@intel.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Ira Weiny |
Headers | show |
Series | DCD: Add support for Dynamic Capacity Devices (DCD) | expand |
On Mon, 07 Oct 2024 18:16:25 -0500 ira.weiny@intel.com wrote: > From: Navneet Singh <navneet.singh@intel.com> > > Dynamic Capacity Devices (DCD) support extent change notifications > through the event log mechanism. The interrupt mailbox commands were > extended in CXL 3.1 to support these notifications. Firmware can't > configure DCD events to be FW controlled but can retain control of > memory events. > > Configure DCD event log interrupts on devices supporting dynamic > capacity. Disable DCD if interrupts are not supported. > > Care is taken to preserve the interrupt policy set by the FW if FW first > has been selected by the BIOS. > > Signed-off-by: Navneet Singh <navneet.singh@intel.com> > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> Ah. I was wondering why policy needed to be initialize to zero outside this call. Maybe moving it in here with a memset() would be cleaner. Either way Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
On Mon, Oct 07, 2024 at 06:16:25PM -0500, ira.weiny@intel.com wrote: > From: Navneet Singh <navneet.singh@intel.com> > > Dynamic Capacity Devices (DCD) support extent change notifications > through the event log mechanism. The interrupt mailbox commands were > extended in CXL 3.1 to support these notifications. Firmware can't > configure DCD events to be FW controlled but can retain control of > memory events. > > Configure DCD event log interrupts on devices supporting dynamic > capacity. Disable DCD if interrupts are not supported. > > Care is taken to preserve the interrupt policy set by the FW if FW first > has been selected by the BIOS. > > Signed-off-by: Navneet Singh <navneet.singh@intel.com> > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > --- > Changes: > [iweiny: rebase on 6.12] > --- > drivers/cxl/cxlmem.h | 2 ++ > drivers/cxl/pci.c | 72 +++++++++++++++++++++++++++++++++++++++++++--------- > 2 files changed, 62 insertions(+), 12 deletions(-) > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index c3b889a586d8..2d2a1884a174 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -226,7 +226,9 @@ struct cxl_event_interrupt_policy { > u8 warn_settings; > u8 failure_settings; > u8 fatal_settings; > + u8 dcd_settings; > } __packed; > +#define CXL_EVENT_INT_POLICY_BASE_SIZE 4 /* info, warn, failure, fatal */ > > /** > * struct cxl_event_state - Event log driver state > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index c6042db0653d..2ba059d313c2 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -672,23 +672,34 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, > } > > static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, > - struct cxl_event_interrupt_policy *policy) > + struct cxl_event_interrupt_policy *policy, > + bool native_cxl) > { > struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; > + size_t size_in = CXL_EVENT_INT_POLICY_BASE_SIZE; > struct cxl_mbox_cmd mbox_cmd; > int rc; > > - *policy = (struct cxl_event_interrupt_policy) { > - .info_settings = CXL_INT_MSI_MSIX, > - .warn_settings = CXL_INT_MSI_MSIX, > - .failure_settings = CXL_INT_MSI_MSIX, > - .fatal_settings = CXL_INT_MSI_MSIX, > - }; > + /* memory event policy is left if FW has control */ > + if (native_cxl) { > + *policy = (struct cxl_event_interrupt_policy) { > + .info_settings = CXL_INT_MSI_MSIX, > + .warn_settings = CXL_INT_MSI_MSIX, > + .failure_settings = CXL_INT_MSI_MSIX, > + .fatal_settings = CXL_INT_MSI_MSIX, > + .dcd_settings = 0, > + }; > + } > + > + if (cxl_dcd_supported(mds)) { > + policy->dcd_settings = CXL_INT_MSI_MSIX; > + size_in += sizeof(policy->dcd_settings); > + } > > mbox_cmd = (struct cxl_mbox_cmd) { > .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, > .payload_in = policy, > - .size_in = sizeof(*policy), > + .size_in = size_in, > }; > > rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); > @@ -735,6 +746,31 @@ static int cxl_event_irqsetup(struct cxl_memdev_state *mds, > return 0; > } > > +static int cxl_irqsetup(struct cxl_memdev_state *mds, > + struct cxl_event_interrupt_policy *policy, > + bool native_cxl) > +{ > + struct cxl_dev_state *cxlds = &mds->cxlds; > + int rc; > + > + if (native_cxl) { > + rc = cxl_event_irqsetup(mds, policy); > + if (rc) > + return rc; > + } > + > + if (cxl_dcd_supported(mds)) { > + rc = cxl_event_req_irq(cxlds, policy->dcd_settings); > + if (rc) { > + dev_err(cxlds->dev, "Failed to get interrupt for DCD event log\n"); > + cxl_disable_dcd(mds); > + return rc; If the device has both static and dynamic capacity, return an error code here will cause cxl_event_config() return early, and cxl_mem_get_event_records() will not be called, will it be an issue? Fan > + } > + } > + > + return 0; > +} > + > static bool cxl_event_int_is_fw(u8 setting) > { > u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); > @@ -761,17 +797,25 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, > struct cxl_memdev_state *mds, bool irq_avail) > { > struct cxl_event_interrupt_policy policy = { 0 }; > + bool native_cxl = host_bridge->native_cxl_error; > int rc; > > /* > * When BIOS maintains CXL error reporting control, it will process > * event records. Only one agent can do so. > + * > + * If BIOS has control of events and DCD is not supported skip event > + * configuration. > */ > - if (!host_bridge->native_cxl_error) > + if (!native_cxl && !cxl_dcd_supported(mds)) > return 0; > > if (!irq_avail) { > dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); > + if (cxl_dcd_supported(mds)) { > + dev_info(mds->cxlds.dev, "DCD requires interrupts, disable DCD\n"); > + cxl_disable_dcd(mds); > + } > return 0; > } > > @@ -779,10 +823,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, > if (rc) > return rc; > > - if (!cxl_event_validate_mem_policy(mds, &policy)) > + if (native_cxl && !cxl_event_validate_mem_policy(mds, &policy)) > return -EBUSY; > > - rc = cxl_event_config_msgnums(mds, &policy); > + rc = cxl_event_config_msgnums(mds, &policy, native_cxl); > if (rc) > return rc; > > @@ -790,12 +834,16 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, > if (rc) > return rc; > > - rc = cxl_event_irqsetup(mds, &policy); > + rc = cxl_irqsetup(mds, &policy, native_cxl); > if (rc) > return rc; > > cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); > > + dev_dbg(mds->cxlds.dev, "Event config : %s DCD %s\n", > + native_cxl ? "OS" : "BIOS", > + cxl_dcd_supported(mds) ? "supported" : "not supported"); > + > return 0; > } > > > -- > 2.46.0 >
Fan Ni wrote: > On Mon, Oct 07, 2024 at 06:16:25PM -0500, ira.weiny@intel.com wrote: > > From: Navneet Singh <navneet.singh@intel.com> > > [snip] > > > > +static int cxl_irqsetup(struct cxl_memdev_state *mds, > > + struct cxl_event_interrupt_policy *policy, > > + bool native_cxl) > > +{ > > + struct cxl_dev_state *cxlds = &mds->cxlds; > > + int rc; > > + > > + if (native_cxl) { > > + rc = cxl_event_irqsetup(mds, policy); > > + if (rc) > > + return rc; > > + } > > + > > + if (cxl_dcd_supported(mds)) { > > + rc = cxl_event_req_irq(cxlds, policy->dcd_settings); > > + if (rc) { > > + dev_err(cxlds->dev, "Failed to get interrupt for DCD event log\n"); > > + cxl_disable_dcd(mds); > > + return rc; > > If the device has both static and dynamic capacity, return an error code > here will cause cxl_event_config() return early, and > cxl_mem_get_event_records() will not be called, will it be an issue? Good catch as it was not my intent to fail the device probe. Rather to disable DCD. In practice however, the device would be broken because it should not be advertising DCD support without allowing for the proper irq setup. Current support will fail the probe if event configuration fails. So the error here is probably bothering to try to disable future DCD processing. At this point I'm not sure which way to go... Ira
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index c3b889a586d8..2d2a1884a174 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -226,7 +226,9 @@ struct cxl_event_interrupt_policy { u8 warn_settings; u8 failure_settings; u8 fatal_settings; + u8 dcd_settings; } __packed; +#define CXL_EVENT_INT_POLICY_BASE_SIZE 4 /* info, warn, failure, fatal */ /** * struct cxl_event_state - Event log driver state diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index c6042db0653d..2ba059d313c2 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -672,23 +672,34 @@ static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, } static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, - struct cxl_event_interrupt_policy *policy) + struct cxl_event_interrupt_policy *policy, + bool native_cxl) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; + size_t size_in = CXL_EVENT_INT_POLICY_BASE_SIZE; struct cxl_mbox_cmd mbox_cmd; int rc; - *policy = (struct cxl_event_interrupt_policy) { - .info_settings = CXL_INT_MSI_MSIX, - .warn_settings = CXL_INT_MSI_MSIX, - .failure_settings = CXL_INT_MSI_MSIX, - .fatal_settings = CXL_INT_MSI_MSIX, - }; + /* memory event policy is left if FW has control */ + if (native_cxl) { + *policy = (struct cxl_event_interrupt_policy) { + .info_settings = CXL_INT_MSI_MSIX, + .warn_settings = CXL_INT_MSI_MSIX, + .failure_settings = CXL_INT_MSI_MSIX, + .fatal_settings = CXL_INT_MSI_MSIX, + .dcd_settings = 0, + }; + } + + if (cxl_dcd_supported(mds)) { + policy->dcd_settings = CXL_INT_MSI_MSIX; + size_in += sizeof(policy->dcd_settings); + } mbox_cmd = (struct cxl_mbox_cmd) { .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, .payload_in = policy, - .size_in = sizeof(*policy), + .size_in = size_in, }; rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); @@ -735,6 +746,31 @@ static int cxl_event_irqsetup(struct cxl_memdev_state *mds, return 0; } +static int cxl_irqsetup(struct cxl_memdev_state *mds, + struct cxl_event_interrupt_policy *policy, + bool native_cxl) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + int rc; + + if (native_cxl) { + rc = cxl_event_irqsetup(mds, policy); + if (rc) + return rc; + } + + if (cxl_dcd_supported(mds)) { + rc = cxl_event_req_irq(cxlds, policy->dcd_settings); + if (rc) { + dev_err(cxlds->dev, "Failed to get interrupt for DCD event log\n"); + cxl_disable_dcd(mds); + return rc; + } + } + + return 0; +} + static bool cxl_event_int_is_fw(u8 setting) { u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); @@ -761,17 +797,25 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_event_interrupt_policy policy = { 0 }; + bool native_cxl = host_bridge->native_cxl_error; int rc; /* * When BIOS maintains CXL error reporting control, it will process * event records. Only one agent can do so. + * + * If BIOS has control of events and DCD is not supported skip event + * configuration. */ - if (!host_bridge->native_cxl_error) + if (!native_cxl && !cxl_dcd_supported(mds)) return 0; if (!irq_avail) { dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); + if (cxl_dcd_supported(mds)) { + dev_info(mds->cxlds.dev, "DCD requires interrupts, disable DCD\n"); + cxl_disable_dcd(mds); + } return 0; } @@ -779,10 +823,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, if (rc) return rc; - if (!cxl_event_validate_mem_policy(mds, &policy)) + if (native_cxl && !cxl_event_validate_mem_policy(mds, &policy)) return -EBUSY; - rc = cxl_event_config_msgnums(mds, &policy); + rc = cxl_event_config_msgnums(mds, &policy, native_cxl); if (rc) return rc; @@ -790,12 +834,16 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, if (rc) return rc; - rc = cxl_event_irqsetup(mds, &policy); + rc = cxl_irqsetup(mds, &policy, native_cxl); if (rc) return rc; cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); + dev_dbg(mds->cxlds.dev, "Event config : %s DCD %s\n", + native_cxl ? "OS" : "BIOS", + cxl_dcd_supported(mds) ? "supported" : "not supported"); + return 0; }