Message ID | 20221201002719.2596558-9-ira.weiny@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL: Process event logs | expand |
On Wed, 30 Nov 2022 16:27:16 -0800 ira.weiny@intel.com wrote: > From: Ira Weiny <ira.weiny@intel.com> > > CXL device events are signaled via interrupts. Each event log may have > a different interrupt message number. These message numbers are > reported in the Get Event Interrupt Policy mailbox command. > > Add interrupt support for event logs. Interrupts are allocated as > shared interrupts. Therefore, all or some event logs can share the same > message number. > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> A few trivial comments, but only superficially code style stuff which you can ignore if you feel strongly about current style or it matches existing file style etc... Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > --- > Changes from V1: > Remove unneeded evt_int_policy from struct cxl_dev_state > defer Dynamic Capacity support > Dave Jiang > s/irq/rc > use IRQ_NONE to signal the irq was not for us. > Jonathan > use msi_enabled rather than nr_irq_vec > On failure explicitly set CXL_INT_NONE > Add comment for Get Event Interrupt Policy > use devm_request_threaded_irq() > Use individual handler/thread functions for each of the > logs rather than struct cxl_event_irq_id. > > Changes from RFC v2 > Adjust to new irq 16 vector allocation > Jonathan > Remove CXL_INT_RES > Use irq threads to ensure mailbox commands are executed outside irq context > Adjust for optional Dynamic Capacity log > --- > drivers/cxl/core/mbox.c | 44 +++++++++++- > drivers/cxl/cxlmem.h | 30 ++++++++ > drivers/cxl/pci.c | 130 +++++++++++++++++++++++++++++++++++ > include/uapi/linux/cxl_mem.h | 2 + > 4 files changed, 204 insertions(+), 2 deletions(-) > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 450b410f29f6..2d384b0fc2b3 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -179,6 +179,30 @@ struct cxl_endpoint_dvsec_info { > struct range dvsec_range[2]; > }; > > +/** > + * Event Interrupt Policy > + * > + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 > + */ > +enum cxl_event_int_mode { > + CXL_INT_NONE = 0x00, > + CXL_INT_MSI_MSIX = 0x01, > + CXL_INT_FW = 0x02 > +}; > +#define CXL_EVENT_INT_MODE_MASK 0x3 > +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4) > +struct cxl_event_interrupt_policy { > + u8 info_settings; > + u8 warn_settings; > + u8 failure_settings; > + u8 fatal_settings; > +} __packed; > + > +static inline bool cxl_evt_int_is_msi(u8 setting) > +{ > + return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK); Maybe a case for FIELD_GET() though given the defines are all local it is already obvious what this is doing so fine if you prefer to keep it as is. > +} ... > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 11e95a95195a..3c0b9199f11a 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -449,6 +449,134 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds) > cxlds->msi_enabled = true; > } > > +static irqreturn_t cxl_event_info_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); > + return IRQ_HANDLED; > +} I'm not a great fan of macros, but maybe this is a case for them. > + > +static irqreturn_t cxl_event_info_handler(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); Superficial and this is guaranteed to work (8.2.8 allow all sizes of read up to 64 bytes), but maybe should treat this as a 64 bit register as that aligns better with spec? > + > + if (CXLDEV_EVENT_STATUS_INFO & status) Another maybe FIELD_GET() case? > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > +} > + > +static irqreturn_t cxl_event_warn_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; Why id? I'd call it what it is (maybe _cxlsd) and not bother with the local variable in this case as it is only used once and doesn't need the type. static irqreturn_t cxl_event_warn_thread(int irq, void *cxlds) { cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); return IRQ_HANDLED; } > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); > + return IRQ_HANDLED; > +} > + ...
On Thu, Dec 01, 2022 at 02:21:18PM +0000, Jonathan Cameron wrote: > On Wed, 30 Nov 2022 16:27:16 -0800 > ira.weiny@intel.com wrote: > > > From: Ira Weiny <ira.weiny@intel.com> > > > > CXL device events are signaled via interrupts. Each event log may have > > a different interrupt message number. These message numbers are > > reported in the Get Event Interrupt Policy mailbox command. > > > > Add interrupt support for event logs. Interrupts are allocated as > > shared interrupts. Therefore, all or some event logs can share the same > > message number. > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > A few trivial comments, but only superficially code style stuff which you > can ignore if you feel strongly about current style or it matches existing > file style etc... Thanks I'm leaving this one I think. > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > > > > --- > > Changes from V1: > > Remove unneeded evt_int_policy from struct cxl_dev_state > > defer Dynamic Capacity support > > Dave Jiang > > s/irq/rc > > use IRQ_NONE to signal the irq was not for us. > > Jonathan > > use msi_enabled rather than nr_irq_vec > > On failure explicitly set CXL_INT_NONE > > Add comment for Get Event Interrupt Policy > > use devm_request_threaded_irq() > > Use individual handler/thread functions for each of the > > logs rather than struct cxl_event_irq_id. > > > > Changes from RFC v2 > > Adjust to new irq 16 vector allocation > > Jonathan > > Remove CXL_INT_RES > > Use irq threads to ensure mailbox commands are executed outside irq context > > Adjust for optional Dynamic Capacity log > > --- > > drivers/cxl/core/mbox.c | 44 +++++++++++- > > drivers/cxl/cxlmem.h | 30 ++++++++ > > drivers/cxl/pci.c | 130 +++++++++++++++++++++++++++++++++++ > > include/uapi/linux/cxl_mem.h | 2 + > > 4 files changed, 204 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > > index 450b410f29f6..2d384b0fc2b3 100644 > > --- a/drivers/cxl/cxlmem.h > > +++ b/drivers/cxl/cxlmem.h > > @@ -179,6 +179,30 @@ struct cxl_endpoint_dvsec_info { > > struct range dvsec_range[2]; > > }; > > > > +/** > > + * Event Interrupt Policy > > + * > > + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 > > + */ > > +enum cxl_event_int_mode { > > + CXL_INT_NONE = 0x00, > > + CXL_INT_MSI_MSIX = 0x01, > > + CXL_INT_FW = 0x02 > > +}; > > +#define CXL_EVENT_INT_MODE_MASK 0x3 > > +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4) > > +struct cxl_event_interrupt_policy { > > + u8 info_settings; > > + u8 warn_settings; > > + u8 failure_settings; > > + u8 fatal_settings; > > +} __packed; > > + > > +static inline bool cxl_evt_int_is_msi(u8 setting) > > +{ > > + return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK); > > Maybe a case for FIELD_GET() though given the defines are all local > it is already obvious what this is doing so fine if you prefer to > keep it as is. > > > +} > ... > > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > > index 11e95a95195a..3c0b9199f11a 100644 > > --- a/drivers/cxl/pci.c > > +++ b/drivers/cxl/pci.c > > @@ -449,6 +449,134 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds) > > cxlds->msi_enabled = true; > > } > > > > +static irqreturn_t cxl_event_info_thread(int irq, void *id) > > +{ > > + struct cxl_dev_state *cxlds = id; > > + > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); > > + return IRQ_HANDLED; > > +} > > I'm not a great fan of macros, but maybe this is a case for them. Nor am I. I particularly don't like when functions are defined with macros as it makes git grep and cscope harder to use on the code. > > > + > > +static irqreturn_t cxl_event_info_handler(int irq, void *id) > > +{ > > + struct cxl_dev_state *cxlds = id; > > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); > > Superficial and this is guaranteed to work (8.2.8 allow all sizes of read up > to 64 bytes), but maybe should treat this as a 64 bit register as that aligns > better with spec? I suppose it does. When I looked at that I only noticed the 32bit field and thought the register was 32bits. But most of our reads are to u32 fields. I think it is fine for now. > > > + > > + if (CXLDEV_EVENT_STATUS_INFO & status) > > Another maybe FIELD_GET() case? Are we using field get for individual bits? I see we are for some things. :-( I was thinking the 'field' is 'Event Status' and the 32 bit read of the register already got that. > > > + return IRQ_WAKE_THREAD; > > + return IRQ_NONE; > > +} > > + > > +static irqreturn_t cxl_event_warn_thread(int irq, void *id) > > +{ > > + struct cxl_dev_state *cxlds = id; > > Why id? Shortened from 'dev_id' from the devm_request_threaded_irq() function signature. > I'd call it what it is (maybe _cxlsd) and not bother with > the local variable in this case as it is only used once and doesn't > need the type. > > static irqreturn_t cxl_event_warn_thread(int irq, void *cxlds) > { > cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); Yea in this case it is pretty clear but I hate having to look at function signatures to figure out what a (void *) callback type is supposed to be. This ... > > +static irqreturn_t cxl_event_warn_thread(int irq, void *id) > > +{ > > + struct cxl_dev_state *cxlds = id; ... helps to self document cxl_event_warn_thread() IMO, and the compiler will optimize. Thanks for looking! Ira > > return IRQ_HANDLED; > } > > > > + > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); > > + return IRQ_HANDLED; > > +} > > + > > ... > > >
On Wed, 30 Nov 2022, ira.weiny@intel.com wrote: >From: Ira Weiny <ira.weiny@intel.com> > >CXL device events are signaled via interrupts. Each event log may have >a different interrupt message number. These message numbers are >reported in the Get Event Interrupt Policy mailbox command. > >Add interrupt support for event logs. Interrupts are allocated as >shared interrupts. Therefore, all or some event logs can share the same >message number. > >Signed-off-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
ira.weiny@ wrote: > From: Ira Weiny <ira.weiny@intel.com> > > CXL device events are signaled via interrupts. Each event log may have > a different interrupt message number. These message numbers are > reported in the Get Event Interrupt Policy mailbox command. > > Add interrupt support for event logs. Interrupts are allocated as > shared interrupts. Therefore, all or some event logs can share the same > message number. Definitely squash patch1 with this one, especially because this shows that the ->msi_enabled portion of patch1 was unnecessary, see below. > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > --- > Changes from V1: > Remove unneeded evt_int_policy from struct cxl_dev_state > defer Dynamic Capacity support > Dave Jiang > s/irq/rc > use IRQ_NONE to signal the irq was not for us. > Jonathan > use msi_enabled rather than nr_irq_vec > On failure explicitly set CXL_INT_NONE > Add comment for Get Event Interrupt Policy > use devm_request_threaded_irq() > Use individual handler/thread functions for each of the > logs rather than struct cxl_event_irq_id. > > Changes from RFC v2 > Adjust to new irq 16 vector allocation > Jonathan > Remove CXL_INT_RES > Use irq threads to ensure mailbox commands are executed outside irq context > Adjust for optional Dynamic Capacity log > --- > drivers/cxl/core/mbox.c | 44 +++++++++++- > drivers/cxl/cxlmem.h | 30 ++++++++ > drivers/cxl/pci.c | 130 +++++++++++++++++++++++++++++++++++ > include/uapi/linux/cxl_mem.h | 2 + > 4 files changed, 204 insertions(+), 2 deletions(-) > > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 30840b711381..1e00b49d8b06 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -53,6 +53,8 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { > CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), > CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0), > CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0), > + CXL_CMD(GET_EVT_INT_POLICY, 0, 0x5, 0), > + CXL_CMD(SET_EVT_INT_POLICY, 0x5, 0, 0), > CXL_CMD(GET_FW_INFO, 0, 0x50, 0), > CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), > CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), > @@ -806,8 +808,8 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds, > return 0; > } > > -static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, > - enum cxl_event_log_type type) > +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, > + enum cxl_event_log_type type) > { > struct cxl_get_event_payload *payload; > u16 nr_rec; > @@ -857,6 +859,7 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, > unlock_buffer: > mutex_unlock(&cxlds->event_buf_lock); > } > +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_records_log, CXL); > > static void cxl_mem_free_event_buffer(void *data) > { > @@ -916,6 +919,43 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds) > } > EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); > > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > + struct cxl_event_interrupt_policy *policy) > +{ > + int rc; > + > + policy->info_settings = CXL_INT_MSI_MSIX; > + policy->warn_settings = CXL_INT_MSI_MSIX; > + policy->failure_settings = CXL_INT_MSI_MSIX; > + policy->fatal_settings = CXL_INT_MSI_MSIX; I think this needs to be careful not to undo events that the BIOS steered to itself in firmware-first mode, which raises another question, does firmware-first mean more the OS needs to backoff on some event-log handling as well? > + > + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY, > + policy, sizeof(*policy), NULL, 0); > + if (rc < 0) { > + dev_err(cxlds->dev, "Failed to set event interrupt policy : %d", > + rc); > + > + policy->info_settings = CXL_INT_NONE; > + policy->warn_settings = CXL_INT_NONE; > + policy->failure_settings = CXL_INT_NONE; > + policy->fatal_settings = CXL_INT_NONE; > + > + return rc; > + } > + > + /* Retrieve interrupt message numbers */ > + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0, > + policy, sizeof(*policy)); > + if (rc < 0) { > + dev_err(cxlds->dev, "Failed to get event interrupt policy : %d", > + rc); > + return rc; > + } > + > + return 0; > +} > +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL); > + > /** > * cxl_mem_get_partition_info - Get partition info > * @cxlds: The device data for the operation > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 450b410f29f6..2d384b0fc2b3 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -179,6 +179,30 @@ struct cxl_endpoint_dvsec_info { > struct range dvsec_range[2]; > }; > > +/** > + * Event Interrupt Policy > + * > + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 > + */ > +enum cxl_event_int_mode { > + CXL_INT_NONE = 0x00, > + CXL_INT_MSI_MSIX = 0x01, > + CXL_INT_FW = 0x02 > +}; > +#define CXL_EVENT_INT_MODE_MASK 0x3 > +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4) > +struct cxl_event_interrupt_policy { > + u8 info_settings; > + u8 warn_settings; > + u8 failure_settings; > + u8 fatal_settings; > +} __packed; > + > +static inline bool cxl_evt_int_is_msi(u8 setting) > +{ > + return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK); > +} > + > /** > * struct cxl_dev_state - The driver device state > * > @@ -262,6 +286,8 @@ enum cxl_opcode { > CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, > CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100, > CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101, > + CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102, > + CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, > CXL_MBOX_OP_GET_FW_INFO = 0x0200, > CXL_MBOX_OP_ACTIVATE_FW = 0x0202, > CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, > @@ -537,7 +563,11 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); > struct cxl_dev_state *cxl_dev_state_create(struct device *dev); > void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); > void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); > +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, > + enum cxl_event_log_type type); > void cxl_mem_get_event_records(struct cxl_dev_state *cxlds); > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > + struct cxl_event_interrupt_policy *policy); > #ifdef CONFIG_CXL_SUSPEND > void cxl_mem_active_inc(void); > void cxl_mem_active_dec(void); > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 11e95a95195a..3c0b9199f11a 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -449,6 +449,134 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds) > cxlds->msi_enabled = true; > } > > +static irqreturn_t cxl_event_info_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t cxl_event_info_handler(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); > + > + if (CXLDEV_EVENT_STATUS_INFO & status) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > +} > + > +static irqreturn_t cxl_event_warn_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t cxl_event_warn_handler(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); > + > + if (CXLDEV_EVENT_STATUS_WARN & status) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > +} > + > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > + return IRQ_HANDLED; > +} So I think one of the nice side effects of moving log priorty handling inside of cxl_mem_get_records_log() and looping through all log types in priority order until all status is clear is that an INFO interrupt also triggers a check of the FATAL status for free. You likely do not even need to do the status read in hardirq part of the handler, just unconditionally wake the event handler thread. I.e. just pass NULL for @handler to devm_request_threaded_irq() and let the thread_fn figure it all out in priority order. > + > +static irqreturn_t cxl_event_failure_handler(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); > + > + if (CXLDEV_EVENT_STATUS_FAIL & status) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > +} > + > +static irqreturn_t cxl_event_fatal_thread(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t cxl_event_fatal_handler(int irq, void *id) > +{ > + struct cxl_dev_state *cxlds = id; > + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); > + > + if (CXLDEV_EVENT_STATUS_FATAL & status) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > +} > + > +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds) > +{ > + struct cxl_event_interrupt_policy policy; > + struct device *dev = cxlds->dev; > + struct pci_dev *pdev = to_pci_dev(dev); > + u8 setting; > + int rc; > + > + if (cxl_event_config_msgnums(cxlds, &policy)) > + return; > + > + setting = policy.info_settings; > + if (cxl_evt_int_is_msi(setting)) { So pci_irq_vector() automatically handles checking if msi is enabled and will return a failure if either MSI is not enabled, or the message number did not get a vector. With that insight I would do something like this (untested): @@ -521,7 +521,14 @@ static irqreturn_t cxl_event_fatal_handler(int irq, void *id) return IRQ_NONE; } -static void cxl_event_irqsetup(struct cxl_dev_state *cxlds) +static int cxl_evt_irq(struct pci_dev *pdev, u8 setting) +{ + if ((setting & CXL_EVENT_INT_MODE_MASK) != CXL_INT_MSI_MSIX) + return -ENXIO; + return pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)); +} + +static int cxl_event_irqsetup(struct cxl_dev_state *cxlds) { struct cxl_event_interrupt_policy policy; struct device *dev = cxlds->dev; @@ -529,18 +536,17 @@ static void cxl_event_irqsetup(struct cxl_dev_state *cxlds) u8 setting; int rc; - if (cxl_event_config_msgnums(cxlds, &policy)) - return; + rc = cxl_event_config_msgnums(cxlds, &policy); + if (rc) + return rc; - setting = policy.info_settings; - if (cxl_evt_int_is_msi(setting)) { - rc = devm_request_threaded_irq(dev, - pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), - cxl_event_info_handler, cxl_event_info_thread, - IRQF_SHARED, NULL, cxlds); - if (rc) - dev_err(dev, "Failed to get interrupt for %s event log\n", - cxl_event_log_type_str(CXL_EVENT_TYPE_INFO)); + rc = devm_request_threaded_irq(dev, + cxl_evt_irq(pdev, policy.info_settings), + NULL, cxl_event_info_thread, IRQF_SHARED, + NULL, cxlds); + if (rc) { + dev_err(dev, "Failed to get interrupt for INFO event log\n"); + return rc; } setting = policy.warn_settings; > + rc = devm_request_threaded_irq(dev, > + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), > + cxl_event_info_handler, cxl_event_info_thread, > + IRQF_SHARED, NULL, cxlds); > + if (rc) > + dev_err(dev, "Failed to get interrupt for %s event log\n", > + cxl_event_log_type_str(CXL_EVENT_TYPE_INFO)); Per above, no need to use cxl_event_log_type_str() in these. > + } > + > + setting = policy.warn_settings; > + if (cxl_evt_int_is_msi(setting)) { > + rc = devm_request_threaded_irq(dev, > + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), > + cxl_event_warn_handler, cxl_event_warn_thread, > + IRQF_SHARED, NULL, cxlds); > + if (rc) > + dev_err(dev, "Failed to get interrupt for %s event log\n", > + cxl_event_log_type_str(CXL_EVENT_TYPE_WARN)); > + } > + > + setting = policy.failure_settings; > + if (cxl_evt_int_is_msi(setting)) { > + rc = devm_request_threaded_irq(dev, > + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), > + cxl_event_failure_handler, cxl_event_failure_thread, > + IRQF_SHARED, NULL, cxlds); > + if (rc) > + dev_err(dev, "Failed to get interrupt for %s event log\n", > + cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL)); > + } > + > + setting = policy.fatal_settings; > + if (cxl_evt_int_is_msi(setting)) { > + rc = devm_request_threaded_irq(dev, > + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), > + cxl_event_fatal_handler, cxl_event_fatal_thread, > + IRQF_SHARED, NULL, cxlds); > + if (rc) > + dev_err(dev, "Failed to get interrupt for %s event log\n", > + cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL)); > + } > +} > + > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > struct cxl_register_map map; > @@ -516,6 +644,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > return rc; > > cxl_pci_alloc_irq_vectors(cxlds); There should be fail return here, or a comment why this can be skipped, especially if the device claims to support events. > + if (cxlds->msi_enabled) > + cxl_event_irqsetup(cxlds); Per above, do this unconditionally. > > cxlmd = devm_cxl_add_memdev(cxlds); > if (IS_ERR(cxlmd)) > diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h > index 7c1ad8062792..a8204802fcca 100644 > --- a/include/uapi/linux/cxl_mem.h > +++ b/include/uapi/linux/cxl_mem.h > @@ -26,6 +26,8 @@ > ___C(GET_SUPPORTED_LOGS, "Get Supported Logs"), \ > ___C(GET_EVENT_RECORD, "Get Event Record"), \ > ___C(CLEAR_EVENT_RECORD, "Clear Event Record"), \ > + ___C(GET_EVT_INT_POLICY, "Get Event Interrupt Policy"), \ > + ___C(SET_EVT_INT_POLICY, "Set Event Interrupt Policy"), \ > ___C(GET_FW_INFO, "Get FW Info"), \ > ___C(GET_PARTITION_INFO, "Get Partition Information"), \ > ___C(GET_LSA, "Get Label Storage Area"), \ Same, "at the end" comment.
> > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > > + struct cxl_event_interrupt_policy *policy) > > +{ > > + int rc; > > + > > + policy->info_settings = CXL_INT_MSI_MSIX; > > + policy->warn_settings = CXL_INT_MSI_MSIX; > > + policy->failure_settings = CXL_INT_MSI_MSIX; > > + policy->fatal_settings = CXL_INT_MSI_MSIX; > > I think this needs to be careful not to undo events that the BIOS > steered to itself in firmware-first mode, which raises another question, > does firmware-first mean more the OS needs to backoff on some event-log > handling as well? Hmm. Does the _OSC cover these. There is one for Memory error reporting that I think covers it (refers to 12.2.3.2) Note that should cover any means of obtaining these, not just interrupt driven - so including the initial record clear. .. > > + > > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > > +{ > > + struct cxl_dev_state *cxlds = id; > > + > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > > + return IRQ_HANDLED; > > +} > > So I think one of the nice side effects of moving log priorty handling > inside of cxl_mem_get_records_log() and looping through all log types in > priority order until all status is clear is that an INFO interrupt also > triggers a check of the FATAL status for free. > I go the opposite way on this in thinking that an interrupt should only ever be used to handle the things it was registered for - so we should not be clearing fatal records in the handler triggered for info events. Doing other actions like this relies on subtlies of the generic interrupt handling code which happens to force interrupt threads on a shared interrupt line to be serialized. I'm not sure we are safe at all the interrupt isn't shared unless we put a lock around the whole thing (we have one because of the buffer mutex though). If going this way I think the lock needs a rename. It's not just protecting the buffer used, but also serialize multiple interrupt threads. Jonathan > You likely do not even need to do the status read in hardirq part of the > handler, just unconditionally wake the event handler thread. I.e. just > pass NULL for @handler to devm_request_threaded_irq() and let the > thread_fn figure it all out in priority order.
Jonathan Cameron wrote: > > > > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > > > + struct cxl_event_interrupt_policy *policy) > > > +{ > > > + int rc; > > > + > > > + policy->info_settings = CXL_INT_MSI_MSIX; > > > + policy->warn_settings = CXL_INT_MSI_MSIX; > > > + policy->failure_settings = CXL_INT_MSI_MSIX; > > > + policy->fatal_settings = CXL_INT_MSI_MSIX; > > > > I think this needs to be careful not to undo events that the BIOS > > steered to itself in firmware-first mode, which raises another question, > > does firmware-first mean more the OS needs to backoff on some event-log > > handling as well? > > Hmm. Does the _OSC cover these. There is one for Memory error reporting > that I think covers it (refers to 12.2.3.2) > > Note that should cover any means of obtaining these, not just interrupt > driven - so including the initial record clear. > > .. > > > > + > > > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > > > +{ > > > + struct cxl_dev_state *cxlds = id; > > > + > > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > > > + return IRQ_HANDLED; > > > +} > > > > So I think one of the nice side effects of moving log priorty handling > > inside of cxl_mem_get_records_log() and looping through all log types in > > priority order until all status is clear is that an INFO interrupt also > > triggers a check of the FATAL status for free. > > > > I go the opposite way on this in thinking that an interrupt should only > ever be used to handle the things it was registered for - so we should > not be clearing fatal records in the handler triggered for info events. I would agree with you if this was a fast path and if the hardware mechanism did not involve shared status register that tells you that both FATAL and INFO are pending retrieval through a mechanism. Compare that to the separation between admin and IO queues in NVME. If the handler is going to loop on the status register then it must be careful not to starve out FATAL while processing INFO. > Doing other actions like this relies on subtlies of the generic interrupt > handling code which happens to force interrupt threads on a shared interrupt > line to be serialized. I'm not sure we are safe at all the interrupt > isn't shared unless we put a lock around the whole thing (we have one > because of the buffer mutex though). The interrupt is likely shared since there is no performance benefit to entice hardware vendors spend transistor budget on more vector space for events. The events architecture does not merit that spend. > If going this way I think the lock needs a rename. > It's not just protecting the buffer used, but also serialize multiple > interrupt threads. I will let Ira decide if he wants to rename, but in my mind the shared event buffer *is* the data being locked, the fact that multiple threads might be contending for it is immaterial.
On Fri, 2 Dec 2022 11:43:29 -0800 Dan Williams <dan.j.williams@intel.com> wrote: > Jonathan Cameron wrote: > > > > > > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > > > > + struct cxl_event_interrupt_policy *policy) > > > > +{ > > > > + int rc; > > > > + > > > > + policy->info_settings = CXL_INT_MSI_MSIX; > > > > + policy->warn_settings = CXL_INT_MSI_MSIX; > > > > + policy->failure_settings = CXL_INT_MSI_MSIX; > > > > + policy->fatal_settings = CXL_INT_MSI_MSIX; > > > > > > I think this needs to be careful not to undo events that the BIOS > > > steered to itself in firmware-first mode, which raises another question, > > > does firmware-first mean more the OS needs to backoff on some event-log > > > handling as well? > > > > Hmm. Does the _OSC cover these. There is one for Memory error reporting > > that I think covers it (refers to 12.2.3.2) > > > > Note that should cover any means of obtaining these, not just interrupt > > driven - so including the initial record clear. > > > > .. > > > > > > + > > > > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > > > > +{ > > > > + struct cxl_dev_state *cxlds = id; > > > > + > > > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > > > > + return IRQ_HANDLED; > > > > +} > > > > > > So I think one of the nice side effects of moving log priorty handling > > > inside of cxl_mem_get_records_log() and looping through all log types in > > > priority order until all status is clear is that an INFO interrupt also > > > triggers a check of the FATAL status for free. > > > > > > > I go the opposite way on this in thinking that an interrupt should only > > ever be used to handle the things it was registered for - so we should > > not be clearing fatal records in the handler triggered for info events. > > I would agree with you if this was a fast path and if the hardware > mechanism did not involve shared status register that tells you > that both FATAL and INFO are pending retrieval through a mechanism. > Compare that to the separation between admin and IO queues in NVME. > > If the handler is going to loop on the status register then it must be > careful not to starve out FATAL while processing INFO. > > > Doing other actions like this relies on subtlies of the generic interrupt > > handling code which happens to force interrupt threads on a shared interrupt > > line to be serialized. I'm not sure we are safe at all the interrupt > > isn't shared unless we put a lock around the whole thing (we have one > > because of the buffer mutex though). > > The interrupt is likely shared since there is no performance benefit to > entice hardware vendors spend transistor budget on more vector space for > events. The events architecture does not merit that spend. > > > If going this way I think the lock needs a rename. > > It's not just protecting the buffer used, but also serialize multiple > > interrupt threads. > > I will let Ira decide if he wants to rename, but in my mind the shared > event buffer *is* the data being locked, the fact that multiple threads > might be contending for it is immaterial. It isn't he only thing being protected. Access to the device is also being serialized including the data in it's registers. If someone comes along later and decides to implement multiple buffers and there for gets rid of the lock. boom. Jonathan
Jonathan Cameron wrote: > On Fri, 2 Dec 2022 11:43:29 -0800 > Dan Williams <dan.j.williams@intel.com> wrote: > > > Jonathan Cameron wrote: > > > > > > > > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > > > > > + struct cxl_event_interrupt_policy *policy) > > > > > +{ > > > > > + int rc; > > > > > + > > > > > + policy->info_settings = CXL_INT_MSI_MSIX; > > > > > + policy->warn_settings = CXL_INT_MSI_MSIX; > > > > > + policy->failure_settings = CXL_INT_MSI_MSIX; > > > > > + policy->fatal_settings = CXL_INT_MSI_MSIX; > > > > > > > > I think this needs to be careful not to undo events that the BIOS > > > > steered to itself in firmware-first mode, which raises another question, > > > > does firmware-first mean more the OS needs to backoff on some event-log > > > > handling as well? > > > > > > Hmm. Does the _OSC cover these. There is one for Memory error reporting > > > that I think covers it (refers to 12.2.3.2) > > > > > > Note that should cover any means of obtaining these, not just interrupt > > > driven - so including the initial record clear. > > > > > > .. > > > > > > > > + > > > > > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > > > > > +{ > > > > > + struct cxl_dev_state *cxlds = id; > > > > > + > > > > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > > > > > + return IRQ_HANDLED; > > > > > +} > > > > > > > > So I think one of the nice side effects of moving log priorty handling > > > > inside of cxl_mem_get_records_log() and looping through all log types in > > > > priority order until all status is clear is that an INFO interrupt also > > > > triggers a check of the FATAL status for free. > > > > > > > > > > I go the opposite way on this in thinking that an interrupt should only > > > ever be used to handle the things it was registered for - so we should > > > not be clearing fatal records in the handler triggered for info events. > > > > I would agree with you if this was a fast path and if the hardware > > mechanism did not involve shared status register that tells you > > that both FATAL and INFO are pending retrieval through a mechanism. > > Compare that to the separation between admin and IO queues in NVME. > > > > If the handler is going to loop on the status register then it must be > > careful not to starve out FATAL while processing INFO. > > > > > Doing other actions like this relies on subtlies of the generic interrupt > > > handling code which happens to force interrupt threads on a shared interrupt > > > line to be serialized. I'm not sure we are safe at all the interrupt > > > isn't shared unless we put a lock around the whole thing (we have one > > > because of the buffer mutex though). > > > > The interrupt is likely shared since there is no performance benefit to > > entice hardware vendors spend transistor budget on more vector space for > > events. The events architecture does not merit that spend. > > > > > If going this way I think the lock needs a rename. > > > It's not just protecting the buffer used, but also serialize multiple > > > interrupt threads. > > > > I will let Ira decide if he wants to rename, but in my mind the shared > > event buffer *is* the data being locked, the fact that multiple threads > > might be contending for it is immaterial. > > It isn't he only thing being protected. Access to the device is also > being serialized including the data in it's registers. > > If someone comes along later and decides to implement multiple buffers > and there for gets rid of the lock. boom. That's what the mailbox mutex is protecting against. If there is an aspect of the hardware state that is not protected by that then that's a bug.
On Mon, 5 Dec 2022 08:35:34 -0800 Dan Williams <dan.j.williams@intel.com> wrote: > Jonathan Cameron wrote: > > On Fri, 2 Dec 2022 11:43:29 -0800 > > Dan Williams <dan.j.williams@intel.com> wrote: > > > > > Jonathan Cameron wrote: > > > > > > > > > > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, > > > > > > + struct cxl_event_interrupt_policy *policy) > > > > > > +{ > > > > > > + int rc; > > > > > > + > > > > > > + policy->info_settings = CXL_INT_MSI_MSIX; > > > > > > + policy->warn_settings = CXL_INT_MSI_MSIX; > > > > > > + policy->failure_settings = CXL_INT_MSI_MSIX; > > > > > > + policy->fatal_settings = CXL_INT_MSI_MSIX; > > > > > > > > > > I think this needs to be careful not to undo events that the BIOS > > > > > steered to itself in firmware-first mode, which raises another question, > > > > > does firmware-first mean more the OS needs to backoff on some event-log > > > > > handling as well? > > > > > > > > Hmm. Does the _OSC cover these. There is one for Memory error reporting > > > > that I think covers it (refers to 12.2.3.2) > > > > > > > > Note that should cover any means of obtaining these, not just interrupt > > > > driven - so including the initial record clear. > > > > > > > > .. > > > > > > > > > > + > > > > > > +static irqreturn_t cxl_event_failure_thread(int irq, void *id) > > > > > > +{ > > > > > > + struct cxl_dev_state *cxlds = id; > > > > > > + > > > > > > + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); > > > > > > + return IRQ_HANDLED; > > > > > > +} > > > > > > > > > > So I think one of the nice side effects of moving log priorty handling > > > > > inside of cxl_mem_get_records_log() and looping through all log types in > > > > > priority order until all status is clear is that an INFO interrupt also > > > > > triggers a check of the FATAL status for free. > > > > > > > > > > > > > I go the opposite way on this in thinking that an interrupt should only > > > > ever be used to handle the things it was registered for - so we should > > > > not be clearing fatal records in the handler triggered for info events. > > > > > > I would agree with you if this was a fast path and if the hardware > > > mechanism did not involve shared status register that tells you > > > that both FATAL and INFO are pending retrieval through a mechanism. > > > Compare that to the separation between admin and IO queues in NVME. > > > > > > If the handler is going to loop on the status register then it must be > > > careful not to starve out FATAL while processing INFO. > > > > > > > Doing other actions like this relies on subtlies of the generic interrupt > > > > handling code which happens to force interrupt threads on a shared interrupt > > > > line to be serialized. I'm not sure we are safe at all the interrupt > > > > isn't shared unless we put a lock around the whole thing (we have one > > > > because of the buffer mutex though). > > > > > > The interrupt is likely shared since there is no performance benefit to > > > entice hardware vendors spend transistor budget on more vector space for > > > events. The events architecture does not merit that spend. > > > > > > > If going this way I think the lock needs a rename. > > > > It's not just protecting the buffer used, but also serialize multiple > > > > interrupt threads. > > > > > > I will let Ira decide if he wants to rename, but in my mind the shared > > > event buffer *is* the data being locked, the fact that multiple threads > > > might be contending for it is immaterial. > > > > It isn't he only thing being protected. Access to the device is also > > being serialized including the data in it's registers. > > > > If someone comes along later and decides to implement multiple buffers > > and there for gets rid of the lock. boom. > > That's what the mailbox mutex is protecting against. If there is an > aspect of the hardware state that is not protected by that then that's a > bug. > Wrong level of locking. This is about a race on multiple commands 1) Read data from interrupt thread 1. 2) Read same data from interrupt thread 2. 3) Clear data from interrupt thread 1. 4) Clear data from interrupt thread 2. Boom (well minor error we probably eat but not good practice).
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 30840b711381..1e00b49d8b06 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -53,6 +53,8 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0), + CXL_CMD(GET_EVT_INT_POLICY, 0, 0x5, 0), + CXL_CMD(SET_EVT_INT_POLICY, 0x5, 0, 0), CXL_CMD(GET_FW_INFO, 0, 0x50, 0), CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), @@ -806,8 +808,8 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds, return 0; } -static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, - enum cxl_event_log_type type) +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, + enum cxl_event_log_type type) { struct cxl_get_event_payload *payload; u16 nr_rec; @@ -857,6 +859,7 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, unlock_buffer: mutex_unlock(&cxlds->event_buf_lock); } +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_records_log, CXL); static void cxl_mem_free_event_buffer(void *data) { @@ -916,6 +919,43 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, + struct cxl_event_interrupt_policy *policy) +{ + int rc; + + policy->info_settings = CXL_INT_MSI_MSIX; + policy->warn_settings = CXL_INT_MSI_MSIX; + policy->failure_settings = CXL_INT_MSI_MSIX; + policy->fatal_settings = CXL_INT_MSI_MSIX; + + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY, + policy, sizeof(*policy), NULL, 0); + if (rc < 0) { + dev_err(cxlds->dev, "Failed to set event interrupt policy : %d", + rc); + + policy->info_settings = CXL_INT_NONE; + policy->warn_settings = CXL_INT_NONE; + policy->failure_settings = CXL_INT_NONE; + policy->fatal_settings = CXL_INT_NONE; + + return rc; + } + + /* Retrieve interrupt message numbers */ + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0, + policy, sizeof(*policy)); + if (rc < 0) { + dev_err(cxlds->dev, "Failed to get event interrupt policy : %d", + rc); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL); + /** * cxl_mem_get_partition_info - Get partition info * @cxlds: The device data for the operation diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 450b410f29f6..2d384b0fc2b3 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -179,6 +179,30 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; +/** + * Event Interrupt Policy + * + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + */ +enum cxl_event_int_mode { + CXL_INT_NONE = 0x00, + CXL_INT_MSI_MSIX = 0x01, + CXL_INT_FW = 0x02 +}; +#define CXL_EVENT_INT_MODE_MASK 0x3 +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4) +struct cxl_event_interrupt_policy { + u8 info_settings; + u8 warn_settings; + u8 failure_settings; + u8 fatal_settings; +} __packed; + +static inline bool cxl_evt_int_is_msi(u8 setting) +{ + return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK); +} + /** * struct cxl_dev_state - The driver device state * @@ -262,6 +286,8 @@ enum cxl_opcode { CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100, CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101, + CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102, + CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, @@ -537,7 +563,11 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, + enum cxl_event_log_type type); void cxl_mem_get_event_records(struct cxl_dev_state *cxlds); +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, + struct cxl_event_interrupt_policy *policy); #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); void cxl_mem_active_dec(void); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 11e95a95195a..3c0b9199f11a 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -449,6 +449,134 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds) cxlds->msi_enabled = true; } +static irqreturn_t cxl_event_info_thread(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_event_info_handler(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); + + if (CXLDEV_EVENT_STATUS_INFO & status) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + +static irqreturn_t cxl_event_warn_thread(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_event_warn_handler(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); + + if (CXLDEV_EVENT_STATUS_WARN & status) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + +static irqreturn_t cxl_event_failure_thread(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_event_failure_handler(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); + + if (CXLDEV_EVENT_STATUS_FAIL & status) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + +static irqreturn_t cxl_event_fatal_thread(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_event_fatal_handler(int irq, void *id) +{ + struct cxl_dev_state *cxlds = id; + u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); + + if (CXLDEV_EVENT_STATUS_FATAL & status) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds) +{ + struct cxl_event_interrupt_policy policy; + struct device *dev = cxlds->dev; + struct pci_dev *pdev = to_pci_dev(dev); + u8 setting; + int rc; + + if (cxl_event_config_msgnums(cxlds, &policy)) + return; + + setting = policy.info_settings; + if (cxl_evt_int_is_msi(setting)) { + rc = devm_request_threaded_irq(dev, + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), + cxl_event_info_handler, cxl_event_info_thread, + IRQF_SHARED, NULL, cxlds); + if (rc) + dev_err(dev, "Failed to get interrupt for %s event log\n", + cxl_event_log_type_str(CXL_EVENT_TYPE_INFO)); + } + + setting = policy.warn_settings; + if (cxl_evt_int_is_msi(setting)) { + rc = devm_request_threaded_irq(dev, + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), + cxl_event_warn_handler, cxl_event_warn_thread, + IRQF_SHARED, NULL, cxlds); + if (rc) + dev_err(dev, "Failed to get interrupt for %s event log\n", + cxl_event_log_type_str(CXL_EVENT_TYPE_WARN)); + } + + setting = policy.failure_settings; + if (cxl_evt_int_is_msi(setting)) { + rc = devm_request_threaded_irq(dev, + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), + cxl_event_failure_handler, cxl_event_failure_thread, + IRQF_SHARED, NULL, cxlds); + if (rc) + dev_err(dev, "Failed to get interrupt for %s event log\n", + cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL)); + } + + setting = policy.fatal_settings; + if (cxl_evt_int_is_msi(setting)) { + rc = devm_request_threaded_irq(dev, + pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)), + cxl_event_fatal_handler, cxl_event_fatal_thread, + IRQF_SHARED, NULL, cxlds); + if (rc) + dev_err(dev, "Failed to get interrupt for %s event log\n", + cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL)); + } +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -516,6 +644,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; cxl_pci_alloc_irq_vectors(cxlds); + if (cxlds->msi_enabled) + cxl_event_irqsetup(cxlds); cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 7c1ad8062792..a8204802fcca 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -26,6 +26,8 @@ ___C(GET_SUPPORTED_LOGS, "Get Supported Logs"), \ ___C(GET_EVENT_RECORD, "Get Event Record"), \ ___C(CLEAR_EVENT_RECORD, "Clear Event Record"), \ + ___C(GET_EVT_INT_POLICY, "Get Event Interrupt Policy"), \ + ___C(SET_EVT_INT_POLICY, "Set Event Interrupt Policy"), \ ___C(GET_FW_INFO, "Get FW Info"), \ ___C(GET_PARTITION_INFO, "Get Partition Information"), \ ___C(GET_LSA, "Get Label Storage Area"), \