Message ID | 1439226574-7766-1-git-send-email-mrochs@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> @@ -487,11 +515,27 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) > get_unaligned_be32(&((u32 *)scp->cmnd)[2]), > get_unaligned_be32(&((u32 *)scp->cmnd)[3])); > > - rcr = cxlflash_afu_reset(cfg); > - if (rcr == 0) > - rc = SUCCESS; > - else > + switch (cfg->state) { > + case STATE_NORMAL: > + cfg->state = STATE_LIMBO; > + scsi_block_requests(cfg->host); > + > + rcr = cxlflash_afu_reset(cfg); > + if (!rcr) > + rc = FAILED; I think you want: if (rcr) { rc = FAILED; break; } cxlflash_afu_reset returns 0 on success, so I think you want to drop the negation, and also I think if it fails you want to break out and not set STATE_NORMAL. Is that right? Once you fix that, or explain to me why I'm wrong: Reviewed-by: Daniel Axtens <dja@axtens.net>
On Mon, 2015-08-10 at 12:09 -0500, Matthew R. Ochs wrote: > Introduce support for enhanced I/O error handling. This needs more description of what you're doing. What's the overall approach? There seems to be a new limbo queue thats created stall things while the device is in error state. That should be described here. > Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> > Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> > --- > drivers/scsi/cxlflash/Kconfig | 2 +- > drivers/scsi/cxlflash/common.h | 11 ++- > drivers/scsi/cxlflash/main.c | 166 ++++++++++++++++++++++++++++++++++++++--- > drivers/scsi/cxlflash/main.h | 4 + > 4 files changed, 170 insertions(+), 13 deletions(-) > > diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig > index c707508..c052104 100644 > --- a/drivers/scsi/cxlflash/Kconfig > +++ b/drivers/scsi/cxlflash/Kconfig > @@ -4,7 +4,7 @@ > > config CXLFLASH > tristate "Support for IBM CAPI Flash" > - depends on PCI && SCSI && CXL > + depends on PCI && SCSI && CXL && EEH > default m > help > Allows CAPI Accelerated IO to Flash > diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h > index fe86bfe..7e663f4 100644 > --- a/drivers/scsi/cxlflash/common.h > +++ b/drivers/scsi/cxlflash/common.h > @@ -76,6 +76,12 @@ enum cxlflash_init_state { > INIT_STATE_SCSI > }; > > +enum cxlflash_state { > + STATE_NORMAL, /* Normal running state, everything good */ > + STATE_LIMBO, /* Limbo running state, trying to reset/recover */ > + STATE_FAILTERM /* Failed/terminating state, error out users/threads */ > +}; > + > /* > * Each context has its own set of resource handles that is visible > * only from that context. > @@ -91,8 +97,6 @@ struct cxlflash_cfg { > > ulong cxlflash_regs_pci; > > - wait_queue_head_t eeh_waitq; > - > struct work_struct work_q; > enum cxlflash_init_state init_state; > enum cxlflash_lr_state lr_state; > @@ -105,7 +109,8 @@ struct cxlflash_cfg { > > wait_queue_head_t tmf_waitq; > bool tmf_active; > - u8 err_recovery_active:1; > + wait_queue_head_t limbo_waitq; > + enum cxlflash_state state; > }; > > struct afu_cmd { > diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c > index 76a7286..18359d4 100644 > --- a/drivers/scsi/cxlflash/main.c > +++ b/drivers/scsi/cxlflash/main.c > @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) > } > spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); > > + switch (cfg->state) { > + case STATE_LIMBO: > + pr_debug_ratelimited("%s: device in limbo!\n", __func__); > + rc = SCSI_MLQUEUE_HOST_BUSY; So if the client gets BUSY, it should retry until it suceeds or gets a terminal failure? > + goto out; > + case STATE_FAILTERM: > + pr_debug_ratelimited("%s: device has failed!\n", __func__); > + goto error; error is only used here, so there is no need for a goto. > + default: > + break; > + } > + > cmd = cxlflash_cmd_checkout(afu); > if (unlikely(!cmd)) { > pr_err("%s: could not get a free command\n", __func__); > @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) > > out: > return rc; > +error: > + scp->result = (DID_NO_CONNECT << 16); > + scp->scsi_done(scp); > + return 0; 0 is success. That doesn't seem right for an error. > } > > /** > @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) > get_unaligned_be32(&((u32 *)scp->cmnd)[2]), > get_unaligned_be32(&((u32 *)scp->cmnd)[3])); > > - rcr = send_tmf(afu, scp, TMF_LUN_RESET); > - if (unlikely(rcr)) > + switch (cfg->state) { > + case STATE_NORMAL: > + rcr = send_tmf(afu, scp, TMF_LUN_RESET); > + if (unlikely(rcr)) > + rc = FAILED; > + break; > + case STATE_LIMBO: > + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); So we wait here till we are our of limbo? > + if (cfg->state == STATE_NORMAL) > + break; > + /* fall through */ > + default: > rc = FAILED; > + break; > + } > > pr_debug("%s: returning rc=%d\n", __func__, rc); > return rc; > @@ -487,11 +515,27 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) > get_unaligned_be32(&((u32 *)scp->cmnd)[2]), > get_unaligned_be32(&((u32 *)scp->cmnd)[3])); > > - rcr = cxlflash_afu_reset(cfg); > - if (rcr == 0) > - rc = SUCCESS; > - else > + switch (cfg->state) { > + case STATE_NORMAL: > + cfg->state = STATE_LIMBO; > + scsi_block_requests(cfg->host); > + > + rcr = cxlflash_afu_reset(cfg); > + if (!rcr) > + rc = FAILED; This is some sort of recovery once we get back into normal state? Can you comment what you're doing here? > + cfg->state = STATE_NORMAL; > + wake_up_all(&cfg->limbo_waitq); > + scsi_unblock_requests(cfg->host); Now we actually go to normal? > + break; > + case STATE_LIMBO: > + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); Wait here till we are out of limbo? What happens if that never occurs? > + if (cfg->state == STATE_NORMAL) > + break; > + /* fall through */ > + default: > rc = FAILED; > + break; > + } > > pr_debug("%s: returning rc=%d\n", __func__, rc); > return rc; > @@ -642,7 +686,7 @@ static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg) > struct pci_dev *pdev = cfg->dev; > > if (pci_channel_offline(pdev)) > - wait_event_timeout(cfg->eeh_waitq, > + wait_event_timeout(cfg->limbo_waitq, > !pci_channel_offline(pdev), > CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT); > } > @@ -825,6 +869,8 @@ static void cxlflash_remove(struct pci_dev *pdev) > !cfg->tmf_active); > spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); > > + cfg->state = STATE_FAILTERM; > + > switch (cfg->init_state) { > case INIT_STATE_SCSI: > scsi_remove_host(cfg->host); > @@ -1879,6 +1925,8 @@ static int init_afu(struct cxlflash_cfg *cfg) > struct afu *afu = cfg->afu; > struct device *dev = &cfg->dev->dev; > > + cxl_perst_reloads_same_image(cfg->cxl_afu, true); > + > rc = init_mc(cfg); > if (rc) { > dev_err(dev, "%s: call to init_mc failed, rc=%d!\n", > @@ -2021,6 +2069,12 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd) > * the sync. This design point requires calling threads to not be on interrupt > * context due to the possibility of sleeping during concurrent sync operations. > * > + * AFU sync operations are only necessary and allowed when the device is > + * operating normally. When not operating normally, sync requests can occur as > + * part of cleaning up resources associated with an adapter prior to removal. > + * In this scenario, these requests are simply ignored (safe due to the AFU > + * going away). > + * What about if we are in limbo state and it comes back? > * Return: > * 0 on success > * -1 on failure > @@ -2028,11 +2082,17 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd) > int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u, > res_hndl_t res_hndl_u, u8 mode) > { > + struct cxlflash_cfg *cfg = afu->parent; > struct afu_cmd *cmd = NULL; > int rc = 0; > int retry_cnt = 0; > static DEFINE_MUTEX(sync_active); > > + if (cfg->state != STATE_NORMAL) { > + pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state); > + return 0; > + } > + > mutex_lock(&sync_active); > retry: > cmd = cxlflash_cmd_checkout(afu); > @@ -2122,6 +2182,11 @@ static void cxlflash_worker_thread(struct work_struct *work) > int port; > ulong lock_flags; > > + /* Avoid MMIO if the device has failed */ > + > + if (cfg->state != STATE_NORMAL) > + return; > + > spin_lock_irqsave(cfg->host->host_lock, lock_flags); > > if (cfg->lr_state == LINK_RESET_REQUIRED) { > @@ -2200,10 +2265,9 @@ static int cxlflash_probe(struct pci_dev *pdev, > cfg->dev = pdev; > cfg->dev_id = (struct pci_device_id *)dev_id; > cfg->mcctx = NULL; > - cfg->err_recovery_active = 0; > > init_waitqueue_head(&cfg->tmf_waitq); > - init_waitqueue_head(&cfg->eeh_waitq); > + init_waitqueue_head(&cfg->limbo_waitq); > > INIT_WORK(&cfg->work_q, cxlflash_worker_thread); > cfg->lr_state = LINK_RESET_INVALID; > @@ -2259,6 +2323,89 @@ out_remove: > goto out; > } > > +/** > + * cxlflash_pci_error_detected() - called when a PCI error is detected > + * @pdev: PCI device struct. > + * @state: PCI channel state. > + * > + * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT > + */ > +static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, > + pci_channel_state_t state) > +{ > + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); > + > + pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state); > + > + switch (state) { > + case pci_channel_io_frozen: > + cfg->state = STATE_LIMBO; > + > + /* Turn off legacy I/O */ > + scsi_block_requests(cfg->host); > + > + term_mc(cfg, UNDO_START); > + stop_afu(cfg); > + > + return PCI_ERS_RESULT_NEED_RESET; > + case pci_channel_io_perm_failure: > + cfg->state = STATE_FAILTERM; > + wake_up_all(&cfg->limbo_waitq); > + scsi_unblock_requests(cfg->host); > + return PCI_ERS_RESULT_DISCONNECT; > + default: > + break; > + } > + return PCI_ERS_RESULT_NEED_RESET; > +} > + > +/** > + * cxlflash_pci_slot_reset() - called when PCI slot has been reset > + * @pdev: PCI device struct. > + * > + * This routine is called by the pci error recovery code after the PCI > + * slot has been reset, just before we should resume normal operations. > + * > + * Return: PCI_ERS_RESULT_RECOVERED or PCI_ERS_RESULT_DISCONNECT > + */ > +static pci_ers_result_t cxlflash_pci_slot_reset(struct pci_dev *pdev) > +{ > + int rc = 0; > + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); > + struct device *dev = &cfg->dev->dev; > + > + pr_debug("%s: pdev=%p\n", __func__, pdev); > + > + rc = init_afu(cfg); > + if (unlikely(rc)) { > + dev_err(dev, "%s: EEH recovery failed! (%d)\n", __func__, rc); > + return PCI_ERS_RESULT_DISCONNECT; > + } > + > + return PCI_ERS_RESULT_RECOVERED; > +} > + > +/** > + * cxlflash_pci_resume() - called when normal operation can resume > + * @pdev: PCI device struct > + */ > +static void cxlflash_pci_resume(struct pci_dev *pdev) > +{ > + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); > + > + pr_debug("%s: pdev=%p\n", __func__, pdev); > + > + cfg->state = STATE_NORMAL; > + wake_up_all(&cfg->limbo_waitq); > + scsi_unblock_requests(cfg->host); > +} > + > +static const struct pci_error_handlers cxlflash_err_handler = { > + .error_detected = cxlflash_pci_error_detected, > + .slot_reset = cxlflash_pci_slot_reset, > + .resume = cxlflash_pci_resume, > +}; > + > /* > * PCI device structure > */ > @@ -2267,6 +2414,7 @@ static struct pci_driver cxlflash_driver = { > .id_table = cxlflash_pci_table, > .probe = cxlflash_probe, > .remove = cxlflash_remove, > + .err_handler = &cxlflash_err_handler, > }; > > /** > diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h > index 7f890cc..7232536 100644 > --- a/drivers/scsi/cxlflash/main.h > +++ b/drivers/scsi/cxlflash/main.h > @@ -101,4 +101,8 @@ struct asyc_intr_info { > #define LINK_RESET 0x02 > }; > > +#ifndef CONFIG_CXL_EEH > +#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) > +#endif > + > #endif /* _CXLFLASH_MAIN_H */ -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2015-08-10 at 12:09 -0500, Matthew R. Ochs wrote: > Introduce support for enhanced I/O error handling. > > Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> > Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> > --- So I'm not necessarily very qualified to review SCSI bits as I haven't done anything close to the Linux SCSI code for almost a decade but I have a couple of questions and nits... > wait_queue_head_t tmf_waitq; > bool tmf_active; > - u8 err_recovery_active:1; > + wait_queue_head_t limbo_waitq; > + enum cxlflash_state state; > }; > > struct afu_cmd { > diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c > index 76a7286..18359d4 100644 > --- a/drivers/scsi/cxlflash/main.c > +++ b/drivers/scsi/cxlflash/main.c > @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) > } > spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); > > + switch (cfg->state) { > + case STATE_LIMBO: > + pr_debug_ratelimited("%s: device in limbo!\n", __func__); > + rc = SCSI_MLQUEUE_HOST_BUSY; > + goto out; > + case STATE_FAILTERM: > + pr_debug_ratelimited("%s: device has failed!\n", __func__); > + goto error; > + default: > + break; > + } I noticed that you mostly read and write that new state outside of your tmf_waitq.lock. Is there any other lock or mutex protecting you ? In this specific case ? I know in the old day queuecommand was called with a host lock, is that still the case ? Or you just don't care about an occasional spurrious SCSI_MLQUEUE_HOST_BUSY ? > cmd = cxlflash_cmd_checkout(afu); > if (unlikely(!cmd)) { > pr_err("%s: could not get a free command\n", __func__); > @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) > > out: > return rc; > +error: > + scp->result = (DID_NO_CONNECT << 16); > + scp->scsi_done(scp); > + return 0; > } > > /** > @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) > get_unaligned_be32(&((u32 *)scp->cmnd)[2]), > get_unaligned_be32(&((u32 *)scp->cmnd)[3])); > > - rcr = send_tmf(afu, scp, TMF_LUN_RESET); > - if (unlikely(rcr)) > + switch (cfg->state) { > + case STATE_NORMAL: > + rcr = send_tmf(afu, scp, TMF_LUN_RESET); > + if (unlikely(rcr)) > + rc = FAILED; > + break; > + case STATE_LIMBO: > + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); > + if (cfg->state == STATE_NORMAL) > + break; > + /* fall through */ > + default: > rc = FAILED; > + break; > + } Same here, since you are doing wait_event, I assume no lock is held (unless it's a mutex) and in subsequent places I am not listing. As I said, it could well be that it all works fine but it would be worth mentioning in this case because it's not obvious from simply reading the code. Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Actually, I forgot one thing: > > config CXLFLASH > tristate "Support for IBM CAPI Flash" > - depends on PCI && SCSI && CXL > + depends on PCI && SCSI && CXL && EEH Should you depend on CXL_EEH here, seeing as you use CONFIG_CXL_EEH? > > +#ifndef CONFIG_CXL_EEH > +#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) > +#endif > + > #endif /* _CXLFLASH_MAIN_H */
On Tue, 2015-08-11 at 16:21 +1000, Daniel Axtens wrote: > Actually, I forgot one thing: > > > > > config CXLFLASH > > tristate "Support for IBM CAPI Flash" > > - depends on PCI && SCSI && CXL > > + depends on PCI && SCSI && CXL && EEH > > Should you depend on CXL_EEH here, seeing as you use CONFIG_CXL_EEH? > Actually, never mind, I'm wrong. What you have is good. > > > > +#ifndef CONFIG_CXL_EEH > > +#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) > > +#endif > > + > > #endif /* _CXLFLASH_MAIN_H */ >
> On Aug 10, 2015, at 6:52 PM, Daniel Axtens <dja@axtens.net> wrote: > >> @@ -487,11 +515,27 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = cxlflash_afu_reset(cfg); >> - if (rcr == 0) >> - rc = SUCCESS; >> - else >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + cfg->state = STATE_LIMBO; >> + scsi_block_requests(cfg->host); >> + >> + rcr = cxlflash_afu_reset(cfg); >> + if (!rcr) >> + rc = FAILED; > > I think you want: > > if (rcr) { > rc = FAILED; > break; > } > > cxlflash_afu_reset returns 0 on success, so I think you want to drop the > negation, and also I think if it fails you want to break out and not set > STATE_NORMAL. Is that right? Good catch! Both of these were an oversight on my part and are now fixed. They’ll be included with v5. > > Once you fix that, or explain to me why I'm wrong: > Reviewed-by: Daniel Axtens <dja@axtens.net> Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Mikey, Thanks for reviewing. Responses to your comments inline below. -matt > On Aug 10, 2015, at 9:05 PM, Michael Neuling <mikey@neuling.org> wrote: > > On Mon, 2015-08-10 at 12:09 -0500, Matthew R. Ochs wrote: >> Introduce support for enhanced I/O error handling. > > This needs more description of what you're doing. What's the overall > approach? There seems to be a new limbo queue thats created stall things > while the device is in error state. That should be described here. Sure, will do that in v5. >> struct afu_cmd { >> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c >> index 76a7286..18359d4 100644 >> --- a/drivers/scsi/cxlflash/main.c >> +++ b/drivers/scsi/cxlflash/main.c >> @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> } >> spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); >> >> + switch (cfg->state) { >> + case STATE_LIMBO: >> + pr_debug_ratelimited("%s: device in limbo!\n", __func__); >> + rc = SCSI_MLQUEUE_HOST_BUSY; > > So if the client gets BUSY, it should retry until it suceeds or gets a terminal > failure? Correct, although this situation won’t hit often because as part of transitioning to limbo, we tell the host to stop handing down new requests. > >> + goto out; >> + case STATE_FAILTERM: >> + pr_debug_ratelimited("%s: device has failed!\n", __func__); >> + goto error; > > error is only used here, so there is no need for a goto. This was a holdover from when we did have a need for a common error exit. Will remove in v5. >> cmd = cxlflash_cmd_checkout(afu); >> if (unlikely(!cmd)) { >> pr_err("%s: could not get a free command\n", __func__); >> @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> >> out: >> return rc; >> +error: >> + scp->result = (DID_NO_CONNECT << 16); >> + scp->scsi_done(scp); >> + return 0; > > 0 is success. That doesn’t seem right for an error. This is simply how this interface works. It does seem a bit silly but what we’re doing here is telling the stack “yes we accept your request” and then pushing the request back via done() with an error due to the host being unable to satisfy the request. > >> } >> >> /** >> @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> - if (unlikely(rcr)) >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> + if (unlikely(rcr)) >> + rc = FAILED; >> + break; >> + case STATE_LIMBO: >> + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); > > So we wait here till we are our of limbo? Correct, the idea here is that we only go into limbo when a host-wide reset action has occurred. In such a scenario, it doesn’t make sense to send a device specific reset as a host-wide reset is a bigger hammer. So we wait here until the reset has completed and then (assuming it was successful) we return success. > >> + if (cfg->state == STATE_NORMAL) >> + break; >> + /* fall through */ >> + default: >> rc = FAILED; >> + break; >> + } >> >> pr_debug("%s: returning rc=%d\n", __func__, rc); >> return rc; >> @@ -487,11 +515,27 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = cxlflash_afu_reset(cfg); >> - if (rcr == 0) >> - rc = SUCCESS; >> - else >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + cfg->state = STATE_LIMBO; >> + scsi_block_requests(cfg->host); >> + >> + rcr = cxlflash_afu_reset(cfg); >> + if (!rcr) >> + rc = FAILED; > > This is some sort of recovery once we get back into normal state? Can you > comment what you’re doing here? What’s occurring here is that we’ve been asked to reset the host and have determined that another reset is not already taking place. To perform the reset, we need to transition to the limbo state so that other running threads and new threads will wait while the reset takes place. Note that we’re also putting user contexts in an error state so that they are notified and will have to be recovered before they can resume operation (the reset is catastrophic to them). > >> + cfg->state = STATE_NORMAL; >> + wake_up_all(&cfg->limbo_waitq); >> + scsi_unblock_requests(cfg->host); > > Now we actually go to normal? Correct, assuming the reset completes successfully then we’re no longer in limbo and have returned to a good, working state (normal). Note that Daniel Axtens pointed out that we need to transition to a failed state when the reset does not complete successfully. That will be changed in v5. > >> + break; >> + case STATE_LIMBO: >> + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); > > Wait here till we are out of limbo? What happens if that never occurs? This case would be hit if there was already a reset action taking place (state of device in limbo). These events should not take long. A transition out of limbo never occurring would be a bug on the driver’s part. >> rc = init_mc(cfg); >> if (rc) { >> dev_err(dev, "%s: call to init_mc failed, rc=%d!\n", >> @@ -2021,6 +2069,12 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd) >> * the sync. This design point requires calling threads to not be on interrupt >> * context due to the possibility of sleeping during concurrent sync operations. >> * >> + * AFU sync operations are only necessary and allowed when the device is >> + * operating normally. When not operating normally, sync requests can occur as >> + * part of cleaning up resources associated with an adapter prior to removal. >> + * In this scenario, these requests are simply ignored (safe due to the AFU >> + * going away). >> + * > > What about if we are in limbo state and it comes back? Limbo state indicates that the device is being reset. When the device is being reset, a sync does not matter as it’s state (the device’s) is being lost during the reset. The device will be reloaded with the proper state as part of the reset and recovery [of user contexts]. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Aug 10, 2015, at 10:41 PM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Mon, 2015-08-10 at 12:09 -0500, Matthew R. Ochs wrote: >> Introduce support for enhanced I/O error handling. >> >> Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> >> Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> >> --- > > So I'm not necessarily very qualified to review SCSI bits as I haven't > done anything close to the Linux SCSI code for almost a decade but I > have a couple of questions and nits… Thanks for reviewing Ben! > >> wait_queue_head_t tmf_waitq; >> bool tmf_active; >> - u8 err_recovery_active:1; >> + wait_queue_head_t limbo_waitq; >> + enum cxlflash_state state; >> }; >> >> struct afu_cmd { >> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c >> index 76a7286..18359d4 100644 >> --- a/drivers/scsi/cxlflash/main.c >> +++ b/drivers/scsi/cxlflash/main.c >> @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> } >> spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); >> >> + switch (cfg->state) { >> + case STATE_LIMBO: >> + pr_debug_ratelimited("%s: device in limbo!\n", __func__); >> + rc = SCSI_MLQUEUE_HOST_BUSY; >> + goto out; >> + case STATE_FAILTERM: >> + pr_debug_ratelimited("%s: device has failed!\n", __func__); >> + goto error; >> + default: >> + break; >> + } > > I noticed that you mostly read and write that new state outside of your > tmf_waitq.lock. Is there any other lock or mutex protecting you ? In > this specific case ? No there isn’t. > > I know in the old day queuecommand was called with a host lock, is that > still the case ? No, it’s no longer the case. > Or you just don't care about an occasional spurrious > SCSI_MLQUEUE_HOST_BUSY ? This is pretty much true. =) > >> cmd = cxlflash_cmd_checkout(afu); >> if (unlikely(!cmd)) { >> pr_err("%s: could not get a free command\n", __func__); >> @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> >> out: >> return rc; >> +error: >> + scp->result = (DID_NO_CONNECT << 16); >> + scp->scsi_done(scp); >> + return 0; >> } >> >> /** >> @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) >> get_unaligned_be32(&((u32 *)scp->cmnd)[2]), >> get_unaligned_be32(&((u32 *)scp->cmnd)[3])); >> >> - rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> - if (unlikely(rcr)) >> + switch (cfg->state) { >> + case STATE_NORMAL: >> + rcr = send_tmf(afu, scp, TMF_LUN_RESET); >> + if (unlikely(rcr)) >> + rc = FAILED; >> + break; >> + case STATE_LIMBO: >> + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); >> + if (cfg->state == STATE_NORMAL) >> + break; >> + /* fall through */ >> + default: >> rc = FAILED; >> + break; >> + } > > Same here, since you are doing wait_event, I assume no lock is held > (unless it's a mutex) and in subsequent places I am not listing. > > As I said, it could well be that it all works fine but it would be worth > mentioning in this case because it's not obvious from simply reading the > code. It has been working well, and yes, your assumption is correct. We can look at adding more/better serialization as a future enhancement. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Quoting "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>: > Introduce support for enhanced I/O error handling. > > Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> > Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> > --- > drivers/scsi/cxlflash/Kconfig | 2 +- > drivers/scsi/cxlflash/common.h | 11 ++- > drivers/scsi/cxlflash/main.c | 166 > ++++++++++++++++++++++++++++++++++++++--- > drivers/scsi/cxlflash/main.h | 4 + > 4 files changed, 170 insertions(+), 13 deletions(-) > > diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c > index 76a7286..18359d4 100644 > --- a/drivers/scsi/cxlflash/main.c > +++ b/drivers/scsi/cxlflash/main.c > @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct > Scsi_Host *host, struct scsi_cmnd *scp) > } > spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); > > + switch (cfg->state) { > + case STATE_LIMBO: > + pr_debug_ratelimited("%s: device in limbo!\n", __func__); > + rc = SCSI_MLQUEUE_HOST_BUSY; > + goto out; remove "goto out", return SCSI_MLQUEUE_HOST_BUSY; > + case STATE_FAILTERM: > + pr_debug_ratelimited("%s: device has failed!\n", __func__); > + goto error; > + default: > + break; > + } > + > cmd = cxlflash_cmd_checkout(afu); > if (unlikely(!cmd)) { > pr_err("%s: could not get a free command\n", __func__); > @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct > Scsi_Host *host, struct scsi_cmnd *scp) > > out: > return rc; > +error: > + scp->result = (DID_NO_CONNECT << 16); > + scp->scsi_done(scp); > + return 0; > } > I have reviewed most of part in v2. Thanks, Wendy -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Wendy, Thanks for reviewing. Comments inline below. -matt > On Aug 11, 2015, at 11:15 PM, wenxiong@linux.vnet.ibm.com wrote: > > > Quoting "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>: > >> Introduce support for enhanced I/O error handling. >> >> Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com> >> Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com> >> --- >> drivers/scsi/cxlflash/Kconfig | 2 +- >> drivers/scsi/cxlflash/common.h | 11 ++- >> drivers/scsi/cxlflash/main.c | 166 ++++++++++++++++++++++++++++++++++++++--- >> drivers/scsi/cxlflash/main.h | 4 + >> 4 files changed, 170 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c >> index 76a7286..18359d4 100644 >> --- a/drivers/scsi/cxlflash/main.c >> +++ b/drivers/scsi/cxlflash/main.c >> @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> } >> spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); >> >> + switch (cfg->state) { >> + case STATE_LIMBO: >> + pr_debug_ratelimited("%s: device in limbo!\n", __func__); >> + rc = SCSI_MLQUEUE_HOST_BUSY; >> + goto out; > > remove “goto out", return SCSI_MLQUEUE_HOST_BUSY; I’d like to keep this as goto out as we have a common exit point strategy already in place for this routine (and pretty much everywhere else in the driver for that matter). It would seem odd to just have this one place perform a direct return. Additionally, not included in this patch but one of the bug fixes/enhancements (to be sent out as a separate patch series) we’ve made to this routine is to add a ‘devel’ trace point in the common exit to help with debug. > >> + case STATE_FAILTERM: >> + pr_debug_ratelimited("%s: device has failed!\n", __func__); >> + goto error; >> + default: >> + break; >> + } >> + >> cmd = cxlflash_cmd_checkout(afu); >> if (unlikely(!cmd)) { >> pr_err("%s: could not get a free command\n", __func__); >> @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) >> >> out: >> return rc; >> +error: >> + scp->result = (DID_NO_CONNECT << 16); >> + scp->scsi_done(scp); >> + return 0; >> } >> > I have reviewed most of part in v2. Thanks again for your previous comments. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig index c707508..c052104 100644 --- a/drivers/scsi/cxlflash/Kconfig +++ b/drivers/scsi/cxlflash/Kconfig @@ -4,7 +4,7 @@ config CXLFLASH tristate "Support for IBM CAPI Flash" - depends on PCI && SCSI && CXL + depends on PCI && SCSI && CXL && EEH default m help Allows CAPI Accelerated IO to Flash diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h index fe86bfe..7e663f4 100644 --- a/drivers/scsi/cxlflash/common.h +++ b/drivers/scsi/cxlflash/common.h @@ -76,6 +76,12 @@ enum cxlflash_init_state { INIT_STATE_SCSI }; +enum cxlflash_state { + STATE_NORMAL, /* Normal running state, everything good */ + STATE_LIMBO, /* Limbo running state, trying to reset/recover */ + STATE_FAILTERM /* Failed/terminating state, error out users/threads */ +}; + /* * Each context has its own set of resource handles that is visible * only from that context. @@ -91,8 +97,6 @@ struct cxlflash_cfg { ulong cxlflash_regs_pci; - wait_queue_head_t eeh_waitq; - struct work_struct work_q; enum cxlflash_init_state init_state; enum cxlflash_lr_state lr_state; @@ -105,7 +109,8 @@ struct cxlflash_cfg { wait_queue_head_t tmf_waitq; bool tmf_active; - u8 err_recovery_active:1; + wait_queue_head_t limbo_waitq; + enum cxlflash_state state; }; struct afu_cmd { diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 76a7286..18359d4 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -380,6 +380,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) } spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); + switch (cfg->state) { + case STATE_LIMBO: + pr_debug_ratelimited("%s: device in limbo!\n", __func__); + rc = SCSI_MLQUEUE_HOST_BUSY; + goto out; + case STATE_FAILTERM: + pr_debug_ratelimited("%s: device has failed!\n", __func__); + goto error; + default: + break; + } + cmd = cxlflash_cmd_checkout(afu); if (unlikely(!cmd)) { pr_err("%s: could not get a free command\n", __func__); @@ -428,6 +440,10 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) out: return rc; +error: + scp->result = (DID_NO_CONNECT << 16); + scp->scsi_done(scp); + return 0; } /** @@ -455,9 +471,21 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) get_unaligned_be32(&((u32 *)scp->cmnd)[2]), get_unaligned_be32(&((u32 *)scp->cmnd)[3])); - rcr = send_tmf(afu, scp, TMF_LUN_RESET); - if (unlikely(rcr)) + switch (cfg->state) { + case STATE_NORMAL: + rcr = send_tmf(afu, scp, TMF_LUN_RESET); + if (unlikely(rcr)) + rc = FAILED; + break; + case STATE_LIMBO: + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); + if (cfg->state == STATE_NORMAL) + break; + /* fall through */ + default: rc = FAILED; + break; + } pr_debug("%s: returning rc=%d\n", __func__, rc); return rc; @@ -487,11 +515,27 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) get_unaligned_be32(&((u32 *)scp->cmnd)[2]), get_unaligned_be32(&((u32 *)scp->cmnd)[3])); - rcr = cxlflash_afu_reset(cfg); - if (rcr == 0) - rc = SUCCESS; - else + switch (cfg->state) { + case STATE_NORMAL: + cfg->state = STATE_LIMBO; + scsi_block_requests(cfg->host); + + rcr = cxlflash_afu_reset(cfg); + if (!rcr) + rc = FAILED; + cfg->state = STATE_NORMAL; + wake_up_all(&cfg->limbo_waitq); + scsi_unblock_requests(cfg->host); + break; + case STATE_LIMBO: + wait_event(cfg->limbo_waitq, cfg->state != STATE_LIMBO); + if (cfg->state == STATE_NORMAL) + break; + /* fall through */ + default: rc = FAILED; + break; + } pr_debug("%s: returning rc=%d\n", __func__, rc); return rc; @@ -642,7 +686,7 @@ static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg) struct pci_dev *pdev = cfg->dev; if (pci_channel_offline(pdev)) - wait_event_timeout(cfg->eeh_waitq, + wait_event_timeout(cfg->limbo_waitq, !pci_channel_offline(pdev), CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT); } @@ -825,6 +869,8 @@ static void cxlflash_remove(struct pci_dev *pdev) !cfg->tmf_active); spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags); + cfg->state = STATE_FAILTERM; + switch (cfg->init_state) { case INIT_STATE_SCSI: scsi_remove_host(cfg->host); @@ -1879,6 +1925,8 @@ static int init_afu(struct cxlflash_cfg *cfg) struct afu *afu = cfg->afu; struct device *dev = &cfg->dev->dev; + cxl_perst_reloads_same_image(cfg->cxl_afu, true); + rc = init_mc(cfg); if (rc) { dev_err(dev, "%s: call to init_mc failed, rc=%d!\n", @@ -2021,6 +2069,12 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd) * the sync. This design point requires calling threads to not be on interrupt * context due to the possibility of sleeping during concurrent sync operations. * + * AFU sync operations are only necessary and allowed when the device is + * operating normally. When not operating normally, sync requests can occur as + * part of cleaning up resources associated with an adapter prior to removal. + * In this scenario, these requests are simply ignored (safe due to the AFU + * going away). + * * Return: * 0 on success * -1 on failure @@ -2028,11 +2082,17 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd *cmd) int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u, res_hndl_t res_hndl_u, u8 mode) { + struct cxlflash_cfg *cfg = afu->parent; struct afu_cmd *cmd = NULL; int rc = 0; int retry_cnt = 0; static DEFINE_MUTEX(sync_active); + if (cfg->state != STATE_NORMAL) { + pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state); + return 0; + } + mutex_lock(&sync_active); retry: cmd = cxlflash_cmd_checkout(afu); @@ -2122,6 +2182,11 @@ static void cxlflash_worker_thread(struct work_struct *work) int port; ulong lock_flags; + /* Avoid MMIO if the device has failed */ + + if (cfg->state != STATE_NORMAL) + return; + spin_lock_irqsave(cfg->host->host_lock, lock_flags); if (cfg->lr_state == LINK_RESET_REQUIRED) { @@ -2200,10 +2265,9 @@ static int cxlflash_probe(struct pci_dev *pdev, cfg->dev = pdev; cfg->dev_id = (struct pci_device_id *)dev_id; cfg->mcctx = NULL; - cfg->err_recovery_active = 0; init_waitqueue_head(&cfg->tmf_waitq); - init_waitqueue_head(&cfg->eeh_waitq); + init_waitqueue_head(&cfg->limbo_waitq); INIT_WORK(&cfg->work_q, cxlflash_worker_thread); cfg->lr_state = LINK_RESET_INVALID; @@ -2259,6 +2323,89 @@ out_remove: goto out; } +/** + * cxlflash_pci_error_detected() - called when a PCI error is detected + * @pdev: PCI device struct. + * @state: PCI channel state. + * + * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT + */ +static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); + + pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state); + + switch (state) { + case pci_channel_io_frozen: + cfg->state = STATE_LIMBO; + + /* Turn off legacy I/O */ + scsi_block_requests(cfg->host); + + term_mc(cfg, UNDO_START); + stop_afu(cfg); + + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + cfg->state = STATE_FAILTERM; + wake_up_all(&cfg->limbo_waitq); + scsi_unblock_requests(cfg->host); + return PCI_ERS_RESULT_DISCONNECT; + default: + break; + } + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * cxlflash_pci_slot_reset() - called when PCI slot has been reset + * @pdev: PCI device struct. + * + * This routine is called by the pci error recovery code after the PCI + * slot has been reset, just before we should resume normal operations. + * + * Return: PCI_ERS_RESULT_RECOVERED or PCI_ERS_RESULT_DISCONNECT + */ +static pci_ers_result_t cxlflash_pci_slot_reset(struct pci_dev *pdev) +{ + int rc = 0; + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); + struct device *dev = &cfg->dev->dev; + + pr_debug("%s: pdev=%p\n", __func__, pdev); + + rc = init_afu(cfg); + if (unlikely(rc)) { + dev_err(dev, "%s: EEH recovery failed! (%d)\n", __func__, rc); + return PCI_ERS_RESULT_DISCONNECT; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +/** + * cxlflash_pci_resume() - called when normal operation can resume + * @pdev: PCI device struct + */ +static void cxlflash_pci_resume(struct pci_dev *pdev) +{ + struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); + + pr_debug("%s: pdev=%p\n", __func__, pdev); + + cfg->state = STATE_NORMAL; + wake_up_all(&cfg->limbo_waitq); + scsi_unblock_requests(cfg->host); +} + +static const struct pci_error_handlers cxlflash_err_handler = { + .error_detected = cxlflash_pci_error_detected, + .slot_reset = cxlflash_pci_slot_reset, + .resume = cxlflash_pci_resume, +}; + /* * PCI device structure */ @@ -2267,6 +2414,7 @@ static struct pci_driver cxlflash_driver = { .id_table = cxlflash_pci_table, .probe = cxlflash_probe, .remove = cxlflash_remove, + .err_handler = &cxlflash_err_handler, }; /** diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index 7f890cc..7232536 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h @@ -101,4 +101,8 @@ struct asyc_intr_info { #define LINK_RESET 0x02 }; +#ifndef CONFIG_CXL_EEH +#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) +#endif + #endif /* _CXLFLASH_MAIN_H */