Message ID | 1375163155-1473-5-git-send-email-sthumma@codeaurora.org (mailing list archive) |
---|---|
State | Not Applicable, archived |
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
= > -----Original Message-----
= > From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-owner@vger.kernel.org]
= > On Behalf Of Dolev Raviv
= > Sent: Monday, August 12, 2013 4:03 PM
= > To: Sujit Reddy Thumma
= > Cc: Vinayak Holikatti; Santosh Y; James E.J. Bottomley;
= > linux-scsi@vger.kernel.org; Sujit Reddy Thumma; linux-arm-msm@vger.kernel.org
= > Subject: Re: [PATCH V5 4/4] scsi: ufs: Improve UFS fatal error handling
= >
= > Tested-by: Dolev Raviv <draviv@codeaurora.org>
= >
= > > Error handling in UFS driver is broken and resets the host controller
= > > for fatal errors without re-initialization. Correct the fatal error
= > > handling sequence according to UFS Host Controller Interface (HCI)
= > > v1.1 specification.
= > >
= > > o Processed requests which are completed w/wo error are reported to
= > > SCSI layer and any pending commands that are not started are aborted
= > > in the controller and re-queued into scsi mid-layer queue.
= > >
= > > o Upon determining fatal error condition the host controller may hang
= > > forever until a reset is applied. Block SCSI layer for sending new
= > > requests and apply reset in a separate error handling work.
= > >
= > > o SCSI is informed about the expected Unit-Attention exception from the
= > > device for the immediate command after a reset so that the SCSI layer
= > > take necessary steps to establish communication with the device.
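In short, the three points above describe a deferred-recovery pattern: note the error in the interrupt handler, block the SCSI midlayer, and do the real recovery in process context. The condensed sketch below only paraphrases the ufshcd_check_errors()/ufshcd_err_handler() code in the diff that follows; fatal_or_uic_error(), clear_unstarted_requests() and fatal() are placeholder names, not functions added by the patch.

    /* interrupt context: record the error and defer the actual recovery */
    if (fatal_or_uic_error(hba)) {
            scsi_block_requests(hba->host);         /* no new commands from the midlayer */
            hba->saved_err |= hba->errors;          /* sticky copies for the worker */
            hba->saved_uic_err |= hba->uic_error;
            schedule_work(&hba->eh_work);
    }

    /* eh_work, process context: finish what completed, clear what never started */
    ufshcd_transfer_req_compl(hba);                 /* report completed requests to SCSI */
    clear_unstarted_requests(hba);                  /* cleared commands are requeued by SCSI */
    if (fatal(hba->saved_err, hba->saved_uic_err)) {
            ufshcd_reset_and_restore(hba);          /* full controller re-init per UFSHCI v1.1 */
            scsi_report_bus_reset(hba->host, 0);    /* Unit Attention after reset is expected */
    }
    scsi_unblock_requests(hba->host);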
= > >
= > > Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
= > > ---
= > > drivers/scsi/ufs/ufshcd.c | 229 ++++++++++++++++++++++++++++-----------------
= > > drivers/scsi/ufs/ufshcd.h | 10 ++-
= > > 2 files changed, 149 insertions(+), 90 deletions(-)
= > >
= > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
= > > index c577e6e..4dca9b4 100644
= > > --- a/drivers/scsi/ufs/ufshcd.c
= > > +++ b/drivers/scsi/ufs/ufshcd.c
= > > @@ -79,6 +79,14 @@ enum {
= > > UFSHCD_EH_IN_PROGRESS = (1 << 0),
= > > };
= > >
= > > +/* UFSHCD UIC layer error flags */
= > > +enum {
= > > + UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
= > > + UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
= > > + UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
= > > + UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
= > > +};
= > > +
= > > /* Interrupt configuration options */
= > > enum {
= > > UFSHCD_INT_DISABLE,
= > > @@ -101,6 +109,8 @@ enum {
= > >
= > > static void ufshcd_tmc_handler(struct ufs_hba *hba);
= > > static void ufshcd_async_scan(void *data, async_cookie_t cookie);
= > > +static int ufshcd_reset_and_restore(struct ufs_hba *hba);
= > > +static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
= > >
= > > /*
= > > * ufshcd_wait_for_register - wait for register value to change
= > > @@ -1523,9 +1533,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
= > > goto out;
= > > }
= > >
= > > - if (hba->ufshcd_state == UFSHCD_STATE_RESET)
= > > - scsi_unblock_requests(hba->host);
= > > -
= > > out:
= > > return err;
= > > }
= > > @@ -1651,66 +1658,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
= > > }
= > >
= > > /**
= > > - * ufshcd_do_reset - reset the host controller
= > > - * @hba: per adapter instance
= > > - *
= > > - * Returns SUCCESS/FAILED
= > > - */
= > > -static int ufshcd_do_reset(struct ufs_hba *hba)
= > > -{
= > > - struct ufshcd_lrb *lrbp;
= > > - unsigned long flags;
= > > - int tag;
= > > -
= > > - /* block commands from midlayer */
= > > - scsi_block_requests(hba->host);
= > > -
= > > - spin_lock_irqsave(hba->host->host_lock, flags);
= > > - hba->ufshcd_state = UFSHCD_STATE_RESET;
= > > -
= > > - /* send controller to reset state */
= > > - ufshcd_hba_stop(hba);
= > > - spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > -
= > > - /* abort outstanding commands */
= > > - for (tag = 0; tag < hba->nutrs; tag++) {
= > > - if (test_bit(tag, &hba->outstanding_reqs)) {
= > > - lrbp = &hba->lrb[tag];
= > > - if (lrbp->cmd) {
= > > - scsi_dma_unmap(lrbp->cmd);
= > > - lrbp->cmd->result = DID_RESET << 16;
= > > - lrbp->cmd->scsi_done(lrbp->cmd);
= > > - lrbp->cmd = NULL;
= > > - clear_bit_unlock(tag, &hba->lrb_in_use);
= > > - }
= > > - }
= > > - }
= > > -
= > > - /* complete device management command */
= > > - if (hba->dev_cmd.complete)
= > > - complete(hba->dev_cmd.complete);
= > > -
= > > - /* clear outstanding request/task bit maps */
= > > - hba->outstanding_reqs = 0;
= > > - hba->outstanding_tasks = 0;
= > > -
= > > - /* Host controller enable */
= > > - if (ufshcd_hba_enable(hba)) {
= > > - dev_err(hba->dev,
= > > - "Reset: Controller initialization failed\n");
= > > - return FAILED;
= > > - }
= > > -
= > > - if (ufshcd_link_startup(hba)) {
= > > - dev_err(hba->dev,
= > > - "Reset: Link start-up failed\n");
= > > - return FAILED;
= > > - }
= > > -
= > > - return SUCCESS;
= > > -}
= > > -
= > > -/**
= > > * ufshcd_slave_alloc - handle initial SCSI device configurations
= > > * @sdev: pointer to SCSI device
= > > *
= > > @@ -1727,6 +1674,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
= > > sdev->use_10_for_ms = 1;
= > > scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
= > >
= > > + /* allow SCSI layer to restart the device in case of errors */
= > > + sdev->allow_restart = 1;
= > > +
= > > /*
= > > * Inform SCSI Midlayer that the LUN queue depth is same as the
= > > * controller queue depth. If a LUN queue depth is less than the
= > > @@ -1930,6 +1880,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
= > > case OCS_ABORTED:
= > > result |= DID_ABORT << 16;
= > > break;
= > > + case OCS_INVALID_COMMAND_STATUS:
= > > + result |= DID_REQUEUE << 16;
= > > + break;
= > > case OCS_INVALID_CMD_TABLE_ATTR:
= > > case OCS_INVALID_PRDT_ATTR:
= > > case OCS_MISMATCH_DATA_BUF_SIZE:
= > > @@ -2241,45 +2194,145 @@ out:
= > > }
= > >
= > > /**
= > > - * ufshcd_fatal_err_handler - handle fatal errors
= > > - * @hba: per adapter instance
= > > + * ufshcd_err_handler - handle UFS errors that require s/w attention
= > > + * @work: pointer to work structure
= > > */
= > > -static void ufshcd_fatal_err_handler(struct work_struct *work)
= > > +static void ufshcd_err_handler(struct work_struct *work)
= > > {
= > > struct ufs_hba *hba;
= > > - hba = container_of(work, struct ufs_hba, feh_workq);
= > > + unsigned long flags;
= > > + u32 err_xfer = 0;
= > > + u32 err_tm = 0;
= > > + int err = 0;
= > > + int tag;
= > > +
= > > + hba = container_of(work, struct ufs_hba, eh_work);
= > >
= > > pm_runtime_get_sync(hba->dev);
= > > - /* check if reset is already in progress */
= > > - if (hba->ufshcd_state != UFSHCD_STATE_RESET)
= > > - ufshcd_do_reset(hba);
= > > +
= > > + spin_lock_irqsave(hba->host->host_lock, flags);
= > > + if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
= > > + spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > + goto out;
= > > + }
= > > +
= > > + hba->ufshcd_state = UFSHCD_STATE_RESET;
= > > + ufshcd_set_eh_in_progress(hba);
= > > +
= > > + /* Complete requests that have door-bell cleared by h/w */
= > > + ufshcd_transfer_req_compl(hba);
= > > + ufshcd_tmc_handler(hba);
= > > + spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > + /* Clear pending transfer requests */
= > > + for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
= > > + if (ufshcd_clear_cmd(hba, tag))
= > > + err_xfer |= 1 << tag;
= > > +
= > > + /* Clear pending task management requests */
= > > + for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
= > > + if (ufshcd_clear_tm_cmd(hba, tag))
= > > + err_tm |= 1 << tag;
= > > +
= > > + /* Complete the requests that are cleared by s/w */
= > > + spin_lock_irqsave(hba->host->host_lock, flags);
= > > + ufshcd_transfer_req_compl(hba);
= > > + ufshcd_tmc_handler(hba);
= > > + spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > +
= > > + /* Fatal errors need reset */
= > > + if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
= > > + ((hba->saved_err & UIC_ERROR) &&
= > > + (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
= > > + err = ufshcd_reset_and_restore(hba);
= > > + if (err) {
= > > + dev_err(hba->dev, "%s: reset and restore failed\n",
= > > + __func__);
= > > + hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > + }
= > > + /*
= > > + * Inform scsi mid-layer that we did reset and allow to handle
= > > + * Unit Attention properly.
= > > + */
= > > + scsi_report_bus_reset(hba->host, 0);
= > > + hba->saved_err = 0;
= > > + hba->saved_uic_err = 0;
= > > + }
= > > + ufshcd_clear_eh_in_progress(hba);
= > > +
= > > +out:
= > > + scsi_unblock_requests(hba->host);
= > > pm_runtime_put_sync(hba->dev);
= > > }
= > >
= > > /**
= > > - * ufshcd_err_handler - Check for fatal errors
= > > - * @work: pointer to a work queue structure
= > > + * ufshcd_update_uic_error - check and set fatal UIC error flags.
= > > + * @hba: per-adapter instance
= > > */
= > > -static void ufshcd_err_handler(struct ufs_hba *hba)
= > > +static void ufshcd_update_uic_error(struct ufs_hba *hba)
= > > {
= > > u32 reg;
= > >
= > > + /* PA_INIT_ERROR is fatal and needs UIC reset */
= > > + reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
= > > + if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
= > > + hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
= > > +
= > > + /* UIC NL/TL/DME errors needs software retry */
= > > + reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
= > > + if (reg)
= > > + hba->uic_error |= UFSHCD_UIC_NL_ERROR;
= > > +
= > > + reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
= > > + if (reg)
= > > + hba->uic_error |= UFSHCD_UIC_TL_ERROR;
= > > +
= > > + reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
= > > + if (reg)
= > > + hba->uic_error |= UFSHCD_UIC_DME_ERROR;
= > > +
= > > + dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
= > > + __func__, hba->uic_error);
= > > +}
= > > +
= > > +/**
= > > + * ufshcd_check_errors - Check for errors that need s/w attention
= > > + * @hba: per-adapter instance
= > > + */
= > > +static void ufshcd_check_errors(struct ufs_hba *hba)
= > > +{
= > > + bool queue_eh_work = false;
= > > +
= > > if (hba->errors & INT_FATAL_ERRORS)
= > > - goto fatal_eh;
= > > + queue_eh_work = true;
= > >
= > > if (hba->errors & UIC_ERROR) {
= > > - reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
= > > - if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
= > > - goto fatal_eh;
= > > + hba->uic_error = 0;
= > > + ufshcd_update_uic_error(hba);
= > > + if (hba->uic_error)
= > > + queue_eh_work = true;
= > > }
= > > - return;
= > > -fatal_eh:
= > > - /* handle fatal errors only when link is functional */
= > > - if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
= > > - /* block commands at driver layer until error is handled */
= > > - hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > - schedule_work(&hba->feh_workq);
= > > +
= > > + if (queue_eh_work) {
= > > + /* handle fatal errors only when link is functional */
= > > + if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
= > > + /* block commands from scsi mid-layer */
= > > + scsi_block_requests(hba->host);
= > > +
= > > + /* transfer error masks to sticky bits */
= > > + hba->saved_err |= hba->errors;
= > > + hba->saved_uic_err |= hba->uic_error;
= > > +
= > > + hba->ufshcd_state = UFSHCD_STATE_ERROR;
= > > + schedule_work(&hba->eh_work);
= > > + }
= > > }
= > > + /*
= > > + * if (!queue_eh_work) -
= > > + * Other errors are either non-fatal where host recovers
= > > + * itself without s/w intervention or errors that will be
= > > + * handled by the SCSI core layer.
= > > + */
= > > }
= > >
= > > /**
= > > @@ -2304,7 +2357,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
= > > {
= > > hba->errors = UFSHCD_ERROR_MASK & intr_status;
= > > if (hba->errors)
= > > - ufshcd_err_handler(hba);
= > > + ufshcd_check_errors(hba);
= > >
= > > if (intr_status & UIC_COMMAND_COMPL)
= > > ufshcd_uic_cmd_compl(hba);
= > > @@ -2679,12 +2732,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
= > > */
= > > do {
= > > spin_lock_irqsave(hba->host->host_lock, flags);
= > > - if (!(work_pending(&hba->feh_workq) ||
= > > + if (!(work_pending(&hba->eh_work) ||
= > > hba->ufshcd_state == UFSHCD_STATE_RESET))
= > > break;
= > > spin_unlock_irqrestore(hba->host->host_lock, flags);
= > > dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
= > > - flush_work_sync(&hba->feh_workq);
= > > + flush_work_sync(&hba->eh_work);
= > > } while (1);
= > >
= > > hba->ufshcd_state = UFSHCD_STATE_RESET;
= > > @@ -2918,7 +2971,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
= > > init_waitqueue_head(&hba->tm_tag_wq);
= > >
= > > /* Initialize work queues */
= > > - INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
= > > + INIT_WORK(&hba->eh_work, ufshcd_err_handler);
= > > INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
= > >
= > > /* Initialize UIC command mutex */
= > > diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
= > > index 1e0585c..8f5624e 100644
= > > --- a/drivers/scsi/ufs/ufshcd.h
= > > +++ b/drivers/scsi/ufs/ufshcd.h
= > > @@ -182,9 +182,12 @@ struct ufs_dev_cmd {
= > > * @eh_flags: Error handling flags
= > > * @intr_mask: Interrupt Mask Bits
= > > * @ee_ctrl_mask: Exception event control mask
= > > - * @feh_workq: Work queue for fatal controller error handling
= > > + * @eh_work: Worker to handle UFS errors that require s/w attention
= > > * @eeh_work: Worker to handle exception events
= > > * @errors: HBA errors
= > > + * @uic_error: UFS interconnect layer error status
= > > + * @saved_err: sticky error mask
= > > + * @saved_uic_err: sticky UIC error mask
= > > * @dev_cmd: ufs device management command information
= > > * @auto_bkops_enabled: to track whether bkops is enabled in device
= > > */
= > > @@ -230,11 +233,14 @@ struct ufs_hba {
= > > u16 ee_ctrl_mask;
= > >
= > > /* Work Queues */
= > > - struct work_struct feh_workq;
= > > + struct work_struct eh_work;
= > > struct work_struct eeh_work;
= > >
= > > /* HBA Errors */
= > > u32 errors;
= > > + u32 uic_error;
= > > + u32 saved_err;
= > > + u32 saved_uic_err;
= > >
= > > /* Device management request data */
= > > struct ufs_dev_cmd dev_cmd;
= > > --
= > > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
= > > member of Code Aurora Forum, hosted by The Linux Foundation.
= > >
= >
= >
= > --
= > QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a
= > member of Code Aurora Forum, hosted by The Linux Foundation
= >
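Two SCSI midlayer hooks used in the patch do the post-reset housekeeping the last point of the commit message relies on. Both are standard SCSI core APIs; the snippet below is only an illustration of their typical use in a low-level driver (shost and sdev stand for the usual Scsi_Host and scsi_device pointers), not code taken from this patch.

    /* slave_alloc: let the error handler send START STOP UNIT when a LUN
     * reports NOT READY after recovery instead of failing the command */
    sdev->allow_restart = 1;

    /* after a host reset: mark devices on channel 0 as having seen a reset so
     * the Unit Attention returned by the next command per LUN is treated as
     * expected and the command is retried */
    scsi_report_bus_reset(shost, 0);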