diff mbox series

[v7,2/4] ufs: core: requeue aborted request

Message ID 20240920090643.3566-3-peter.wang@mediatek.com (mailing list archive)
State Superseded
Headers show
Series fix abort defect | expand

Commit Message

Peter Wang (王信友) Sept. 20, 2024, 9:06 a.m. UTC
From: Peter Wang <peter.wang@mediatek.com>

Regarding the specification of MCQ:
Aborts a command using SQ cleanup, The host controller
will post a Completion Queue entry with OCS = ABORTED.

ufshcd_abort_all forcibly aborts all on-going commands.
In MCQ mode, set a variable to notify SCSI to requeue the
command after receiving response with OCS_ABORTED.
This approach would then be consistent with legacy SDB mode.

Below is ufshcd_err_handler legacy SDB flow:
ufshcd_err_handler()
  ufshcd_abort_all()
    ufshcd_abort_one()
      ufshcd_try_to_abort_task()
    ufshcd_complete_requests()
      ufshcd_transfer_req_compl()
        ufshcd_poll()
          get outstanding_lock
          clear outstanding_reqs tag
          release outstanding_lock
          __ufshcd_transfer_req_compl()
            ufshcd_compl_one_cqe()
              cmd->result = DID_REQUEUE
              ufshcd_release_scsi_cmd()
              scsi_done()

ufshcd_intr()
  ufshcd_sl_intr()
    ufshcd_transfer_req_compl()
      ufshcd_poll()
        get outstanding_lock
        clear outstanding_reqs tag
        release outstanding_lock
        __ufshcd_transfer_req_compl()
          ufshcd_compl_one_cqe()
          cmd->result = DID_REQUEUE
          ufshcd_release_scsi_cmd()
          scsi_done();

Below is ufshcd_err_handler MCQ flow:

ufshcd_err_handler()
  ufshcd_abort_all()
    ufshcd_abort_one()
      ufshcd_try_to_abort_task()
    ufshcd_complete_requests()
      ufshcd_mcq_compl_pending_transfer()
        ufshcd_mcq_poll_cqe_lock()
          ufshcd_mcq_process_cqe()
            ufshcd_compl_one_cqe()
              cmd->result = DID_ABORT // should change to DID_REQUEUE
              ufshcd_release_scsi_cmd()
              scsi_done()

ufs_mtk_mcq_intr()
  ufshcd_mcq_poll_cqe_lock()
    ufshcd_mcq_process_cqe()
      ufshcd_compl_one_cqe()
        cmd->result = DID_ABORT  // should change to DID_REQUEUE
        ufshcd_release_scsi_cmd()
        scsi_done()

So what we need to correct is to notify SCSI to requeue
when MCQ mode receives OCS: ABORTED.

Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode")
Cc: stable@vger.kernel.org
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
---
 drivers/ufs/core/ufshcd.c | 40 ++++++++++++++++++++++++---------------
 include/ufs/ufshcd.h      |  8 ++++++++
 2 files changed, 33 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index a6f818cdef0e..4f9c7a632465 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3006,6 +3006,7 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
 
 	lrbp->req_abort_skip = false;
+	lrbp->abort_initiated_by = UFS_NO_ABORT;
 
 	ufshcd_comp_scsi_upiu(hba, lrbp);
 
@@ -5404,10 +5405,19 @@  ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
 		}
 		break;
 	case OCS_ABORTED:
-		result |= DID_ABORT << 16;
+		if (lrbp->abort_initiated_by == UFS_ERR_HANDLER)
+			result |= DID_REQUEUE << 16;
+		else
+			result |= DID_ABORT << 16;
+		dev_warn(hba->dev,
+				"OCS aborted from controller = %x for tag %d\n",
+				ocs, lrbp->task_tag);
 		break;
 	case OCS_INVALID_COMMAND_STATUS:
 		result |= DID_REQUEUE << 16;
+		dev_warn(hba->dev,
+				"OCS invaild from controller = %x for tag %d\n",
+				ocs, lrbp->task_tag);
 		break;
 	case OCS_INVALID_CMD_TABLE_ATTR:
 	case OCS_INVALID_PRDT_ATTR:
@@ -6471,26 +6481,12 @@  static bool ufshcd_abort_one(struct request *rq, void *priv)
 	struct scsi_device *sdev = cmd->device;
 	struct Scsi_Host *shost = sdev->host;
 	struct ufs_hba *hba = shost_priv(shost);
-	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
-	struct ufs_hw_queue *hwq;
-	unsigned long flags;
 
 	*ret = ufshcd_try_to_abort_task(hba, tag);
 	dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
 		hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
 		*ret ? "failed" : "succeeded");
 
-	/* Release cmd in MCQ mode if abort succeeds */
-	if (hba->mcq_enabled && (*ret == 0)) {
-		hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
-		if (!hwq)
-			return 0;
-		spin_lock_irqsave(&hwq->cq_lock, flags);
-		if (ufshcd_cmd_inflight(lrbp->cmd))
-			ufshcd_release_scsi_cmd(hba, lrbp);
-		spin_unlock_irqrestore(&hwq->cq_lock, flags);
-	}
-
 	return *ret == 0;
 }
 
@@ -7561,6 +7557,20 @@  int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
 		goto out;
 	}
 
+	/*
+	 * When the host software receives a "FUNCTION COMPLETE", set this
+	 * variable to requeue command after receive response with OCS_ABORTED
+	 *
+	 * MCQ mode: Host will post to CQ with OCS_ABORTED after SQ cleanup
+	 *
+	 * This variable is set because error handler ufshcd_abort_all forcibly
+	 * aborts all commands, and the host controller will automatically
+	 * fill in the OCS field of the corresponding response with OCS_ABORTED.
+	 * Therefore, upon receiving this response, it needs to be requeued.
+	 */
+	if (!err && hba->mcq_enabled && ufshcd_eh_in_progress(hba))
+		lrbp->abort_initiated_by = UFS_ERR_HANDLER;
+
 	err = ufshcd_clear_cmd(hba, tag);
 	if (err)
 		dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 0fd2aebac728..61a7dc489511 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -145,6 +145,11 @@  enum ufs_pm_level {
 	UFS_PM_LVL_MAX
 };
 
+enum ufs_abort_by {
+	UFS_NO_ABORT,
+	UFS_ERR_HANDLER,
+};
+
 struct ufs_pm_lvl_states {
 	enum ufs_dev_pwr_mode dev_state;
 	enum uic_link_state link_state;
@@ -173,6 +178,8 @@  struct ufs_pm_lvl_states {
  * @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
  * @data_unit_num: the data unit number for the first block for inline crypto
  * @req_abort_skip: skip request abort task flag
+ * @abort_initiated_by: This variable is used to store the scenario in
+ *                      which the abort occurs
  */
 struct ufshcd_lrb {
 	struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -202,6 +209,7 @@  struct ufshcd_lrb {
 #endif
 
 	bool req_abort_skip;
+	int abort_initiated_by;
 };
 
 /**