diff mbox series

[v1,5/5] ufs: core: Add error handling for MCQ mode

Message ID 56ff07e08dbaa6ca3db265c41fa8922a63797905.1680083571.git.quic_nguyenb@quicinc.com (mailing list archive)
State Superseded
Headers show
Series ufs: core: mcq: Add ufshcd_abort() and error handler support in MCQ mode | expand

Commit Message

Bao D. Nguyen March 29, 2023, 10:01 a.m. UTC
Add support for error handling for MCQ mode.

Signed-off-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
---
 drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 11 deletions(-)

Comments

Stanley Jhu April 27, 2023, 7:17 a.m. UTC | #1
Hi Bao,

Bao D. Nguyen <quic_nguyenb@quicinc.com> 於 2023年3月29日 週三 下午6:14寫道:
>
> Add support for error handling for MCQ mode.
>
> Signed-off-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
> ---
>  drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 69 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index fef1907..e947f7f 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
>                 err = -ETIMEDOUT;
>                 dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
>                         __func__, lrbp->task_tag);
> +
> +               /* MCQ mode */
> +               if (is_mcq_enabled(hba))
> +                       return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);

When a time-out occurs during the command-clearing process, it appears
that the MCQ path does not properly clear 'hba->dev_cmd.complete'.
This could result in a null pointer dereference if the device command
interrupt arrives at a later time.

Could you please help check this?

Thanks,
Stanley Chu

> +
> +               /* SDB mode */
>                 if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
>                         /* successfully cleared the command, retry if needed */
>                         err = -EAGAIN;
> @@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
>   */
>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>  {
> +       struct ufshcd_lrb *lrbp;
> +       u32 hwq_num, utag;
> +       int tag;
> +
>         /* Resetting interrupt aggregation counters first and reading the
>          * DOOR_BELL afterward allows us to handle all the completed requests.
>          * In order to prevent other interrupts starvation the DB is read once
> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>          * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>          * do not want polling to trigger spurious interrupt complaints.
>          */
> -       ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> +       if (!is_mcq_enabled(hba)) {
> +               ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> +               goto out;
> +       }
> +
> +       /* MCQ mode */
> +       for (tag = 0; tag < hba->nutrs; tag++) {
> +               lrbp = &hba->lrb[tag];
> +               if (lrbp->cmd) {
> +                       utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
> +                       hwq_num = blk_mq_unique_tag_to_hwq(utag);
> +                       ufshcd_poll(hba->host, hwq_num);
> +               }
> +       }
> +
> +out:
>
>         return IRQ_HANDLED;
>  }
> @@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
>         bool needs_reset = false;
>         int tag, ret;
>
> -       /* Clear pending transfer requests */
> -       for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
> -               ret = ufshcd_try_to_abort_task(hba, tag);
> -               dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> -                       hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> -                       ret ? "failed" : "succeeded");
> -               if (ret) {
> -                       needs_reset = true;
> -                       goto out;
> +       if (is_mcq_enabled(hba)) {
> +               struct ufshcd_lrb *lrbp;
> +               int tag;
> +
> +               for (tag = 0; tag < hba->nutrs; tag++) {
> +                       lrbp = &hba->lrb[tag];
> +                       if (lrbp->cmd) {
> +                               ret = ufshcd_try_to_abort_task(hba, tag);
> +                               dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> +                                       hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> +                                       ret ? "failed" : "succeeded");
> +                       }
> +                       if (ret) {
> +                               needs_reset = true;
> +                               goto out;
> +                       }
> +               }
> +       } else {
> +               /* Clear pending transfer requests */
> +               for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
> +                       ret = ufshcd_try_to_abort_task(hba, tag);
> +                       dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> +                               hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> +                               ret ? "failed" : "succeeded");
> +                       if (ret) {
> +                               needs_reset = true;
> +                               goto out;
> +                       }
>                 }
>         }
> -
>         /* Clear pending task management requests */
>         for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
>                 if (ufshcd_clear_tm_cmd(hba, tag)) {
> @@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>         unsigned long flags, pending_reqs = 0, not_cleared = 0;
>         struct Scsi_Host *host;
>         struct ufs_hba *hba;
> +       struct ufs_hw_queue *hwq;
> +       struct ufshcd_lrb *lrbp;
>         u32 pos;
>         int err;
>         u8 resp = 0xF, lun;
> @@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>                 goto out;
>         }
>
> +       if (is_mcq_enabled(hba)) {
> +               for (pos = 0; pos < hba->nutrs; pos++) {
> +                       lrbp = &hba->lrb[pos];
> +                       if (lrbp->cmd && lrbp->lun == lun) {
> +                               ufshcd_clear_cmds(hba, 1UL << pos);
> +                               hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
> +                               ufshcd_mcq_poll_cqe_lock(hba, hwq);
> +                       }
> +               }
> +               err = 0;
> +               goto out;
> +       }
> +
>         /* clear the commands that were pending for corresponding LUN */
>         spin_lock_irqsave(&hba->outstanding_lock, flags);
>         for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
> --
> 2.7.4
>
Bao D. Nguyen May 4, 2023, 4:18 a.m. UTC | #2
On 4/27/2023 12:17 AM, Stanley Chu wrote:
> Hi Bao,
> 
> Bao D. Nguyen <quic_nguyenb@quicinc.com> 於 2023年3月29日 週三 下午6:14寫道:
>>
>> Add support for error handling for MCQ mode.
>>
>> Signed-off-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
>> ---
>>   drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 69 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>> index fef1907..e947f7f 100644
>> --- a/drivers/ufs/core/ufshcd.c
>> +++ b/drivers/ufs/core/ufshcd.c
>> @@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
>>                  err = -ETIMEDOUT;
>>                  dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
>>                          __func__, lrbp->task_tag);
>> +
>> +               /* MCQ mode */
>> +               if (is_mcq_enabled(hba))
>> +                       return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
> 
> When a time-out occurs during the command-clearing process, it appears
> that the MCQ path does not properly clear 'hba->dev_cmd.complete'.
> This could result in a null pointer dereference if the device command
> interrupt arrives at a later time.
> 
> Could you please help check this?
Thanks Stanley. I will take a look.

> 
> Thanks,
> Stanley Chu
> 
>> +
>> +               /* SDB mode */
>>                  if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
>>                          /* successfully cleared the command, retry if needed */
>>                          err = -EAGAIN;
>> @@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
>>    */
>>   static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>   {
>> +       struct ufshcd_lrb *lrbp;
>> +       u32 hwq_num, utag;
>> +       int tag;
>> +
>>          /* Resetting interrupt aggregation counters first and reading the
>>           * DOOR_BELL afterward allows us to handle all the completed requests.
>>           * In order to prevent other interrupts starvation the DB is read once
>> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>           * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>>           * do not want polling to trigger spurious interrupt complaints.
>>           */
>> -       ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> +       if (!is_mcq_enabled(hba)) {
>> +               ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> +               goto out;
>> +       }
>> +
>> +       /* MCQ mode */
>> +       for (tag = 0; tag < hba->nutrs; tag++) {
>> +               lrbp = &hba->lrb[tag];
>> +               if (lrbp->cmd) {
>> +                       utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
>> +                       hwq_num = blk_mq_unique_tag_to_hwq(utag);
>> +                       ufshcd_poll(hba->host, hwq_num);
>> +               }
>> +       }
>> +
>> +out:
>>
>>          return IRQ_HANDLED;
>>   }
>> @@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
>>          bool needs_reset = false;
>>          int tag, ret;
>>
>> -       /* Clear pending transfer requests */
>> -       for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
>> -               ret = ufshcd_try_to_abort_task(hba, tag);
>> -               dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> -                       hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> -                       ret ? "failed" : "succeeded");
>> -               if (ret) {
>> -                       needs_reset = true;
>> -                       goto out;
>> +       if (is_mcq_enabled(hba)) {
>> +               struct ufshcd_lrb *lrbp;
>> +               int tag;
>> +
>> +               for (tag = 0; tag < hba->nutrs; tag++) {
>> +                       lrbp = &hba->lrb[tag];
>> +                       if (lrbp->cmd) {
>> +                               ret = ufshcd_try_to_abort_task(hba, tag);
>> +                               dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> +                                       hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> +                                       ret ? "failed" : "succeeded");
>> +                       }
>> +                       if (ret) {
>> +                               needs_reset = true;
>> +                               goto out;
>> +                       }
>> +               }
>> +       } else {
>> +               /* Clear pending transfer requests */
>> +               for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
>> +                       ret = ufshcd_try_to_abort_task(hba, tag);
>> +                       dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> +                               hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> +                               ret ? "failed" : "succeeded");
>> +                       if (ret) {
>> +                               needs_reset = true;
>> +                               goto out;
>> +                       }
>>                  }
>>          }
>> -
>>          /* Clear pending task management requests */
>>          for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
>>                  if (ufshcd_clear_tm_cmd(hba, tag)) {
>> @@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>>          unsigned long flags, pending_reqs = 0, not_cleared = 0;
>>          struct Scsi_Host *host;
>>          struct ufs_hba *hba;
>> +       struct ufs_hw_queue *hwq;
>> +       struct ufshcd_lrb *lrbp;
>>          u32 pos;
>>          int err;
>>          u8 resp = 0xF, lun;
>> @@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>>                  goto out;
>>          }
>>
>> +       if (is_mcq_enabled(hba)) {
>> +               for (pos = 0; pos < hba->nutrs; pos++) {
>> +                       lrbp = &hba->lrb[pos];
>> +                       if (lrbp->cmd && lrbp->lun == lun) {
>> +                               ufshcd_clear_cmds(hba, 1UL << pos);
>> +                               hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
>> +                               ufshcd_mcq_poll_cqe_lock(hba, hwq);
>> +                       }
>> +               }
>> +               err = 0;
>> +               goto out;
>> +       }
>> +
>>          /* clear the commands that were pending for corresponding LUN */
>>          spin_lock_irqsave(&hba->outstanding_lock, flags);
>>          for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
>> --
>> 2.7.4
>>
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index fef1907..e947f7f 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3127,6 +3127,12 @@  static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 		err = -ETIMEDOUT;
 		dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
 			__func__, lrbp->task_tag);
+
+		/* MCQ mode */
+		if (is_mcq_enabled(hba))
+			return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
+
+		/* SDB mode */
 		if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
 			/* successfully cleared the command, retry if needed */
 			err = -EAGAIN;
@@ -5562,6 +5568,10 @@  static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
  */
 static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 {
+	struct ufshcd_lrb *lrbp;
+	u32 hwq_num, utag;
+	int tag;
+
 	/* Resetting interrupt aggregation counters first and reading the
 	 * DOOR_BELL afterward allows us to handle all the completed requests.
 	 * In order to prevent other interrupts starvation the DB is read once
@@ -5580,7 +5590,22 @@  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
 	 * do not want polling to trigger spurious interrupt complaints.
 	 */
-	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+	if (!is_mcq_enabled(hba)) {
+		ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+		goto out;
+	}
+
+	/* MCQ mode */
+	for (tag = 0; tag < hba->nutrs; tag++) {
+		lrbp = &hba->lrb[tag];
+		if (lrbp->cmd) {
+			utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
+			hwq_num = blk_mq_unique_tag_to_hwq(utag);
+			ufshcd_poll(hba->host, hwq_num);
+		}
+	}
+
+out:
 
 	return IRQ_HANDLED;
 }
@@ -6359,18 +6384,36 @@  static bool ufshcd_abort_all(struct ufs_hba *hba)
 	bool needs_reset = false;
 	int tag, ret;
 
-	/* Clear pending transfer requests */
-	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
-		ret = ufshcd_try_to_abort_task(hba, tag);
-		dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
-			hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
-			ret ? "failed" : "succeeded");
-		if (ret) {
-			needs_reset = true;
-			goto out;
+	if (is_mcq_enabled(hba)) {
+		struct ufshcd_lrb *lrbp;
+		int tag;
+
+		for (tag = 0; tag < hba->nutrs; tag++) {
+			lrbp = &hba->lrb[tag];
+			if (lrbp->cmd) {
+				ret = ufshcd_try_to_abort_task(hba, tag);
+				dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+					hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+					ret ? "failed" : "succeeded");
+			}
+			if (ret) {
+				needs_reset = true;
+				goto out;
+			}
+		}
+	} else {
+		/* Clear pending transfer requests */
+		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+			ret = ufshcd_try_to_abort_task(hba, tag);
+			dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+				hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+				ret ? "failed" : "succeeded");
+			if (ret) {
+				needs_reset = true;
+				goto out;
+			}
 		}
 	}
-
 	/* Clear pending task management requests */
 	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
 		if (ufshcd_clear_tm_cmd(hba, tag)) {
@@ -7302,6 +7345,8 @@  static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 	unsigned long flags, pending_reqs = 0, not_cleared = 0;
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
+	struct ufs_hw_queue *hwq;
+	struct ufshcd_lrb *lrbp;
 	u32 pos;
 	int err;
 	u8 resp = 0xF, lun;
@@ -7317,6 +7362,19 @@  static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 		goto out;
 	}
 
+	if (is_mcq_enabled(hba)) {
+		for (pos = 0; pos < hba->nutrs; pos++) {
+			lrbp = &hba->lrb[pos];
+			if (lrbp->cmd && lrbp->lun == lun) {
+				ufshcd_clear_cmds(hba, 1UL << pos);
+				hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+				ufshcd_mcq_poll_cqe_lock(hba, hwq);
+			}
+		}
+		err = 0;
+		goto out;
+	}
+
 	/* clear the commands that were pending for corresponding LUN */
 	spin_lock_irqsave(&hba->outstanding_lock, flags);
 	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)