diff mbox series

[1/2] scsi: Add limitless cmd retry support

Message ID 1600714109-6178-2-git-send-email-michael.christie@oracle.com (mailing list archive)
State Changes Requested
Headers show
Series scsi/sd: make cmd retries configurable | expand

Commit Message

Mike Christie Sept. 21, 2020, 6:48 p.m. UTC
The next patch allows users to configure disk scsi cmd retries from
-1 up to a ULD specific value where -1 means infinite retries.

This patch adds infinite retry support to scsi-ml by just combining
common checks for retries into some helper functions, and then
checking for the -1/SCSI_CMD_RETRIES_NO_LIMIT.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/scsi_error.c | 21 +++++++++++++++------
 drivers/scsi/scsi_lib.c   | 29 +++++++++++++++++++----------
 drivers/scsi/scsi_priv.h  |  1 +
 3 files changed, 35 insertions(+), 16 deletions(-)

Comments

Bart Van Assche Sept. 22, 2020, 3:10 a.m. UTC | #1
On 2020-09-21 11:48, Mike Christie wrote:
> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
> +{
> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
> +		return true;
> +
> +	return (++cmd->retries <= cmd->allowed);
> +}

Something minor: how about leaving out the parentheses from the above return
statement?

> @@ -1268,7 +1276,10 @@ int scsi_eh_get_sense(struct list_head *work_q,
>  			 * finished with the sense data, so set
>  			 * retries to the max allowed to ensure it
>  			 * won't get reissued */
> -			scmd->retries = scmd->allowed;
> +			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
> +				scmd->retries = scmd->allowed = 1;
> +			else
> +				scmd->retries = scmd->allowed;

Please make sure that the comment above the if-statement remains an accurate
description of the code. The comment only explains why scmd->retries is
modified but not why scmd->allowed is modified if scmd->allowed ==
SCSI_CMD_RETRIES_NO_LIMIT.

Thanks,

Bart.
Mike Christie Sept. 22, 2020, 4:01 p.m. UTC | #2
On 9/21/20 10:10 PM, Bart Van Assche wrote:
> On 2020-09-21 11:48, Mike Christie wrote:
>> +static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
>> +{
>> +	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
>> +		return true;
>> +
>> +	return (++cmd->retries <= cmd->allowed);
>> +}
> 
> Something minor: how about leaving out the parentheses from the above return
> statement?

Will do.

> 
>> @@ -1268,7 +1276,10 @@ int scsi_eh_get_sense(struct list_head *work_q,
>>  			 * finished with the sense data, so set
>>  			 * retries to the max allowed to ensure it
>>  			 * won't get reissued */
>> -			scmd->retries = scmd->allowed;
>> +			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
>> +				scmd->retries = scmd->allowed = 1;
>> +			else
>> +				scmd->retries = scmd->allowed;
> 
> Please make sure that the comment above the if-statement remains an accurate
> description of the code. The comment only explains why scmd->retries is
> modified but not why scmd->allowed is modified if scmd->allowed ==
> SCSI_CMD_RETRIES_NO_LIMIT.

Ok.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 927b1e641842..8d237152581f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -116,6 +116,14 @@  static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
 	return 1;
 }
 
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
+{
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return true;
+
+	return (++cmd->retries <= cmd->allowed);
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -151,7 +159,7 @@  scmd_eh_abort_handler(struct work_struct *work)
 						    "eh timeout, not retrying "
 						    "aborted command\n"));
 			} else if (!scsi_noretry_cmd(scmd) &&
-			    (++scmd->retries <= scmd->allowed)) {
+				   scsi_cmd_retry_allowed(scmd)) {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
@@ -1268,7 +1276,10 @@  int scsi_eh_get_sense(struct list_head *work_q,
 			 * finished with the sense data, so set
 			 * retries to the max allowed to ensure it
 			 * won't get reissued */
-			scmd->retries = scmd->allowed;
+			if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+				scmd->retries = scmd->allowed = 1;
+			else
+				scmd->retries = scmd->allowed;
 		else if (rtn != NEEDS_RETRY)
 			continue;
 
@@ -1944,8 +1955,7 @@  int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
-	if ((++scmd->retries) <= scmd->allowed
-	    && !scsi_noretry_cmd(scmd)) {
+	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
 		return NEEDS_RETRY;
 	} else {
 		/*
@@ -2091,8 +2101,7 @@  void scsi_eh_flush_done_q(struct list_head *done_q)
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
 		list_del_init(&scmd->eh_entry);
 		if (scsi_device_online(scmd->device) &&
-		    !scsi_noretry_cmd(scmd) &&
-		    (++scmd->retries <= scmd->allowed)) {
+		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				scmd_printk(KERN_INFO, scmd,
 					     "%s: flush retry cmd\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 06056e9ec333..37b953553362 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -653,6 +653,23 @@  static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
 	scsi_mq_requeue_cmd(cmd);
 }
 
+static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
+{
+	struct request *req = cmd->request;
+	unsigned long wait_for;
+
+	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
+		return false;
+
+	wait_for = (cmd->allowed + 1) * req->timeout;
+	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
+			    wait_for/HZ);
+		return true;
+	}
+	return false;
+}
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
@@ -661,7 +678,6 @@  static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	int level = 0;
 	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
 	      ACTION_DELAYED_RETRY} action;
-	unsigned long wait_for = (cmd->allowed + 1) * req->timeout;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid;
 	bool sense_current = true;      /* false implies "deferred sense" */
@@ -766,8 +782,7 @@  static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 	} else
 		action = ACTION_FAIL;
 
-	if (action != ACTION_FAIL &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies))
+	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
 		action = ACTION_FAIL;
 
 	switch (action) {
@@ -1440,7 +1455,6 @@  static bool scsi_mq_lld_busy(struct request_queue *q)
 static void scsi_softirq_done(struct request *rq)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
-	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
@@ -1450,13 +1464,8 @@  static void scsi_softirq_done(struct request *rq)
 		atomic_inc(&cmd->device->ioerr_cnt);
 
 	disposition = scsi_decide_disposition(cmd);
-	if (disposition != SUCCESS &&
-	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-		scmd_printk(KERN_ERR, cmd,
-			    "timing out command, waited %lus\n",
-			    wait_for/HZ);
+	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
 		disposition = SUCCESS;
-	}
 
 	scsi_log_completion(cmd, disposition);
 
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 22b6585e28b4..f8c46cc82e9f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -15,6 +15,7 @@  struct scsi_host_template;
 struct Scsi_Host;
 struct scsi_nl_hdr;
 
+#define SCSI_CMD_RETRIES_NO_LIMIT -1
 
 /*
  * Scsi Error Handler Flags