diff mbox series

[v3,10/11] scsi: scsi_debug: Only allow sdebug_max_queue be modified when no shosts

Message ID 20230327074310.1862889-11-john.g.garry@oracle.com (mailing list archive)
State Accepted
Headers show
Series Fix shost command overloading issues | expand

Commit Message

John Garry March 27, 2023, 7:43 a.m. UTC
The shost->can_queue value is initially used to set the per-HW queue context
tag depth in the block layer. This ensures that the shost is not sent more
commands than it can deal with. However, lowering sdebug_max_queue
separately means that we can easily overload the shost, as in the following
example:

$ cat /sys/bus/pseudo/drivers/scsi_debug/max_queue
192
$ cat /sys/class/scsi_host/host0/can_queue
192
$ echo 100 > /sys/bus/pseudo/drivers/scsi_debug/max_queue
$ cat /sys/class/scsi_host/host0/can_queue
192
$ fio --filename=/dev/sda --direct=1 --rw=read --bs=4k --iodepth=256
--runtime=1200 --numjobs=10 --time_based --group_reporting
--name=iops-test-job --eta-newline=1 --readonly    --ioengine=io_uring
--hipri --exitall_on_error
iops-test-job: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=io_uring, iodepth=256
...
fio-3.28
Starting 10 processes
[  111.269885] scsi_io_completion_action: 400 callbacks suppressed
[  111.269885] blk_print_req_error: 400 callbacks suppressed
[  111.269889] I/O error, dev sda, sector 440 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2
[  111.269892] sd 0:0:0:0: [sda] tag#132 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s
[  111.269897] sd 0:0:0:0: [sda] tag#132 CDB: Read(10) 28 00 00 00 01 68 00 00 08 00
[  111.277058] I/O error, dev sda, sector 360 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2

[...]

Ensure that this cannot happen by allowing sdebug_max_queue to be modified
only when we have no shosts. As such, any shost->can_queue value will
match sdebug_max_queue, and sdebug_max_queue cannot be modified separately.

Since retired_max_queue is no longer set, remove support for it.

Continue to apply the restriction that sdebug_max_queue cannot be
modified when sdebug_host_max_queue is set. Adding support for that would
mean extra code, and no one has complained about this restriction
previously.

A command like the following may be used to remove a shost:
echo -1 > /sys/bus/pseudo/drivers/scsi_debug/add_host

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_debug.c | 67 ++++-----------------------------------
 1 file changed, 6 insertions(+), 61 deletions(-)

Comments

Douglas Gilbert April 3, 2023, 5:23 a.m. UTC | #1
On 2023-03-27 03:43, John Garry wrote:
> The shost->can_queue value is initially used to set per-HW queue context
> tag depth in the block layer. This ensures that the shost is not sent too
> many commands which it can deal with. However lowering sdebug_max_queue
> separately means that we can easily overload the shost, as in the following
> example:
> 
> $ cat /sys/bus/pseudo/drivers/scsi_debug/max_queue
> 192
> $ cat /sys/class/scsi_host/host0/can_queue
> 192
> $ echo 100 > /sys/bus/pseudo/drivers/scsi_debug/max_queue
> $ cat /sys/class/scsi_host/host0/can_queue
> 192
> $ fio --filename=/dev/sda --direct=1 --rw=read --bs=4k --iodepth=256
> --runtime=1200 --numjobs=10 --time_based --group_reporting
> --name=iops-test-job --eta-newline=1 --readonly    --ioengine=io_uring
> --hipri --exitall_on_error
> iops-test-job: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=io_uring, iodepth=256
> ...
> fio-3.28
> Starting 10 processes
> [  111.269885] scsi_io_completion_action: 400 callbacks suppressed
> [  111.269885] blk_print_req_error: 400 callbacks suppressed
> [  111.269889] I/O error, dev sda, sector 440 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2
> [  111.269892] sd 0:0:0:0: [sda] tag#132 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s
> [  111.269897] sd 0:0:0:0: [sda] tag#132 CDB: Read(10) 28 00 00 00 01 68 00 00 08 00
> [  111.277058] I/O error, dev sda, sector 360 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2
> 
> [...]
> 
> Ensure that this cannot happen by allowing sdebug_max_queue be modified
> only when we have no shosts. As such, any shost->can_queue value will
> match sdebug_max_queue, and sdebug_max_queue cannot be modified separately.
> 
> Since retired_max_queue is no longer set, remove support.
> 
> Continue to apply the restriction that sdebug_host_max_queue cannot be
> modified when sdebug_host_max_queue is set. Adding support for that would
> mean extra code, and no one has complained about this restriction
> previously.
> 
> A command like the following may be used to remove a shost:
> echo -1 > /sys/bus/pseudo/drivers/scsi_debug/add_host
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>

Thanks.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 6e40e9253393..b6e5b1f2a746 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -762,7 +762,6 @@  static int sdebug_max_luns = DEF_MAX_LUNS;
 static int sdebug_max_queue = SDEBUG_CANQUEUE;	/* per submit queue */
 static unsigned int sdebug_medium_error_start = OPT_MEDIUM_ERR_ADDR;
 static int sdebug_medium_error_count = OPT_MEDIUM_ERR_NUM;
-static atomic_t retired_max_queue;	/* if > 0 then was prior max_queue */
 static int sdebug_ndelay = DEF_NDELAY;	/* if > 0 then unit is nanoseconds */
 static int sdebug_no_lun_0 = DEF_NO_LUN_0;
 static int sdebug_no_uld;
@@ -4928,7 +4927,6 @@  static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 {
 	struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp);
 	int qc_idx;
-	int retiring = 0;
 	unsigned long flags, iflags;
 	struct scsi_cmnd *scp = sqcp->scmd;
 	struct sdebug_scsi_cmd *sdsc;
@@ -4959,9 +4957,6 @@  static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 		sd_dp->aborted = false;
 	ASSIGN_QEUEUED_CMD(scp, NULL);
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = 1;
-
 	sqp->qc_arr[qc_idx] = NULL;
 	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
 		spin_unlock_irqrestore(&sdsc->lock, flags);
@@ -4970,23 +4965,6 @@  static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 		goto out;
 	}
 
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval;
-
-		retval = atomic_read(&retired_max_queue);
-		if (qc_idx >= retval) {
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-			pr_err("index %d too large\n", retval);
-			goto out;
-		}
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	spin_unlock_irqrestore(&sdsc->lock, flags);
 	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
@@ -6431,29 +6409,18 @@  static ssize_t max_queue_show(struct device_driver *ddp, char *buf)
 static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
 			       size_t count)
 {
-	int j, n, k, a;
-	struct sdebug_queue *sqp;
+	int n;
 
 	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
 	    (n <= SDEBUG_CANQUEUE) &&
 	    (sdebug_host_max_queue == 0)) {
 		mutex_lock(&sdebug_host_list_mutex);
-		block_unblock_all_queues(true);
-		k = 0;
-		for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
-		     ++j, ++sqp) {
-			a = find_last_bit(sqp->in_use_bm, SDEBUG_CANQUEUE);
-			if (a > k)
-				k = a;
-		}
-		sdebug_max_queue = n;
-		if (k == SDEBUG_CANQUEUE)
-			atomic_set(&retired_max_queue, 0);
-		else if (k >= n)
-			atomic_set(&retired_max_queue, k + 1);
+
+		/* We may only change sdebug_max_queue when we have no shosts */
+		if (list_empty(&sdebug_host_list))
+			sdebug_max_queue = n;
 		else
-			atomic_set(&retired_max_queue, 0);
-		block_unblock_all_queues(false);
+			count = -EBUSY;
 		mutex_unlock(&sdebug_host_list_mutex);
 		return count;
 	}
@@ -6882,7 +6849,6 @@  static int __init scsi_debug_init(void)
 
 	ramdisk_lck_a[0] = &atomic_rw;
 	ramdisk_lck_a[1] = &atomic_rw2;
-	atomic_set(&retired_max_queue, 0);
 
 	if (sdebug_ndelay >= 1000 * 1000 * 1000) {
 		pr_warn("ndelay must be less than 1 second, ignored\n");
@@ -7520,7 +7486,6 @@  static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 	struct sdebug_queue *sqp;
 	unsigned long flags;
 	int queue_num = data->queue_num;
-	bool retiring = false;
 	int qc_idx;
 	ktime_t time;
 
@@ -7554,9 +7519,6 @@  static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		return true;
 	}
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = true;
-
 	qc_idx = sd_dp->sqa_idx;
 	sqp->qc_arr[qc_idx] = NULL;
 	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
@@ -7567,23 +7529,6 @@  static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		return true;
 	}
 
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval = atomic_read(&retired_max_queue);
-
-		if (qc_idx >= retval) {
-			pr_err("index %d too large\n", retval);
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			sdebug_free_queued_cmd(sqcp);
-			return true;
-		}
-
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	ASSIGN_QEUEUED_CMD(cmd, NULL);
 	spin_unlock_irqrestore(&sdsc->lock, flags);