queue stall with blk-mq-sched

Message ID: 717c595a-a3a6-0508-b537-8cf9e273271e@kernel.dk (mailing list archive)
State: New, archived

Commit Message

Jens Axboe Jan. 24, 2017, 10:06 p.m. UTC
On 01/24/2017 12:55 PM, Jens Axboe wrote:
> Try this patch. We only want to bump it for the driver tags, not the
> scheduler side.

More complete version, this one actually tested. I think this should fix
your issue, let me know.
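
In short: nr_active feeds the hctx_may_queue() fair-share check on the
shared driver-tag depth, so only driver-tag allocations may touch it.
The pairing the patch below establishes looks roughly like this sketch
(the function names here are illustrative, not literal excerpts from
blk-mq):

/* Bump the per-hctx in-flight count only when a real driver tag is
 * handed out (and only if the tag map is shared across hctxs).
 */
static void sketch_account_driver_tag(struct blk_mq_hw_ctx *hctx,
				      struct request *rq)
{
	if (blk_mq_tag_busy(hctx)) {
		rq->rq_flags |= RQF_MQ_INFLIGHT;
		atomic_inc(&hctx->nr_active);
	}
}

/* ...and give it back on free, keyed on RQF_MQ_INFLIGHT, so
 * scheduler-internal tags never skew hctx_may_queue().
 */
static void sketch_unaccount_on_free(struct blk_mq_hw_ctx *hctx,
				     struct request *rq)
{
	if (rq->rq_flags & RQF_MQ_INFLIGHT)
		atomic_dec(&hctx->nr_active);
}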

Comments

Hannes Reinecke Jan. 25, 2017, 7:39 a.m. UTC | #1
On 01/24/2017 11:06 PM, Jens Axboe wrote:
> On 01/24/2017 12:55 PM, Jens Axboe wrote:
>> Try this patch. We only want to bump it for the driver tags, not the
>> scheduler side.
> 
> More complete version, this one actually tested. I think this should fix
> your issue, let me know.
> 
Nearly there.
The initial stall is gone, but the test hung again at the 'stonewall'
sequence (the fio barrier that keeps [rand-read] from starting until
[seq-read] has completed):

[global]
bs=4k
ioengine=libaio
iodepth=256
size=4g
direct=1
runtime=60
# directory=/mnt
numjobs=32
group_reporting
cpus_allowed_policy=split
filename=/dev/md127

[seq-read]
rw=read
# the test hangs at this stonewall:
stonewall

[rand-read]
rw=randread
stonewall

Restarting all queues made the fio job continue.
There were four queues in state 'restart' and one queue in state
'active', so we're missing a queue run somewhere.
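
For reference, re-running every hardware queue is what unstuck it; from
kernel context that amounts to something like the sketch below
(kick_all_hw_queues is a made-up name; blk_mq_run_hw_queues() is the
existing helper):

#include <linux/blk-mq.h>

/* Re-run every hctx of a request queue. async=true defers the runs to
 * the workqueue instead of running them inline in this context.
 */
static void kick_all_hw_queues(struct request_queue *q)
{
	blk_mq_run_hw_queues(q, true);
}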

Cheers,

Hannes

Patch

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a49ec77..1b156ca 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -90,9 +90,11 @@  static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	return atomic_read(&hctx->nr_active) < depth;
 }
 
-static int __blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
+static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
+			    struct sbitmap_queue *bt)
 {
-	if (!hctx_may_queue(hctx, bt))
+	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
+	    !hctx_may_queue(data->hctx, bt))
 		return -1;
 	return __sbitmap_queue_get(bt);
 }
@@ -118,7 +120,7 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		tag_offset = tags->nr_reserved_tags;
 	}
 
-	tag = __blk_mq_get_tag(data->hctx, bt);
+	tag = __blk_mq_get_tag(data, bt);
 	if (tag != -1)
 		goto found_tag;
 
@@ -129,7 +131,7 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	do {
 		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __blk_mq_get_tag(data->hctx, bt);
+		tag = __blk_mq_get_tag(data, bt);
 		if (tag != -1)
 			break;
 
@@ -144,7 +146,7 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		 * Retry tag allocation after running the hardware queue,
 		 * as running the queue may also have found completions.
 		 */
-		tag = __blk_mq_get_tag(data->hctx, bt);
+		tag = __blk_mq_get_tag(data, bt);
 		if (tag != -1)
 			break;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ee69e5e..dcb5676 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -230,15 +230,14 @@  struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 
 		rq = tags->static_rqs[tag];
 
-		if (blk_mq_tag_busy(data->hctx)) {
-			rq->rq_flags = RQF_MQ_INFLIGHT;
-			atomic_inc(&data->hctx->nr_active);
-		}
-
 		if (data->flags & BLK_MQ_REQ_INTERNAL) {
 			rq->tag = -1;
 			rq->internal_tag = tag;
 		} else {
+			if (blk_mq_tag_busy(data->hctx)) {
+				rq->rq_flags = RQF_MQ_INFLIGHT;
+				atomic_inc(&data->hctx->nr_active);
+			}
 			rq->tag = tag;
 			rq->internal_tag = -1;
 		}
@@ -869,6 +868,10 @@  static bool blk_mq_get_driver_tag(struct request *rq,
 
 	rq->tag = blk_mq_get_tag(&data);
 	if (rq->tag >= 0) {
+		if (blk_mq_tag_busy(data.hctx)) {
+			rq->rq_flags |= RQF_MQ_INFLIGHT;
+			atomic_inc(&data.hctx->nr_active);
+		}
 		data.hctx->tags->rqs[rq->tag] = rq;
 		goto done;
 	}
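
The last hunk is the other half of the move: with a scheduler attached,
a request starts out holding only an internal tag, and the real driver
tag is acquired at dispatch time in blk_mq_get_driver_tag(), so that is
now where the busy accounting happens. The surrounding context looks
roughly like this (a reconstructed sketch, not part of the patch):

	struct blk_mq_alloc_data data = {
		.q = rq->q,
		/* no BLK_MQ_REQ_INTERNAL: this is a real driver tag */
		.flags = BLK_MQ_REQ_NOWAIT,
	};

	rq->tag = blk_mq_get_tag(&data);

Because data.flags lacks BLK_MQ_REQ_INTERNAL, the new check in
__blk_mq_get_tag() applies hctx_may_queue() here, and nr_active is
bumped exactly once per in-flight request.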