@@ -182,6 +182,7 @@ static const char *const hctx_state_name[] = {
HCTX_STATE_NAME(SCHED_RESTART),
HCTX_STATE_NAME(TAG_WAITING),
HCTX_STATE_NAME(START_ON_RUN),
+ HCTX_STATE_NAME(DISPATCH_BUSY),
};
#undef HCTX_STATE_NAME
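
For context, HCTX_STATE_NAME is the usual blk-mq-debugfs designated-initializer trick: each BLK_MQ_S_* bit number indexes its printable name, so this one-line entry is all it takes for DISPATCH_BUSY to show up in the hctx "state" debugfs attribute. A minimal, self-contained userspace sketch of the same pattern (the S_* names below are stand-ins mirroring the enum at the end of this patch, not kernel identifiers):

    #include <stdio.h>

    /* stand-ins for the BLK_MQ_S_* bit numbers */
    enum { S_STOPPED, S_TAG_ACTIVE, S_SCHED_RESTART,
           S_TAG_WAITING, S_START_ON_RUN, S_DISPATCH_BUSY };

    /* same trick as HCTX_STATE_NAME: a designated initializer
     * indexed by the bit number, yielding the printable name */
    #define STATE_NAME(name) [S_##name] = #name
    static const char *const state_name[] = {
            STATE_NAME(STOPPED),
            STATE_NAME(TAG_ACTIVE),
            STATE_NAME(SCHED_RESTART),
            STATE_NAME(TAG_WAITING),
            STATE_NAME(START_ON_RUN),
            STATE_NAME(DISPATCH_BUSY),
    };
    #undef STATE_NAME

    int main(void)
    {
            printf("%s\n", state_name[S_DISPATCH_BUSY]); /* prints DISPATCH_BUSY */
            return 0;
    }
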
@@ -146,7 +146,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
- bool do_sched_dispatch = true;
LIST_HEAD(rq_list);
/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -177,8 +176,33 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
- do_sched_dispatch = blk_mq_dispatch_rq_list(q, &rq_list);
- } else if (!has_sched_dispatch && !q->queue_depth) {
+ blk_mq_dispatch_rq_list(q, &rq_list);
+
+ /*
+ * We may clear DISPATCH_BUSY just after it is set from
+ * another context; the only cost is that one request is
+ * dequeued slightly early, which we can survive. The race
+ * window is small enough that the performance effect is
+ * negligible.
+ */
+ if (list_empty_careful(&hctx->dispatch))
+ clear_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);
+ }
+
+ /*
+ * If DISPATCH_BUSY is set, the hw queue is busy and the
+ * requests in hctx->dispatch have to be flushed first, so
+ * return early without dequeuing anything new.
+ *
+ * Wherever DISPATCH_BUSY gets set, blk_mq_run_hw_queue()
+ * is run afterwards to retry making progress, so it is
+ * always safe to check the flag and return early here.
+ */
+ if (test_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state))
+ return;
+
+ if (!has_sched_dispatch) {
/*
* If there is no per-request_queue depth, we
* flush all requests in this hw queue, otherwise
@@ -187,22 +211,15 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
* run out of resource, which can be triggered
* easily by per-request_queue queue depth
*/
- blk_mq_flush_busy_ctxs(hctx, &rq_list);
- blk_mq_dispatch_rq_list(q, &rq_list);
- }
-
- if (!do_sched_dispatch)
- return;
-
- /*
- * We want to dispatch from the scheduler if there was nothing
- * on the dispatch list or we were able to dispatch from the
- * dispatch list.
- */
- if (has_sched_dispatch)
+ if (!q->queue_depth) {
+ blk_mq_flush_busy_ctxs(hctx, &rq_list);
+ blk_mq_dispatch_rq_list(q, &rq_list);
+ } else {
+ blk_mq_do_dispatch_ctx(q, hctx);
+ }
+ } else {
blk_mq_do_dispatch_sched(q, e, hctx);
- else
- blk_mq_do_dispatch_ctx(q, hctx);
+ }
}
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
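
Since the interleaved removals and additions above make the result hard to read, here is a condensed paraphrase of blk_mq_sched_dispatch_requests() once both hunks are applied (locking, the quiesce check and the long comments elided; a readability aid, not the literal source):

    /* paraphrased: dispatch order after this patch */
    if (!list_empty(&rq_list)) {            /* leftovers from hctx->dispatch */
            blk_mq_sched_mark_restart_hctx(hctx);
            blk_mq_dispatch_rq_list(q, &rq_list);
            if (list_empty_careful(&hctx->dispatch))
                    clear_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);
    }

    if (test_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state))
            return;                         /* hw queue busy: add no new work */

    if (!has_sched_dispatch) {
            if (!q->queue_depth) {
                    /* no per-request_queue depth: flush all sw queues at once */
                    blk_mq_flush_busy_ctxs(hctx, &rq_list);
                    blk_mq_dispatch_rq_list(q, &rq_list);
            } else {
                    /* pull requests one sw queue (ctx) at a time */
                    blk_mq_do_dispatch_ctx(q, hctx);
            }
    } else {
            /* let the elevator feed requests via ->dispatch_request */
            blk_mq_do_dispatch_sched(q, e, hctx);
    }
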
@@ -1142,6 +1142,11 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch);
+ /*
+ * DISPATCH_BUSY won't be cleared until all requests in
+ * hctx->dispatch have been dispatched successfully.
+ */
+ set_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);
spin_unlock(&hctx->lock);
/*
@@ -1446,6 +1451,7 @@ static void blk_mq_request_direct_insert(struct blk_mq_hw_ctx *hctx,
{
spin_lock(&hctx->lock);
list_add_tail(&rq->queuelist, &hctx->dispatch);
+ set_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);
spin_unlock(&hctx->lock);
blk_mq_run_hw_queue(hctx, false);
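
Both producers above set DISPATCH_BUSY under hctx->lock together with the list insertion, while the dispatcher clears it only after observing hctx->dispatch empty. A small self-contained userspace model of that protocol, assuming C11 atomics in place of the kernel bitops (hwctx, direct_insert and drain_dispatch are made-up names; a sketch, not the kernel code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    #define S_DISPATCH_BUSY (1u << 5)

    struct hwctx {
            pthread_mutex_t lock;
            int ndispatch;          /* models the hctx->dispatch list length */
            atomic_uint state;
    };

    /* producer: queue leftover work and mark the hw queue busy,
     * mirroring list_add_tail() + set_bit() under hctx->lock */
    static void direct_insert(struct hwctx *h, int nr)
    {
            pthread_mutex_lock(&h->lock);
            h->ndispatch += nr;
            atomic_fetch_or(&h->state, S_DISPATCH_BUSY);
            pthread_mutex_unlock(&h->lock);
    }

    /* dispatcher: drain what the budget allows, clear BUSY only
     * when the list is seen empty; a racing direct_insert() right
     * after the emptiness check is the benign race the patch
     * comment describes */
    static bool drain_dispatch(struct hwctx *h, int budget)
    {
            pthread_mutex_lock(&h->lock);
            int done = h->ndispatch < budget ? h->ndispatch : budget;
            h->ndispatch -= done;
            bool empty = (h->ndispatch == 0);
            pthread_mutex_unlock(&h->lock);

            if (empty)
                    atomic_fetch_and(&h->state, ~S_DISPATCH_BUSY);

            /* pull new requests only when not busy */
            return !(atomic_load(&h->state) & S_DISPATCH_BUSY);
    }

    int main(void)
    {
            struct hwctx h = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

            direct_insert(&h, 2);
            drain_dispatch(&h, 1);  /* one request left: BUSY stays set */
            drain_dispatch(&h, 1);  /* drained: BUSY cleared */
            return 0;
    }
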
@@ -172,6 +172,7 @@ enum {
BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_S_TAG_WAITING = 3,
BLK_MQ_S_START_ON_RUN = 4,
+ BLK_MQ_S_DISPATCH_BUSY = 5,
BLK_MQ_MAX_DEPTH = 10240,
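
Note that the BLK_MQ_S_* values are bit numbers, not masks: they are passed straight to the atomic bitops on hctx->state, which is why DISPATCH_BUSY takes the next free ordinal (5) rather than a power of two. For illustration only, using the kernel's standard bitop helpers (this snippet is not part of the patch):

    set_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);   /* mark busy    */
    if (test_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state))
            return;                                  /* back off     */
    clear_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state); /* list drained */
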