--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2402,7 +2402,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
- blk_mq_request_bypass_insert(rq, true);
+ blk_mq_request_bypass_insert(rq);
return BLK_STS_OK;
}
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -231,13 +231,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
- if (!q->elevator) {
- blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
- flush_rq->tag = -1;
- } else {
- blk_mq_put_driver_tag_hctx(hctx, flush_rq);
- flush_rq->internal_tag = -1;
- }
+ blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+ flush_rq->tag = -1;
}
running = &fq->flush_queue[fq->flush_running_idx];
@@ -323,26 +318,19 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
blk_rq_init(q, flush_rq);
/*
- * In case of none scheduler, borrow tag from the first request
- * since they can't be in flight at the same time. And acquire
- * the tag's ownership for flush req.
- *
- * In case of IO scheduler, flush rq need to borrow scheduler tag
- * just for cheating put/get driver tag.
+ * Borrow tag from the first request since they can't
+ * be in flight at the same time. And acquire the tag's
+ * ownership for flush req.
*/
if (q->mq_ops) {
struct blk_mq_hw_ctx *hctx;
flush_rq->mq_ctx = first_rq->mq_ctx;
+ flush_rq->tag = first_rq->tag;
+ fq->orig_rq = first_rq;

- if (!q->elevator) {
- fq->orig_rq = first_rq;
- flush_rq->tag = first_rq->tag;
- hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
- } else {
- flush_rq->internal_tag = first_rq->internal_tag;
- }
+ hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+ blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
}
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
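To make the restored tag-borrowing concrete: a driver tag indexes a slot in the tags->rqs[] array that maps completions back to requests, and blk_mq_tag_set_rq() repoints that slot. A minimal userspace sketch of the hand-off (toy types and a toy tag_set_rq(), not the kernel API):

#include <assert.h>
#include <stdio.h>

struct request { const char *name; int tag; };

/* toy tag table: slot i records which request owns driver tag i */
static struct request *rqs[4];

static void tag_set_rq(int tag, struct request *rq)
{
        rqs[tag] = rq;          /* completions for 'tag' now route to 'rq' */
}

int main(void)
{
        struct request first = { "first_rq", 2 };
        struct request flush = { "flush_rq", -1 };

        tag_set_rq(first.tag, &first);

        /* blk_kick_flush(): the flush request borrows first_rq's tag */
        flush.tag = first.tag;
        tag_set_rq(flush.tag, &flush);

        /* flush_end_io(): ownership is handed back to the original */
        tag_set_rq(flush.tag, &first);
        flush.tag = -1;

        assert(rqs[first.tag] == &first);
        printf("tag %d is owned by %s\n", first.tag, rqs[first.tag]->name);
        return 0;
}

The borrow is only safe because the flush state machine guarantees flush_rq and first_rq are never in flight at the same time, which is what the restored comment states.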
@@ -406,11 +394,6 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
hctx = blk_mq_map_queue(q, ctx->cpu);
- if (q->elevator) {
- WARN_ON(rq->tag < 0);
- blk_mq_put_driver_tag_hctx(hctx, rq);
- }
-
/*
* After populating an empty queue, kick it to avoid stall. Read
* the comment in flush_end_io().
@@ -480,7 +463,7 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
if (q->mq_ops)
- blk_mq_request_bypass_insert(rq, false);
+ blk_mq_sched_insert_request(rq, false, true, false, false);
else
list_add_tail(&rq->queuelist, &q->queue_head);
return;
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,25 +68,29 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
- return;
+ return false;
+
+ if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+ struct request_queue *q = hctx->queue;

- clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+ if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_dec(&q->shared_hctx_restart);
+ } else
+ clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
- return;
+ return true;
}
+
+ return false;
}
-/*
- * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
- * its queue by itself in its completion handler, so we don't need to
- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
- */
-static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+/* return true if hctx need to run again */
+static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
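The shared-tag branch restored above pairs test_and_clear_bit() with an atomic counter so that the restart path can skip scanning every hardware queue when no BLK_MQ_S_SCHED_RESTART flag is set anywhere. A small userspace sketch of that counting pattern, with C11 atomics standing in for the kernel bitops (all names invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct hctx { atomic_bool restart; };

static atomic_int shared_restart;       /* number of hctxs with the flag set */

static void mark_restart(struct hctx *h)
{
        /* count only the 0 -> 1 transition, like test_and_set_bit() */
        if (!atomic_exchange(&h->restart, true))
                atomic_fetch_add(&shared_restart, 1);
}

static bool clear_restart(struct hctx *h)
{
        /* test_and_clear_bit() analogue: decrement only if we cleared it */
        if (atomic_exchange(&h->restart, false)) {
                atomic_fetch_sub(&shared_restart, 1);
                return true;
        }
        return false;
}

int main(void)
{
        struct hctx a = {0}, b = {0};

        mark_restart(&a);
        mark_restart(&a);               /* idempotent: counted once */
        mark_restart(&b);
        clear_restart(&a);
        clear_restart(&b);
        /* fast path: counter is 0, so no queue scan is needed */
        printf("pending restarts: %d\n", atomic_load(&shared_restart));
        return 0;
}

The counter stays accurate only because both transitions go through an atomic read-modify-write; that is what lets the restored blk_mq_sched_restart() further down trust a plain atomic_read() as its fast path.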
@@ -94,18 +98,23 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
do {
struct request *rq;
+ blk_status_t ret;
if (e->type->ops.mq.has_work &&
!e->type->ops.mq.has_work(hctx))
break;
- if (!blk_mq_get_dispatch_budget(hctx))
- break;
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE)
+ return true;
rq = e->type->ops.mq.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
break;
+ } else if (ret != BLK_STS_OK) {
+ blk_mq_end_request(rq, ret);
+ continue;
}
/*
@@ -115,6 +124,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
*/
list_add(&rq->queuelist, &rq_list);
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+ return false;
}
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
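Both restored dispatch loops (blk_mq_do_dispatch_sched() above and blk_mq_do_dispatch_ctx() below) follow the same budget contract: BLK_STS_RESOURCE from blk_mq_get_dispatch_budget() means stop and ask the caller for a rerun, while any other failure after a request has been dequeued fails just that request and keeps going. A condensed, runnable model of that control flow (toy queue and status codes, not the blk-mq API):

#include <stdbool.h>
#include <stdio.h>

enum status { STS_OK, STS_RESOURCE, STS_IOERR };

/* toy budget source: grants one, hard-fails one, then runs dry */
static int budget_calls;
static enum status get_budget(void)
{
        static const enum status seq[] = { STS_OK, STS_IOERR, STS_RESOURCE };
        return seq[budget_calls++];
}

static int pending = 3;
static int dequeue(void) { return pending ? pending-- : 0; }

/* returns true if the caller should rerun the queue later */
static bool do_dispatch(void)
{
        for (;;) {
                enum status ret = get_budget();

                if (ret == STS_RESOURCE)
                        return true;    /* out of budget: rerun later */

                int rq = dequeue();
                if (!rq)
                        break;          /* queue idle: put the budget back */
                if (ret != STS_OK) {
                        printf("fail rq %d\n", rq); /* blk_mq_end_request() */
                        continue;
                }
                printf("dispatch rq %d\n", rq);
        }
        return false;
}

int main(void)
{
        printf("rerun needed: %d\n", do_dispatch());
        return 0;
}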
@@ -128,12 +139,8 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
return hctx->ctxs[idx];
}
-/*
- * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
- * its queue by itself in its completion handler, so we don't need to
- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
- */
-static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+/* return true if hctx need to run again */
+static bool blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
LIST_HEAD(rq_list);
@@ -141,17 +148,22 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
do {
struct request *rq;
+ blk_status_t ret;
if (!sbitmap_any_bit_set(&hctx->ctx_map))
break;
- if (!blk_mq_get_dispatch_budget(hctx))
- break;
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE)
+ return true;
rq = blk_mq_dequeue_from_ctx(hctx, ctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
break;
+ } else if (ret != BLK_STS_OK) {
+ blk_mq_end_request(rq, ret);
+ continue;
}
/*
@@ -167,19 +179,22 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
WRITE_ONCE(hctx->dispatch_from, ctx);
+
+ return false;
}
/* return true if hw queue need to be run again */
-void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
LIST_HEAD(rq_list);
+ bool run_queue = false;
/* RCU or SRCU read lock is needed before checking quiesced flag */
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
- return;
+ return false;
hctx->run++;
@@ -211,12 +226,12 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
blk_mq_sched_mark_restart_hctx(hctx);
if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
if (has_sched_dispatch)
- blk_mq_do_dispatch_sched(hctx);
+ run_queue = blk_mq_do_dispatch_sched(hctx);
else
- blk_mq_do_dispatch_ctx(hctx);
+ run_queue = blk_mq_do_dispatch_ctx(hctx);
}
} else if (has_sched_dispatch) {
- blk_mq_do_dispatch_sched(hctx);
+ run_queue = blk_mq_do_dispatch_sched(hctx);
} else if (q->mq_ops->get_budget) {
/*
* If we need to get budget before queuing request, we
@@ -226,11 +241,19 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
* TODO: get more budgets, and dequeue more requests in
* one time.
*/
- blk_mq_do_dispatch_ctx(hctx);
+ run_queue = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(q, &rq_list, false);
}
+
+ if (run_queue && !blk_mq_sched_needs_restart(hctx) &&
+ !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) {
+ blk_mq_sched_mark_restart_hctx(hctx);
+ return true;
+ }
+
+ return false;
}
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -345,21 +368,98 @@ void blk_mq_sched_request_inserted(struct request *rq)
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
- bool has_sched,
struct request *rq)
{
- /* dispatch flush rq directly */
- if (rq->rq_flags & RQF_FLUSH_SEQ) {
- spin_lock(&hctx->lock);
- list_add(&rq->queuelist, &hctx->dispatch);
- spin_unlock(&hctx->lock);
- return true;
+ if (rq->tag == -1) {
+ rq->rq_flags |= RQF_SORTED;
+ return false;
}
- if (has_sched)
- rq->rq_flags |= RQF_SORTED;
+ /*
+ * If we already have a real request tag, send directly to
+ * the dispatch list.
+ */
+ spin_lock(&hctx->lock);
+ list_add(&rq->queuelist, &hctx->dispatch);
+ spin_unlock(&hctx->lock);
+ return true;
+}

- return false;
+/**
+ * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
+ * @pos: loop cursor.
+ * @skip: the list element that will not be examined. Iteration starts at
+ * @skip->next.
+ * @head: head of the list to examine. This list must have at least one
+ * element, namely @skip.
+ * @member: name of the list_head structure within typeof(*pos).
+ */
+#define list_for_each_entry_rcu_rr(pos, skip, head, member) \
+ for ((pos) = (skip); \
+ (pos = (pos)->member.next != (head) ? list_entry_rcu( \
+ (pos)->member.next, typeof(*pos), member) : \
+ list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
+ (pos) != (skip); )
+
+/*
+ * Called after a driver tag has been freed to check whether a hctx needs to
+ * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
+ * queues in a round-robin fashion if the tag set of @hctx is shared with other
+ * hardware queues.
+ */
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
+{
+ struct blk_mq_tags *const tags = hctx->tags;
+ struct blk_mq_tag_set *const set = hctx->queue->tag_set;
+ struct request_queue *const queue = hctx->queue, *q;
+ struct blk_mq_hw_ctx *hctx2;
+ unsigned int i, j;
+
+ if (set->flags & BLK_MQ_F_TAG_SHARED) {
+ /*
+ * If this is 0, then we know that no hardware queues
+ * have RESTART marked. We're done.
+ */
+ if (!atomic_read(&queue->shared_hctx_restart))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
+ tag_set_list) {
+ queue_for_each_hw_ctx(q, hctx2, i)
+ if (hctx2->tags == tags &&
+ blk_mq_sched_restart_hctx(hctx2))
+ goto done;
+ }
+ j = hctx->queue_num + 1;
+ for (i = 0; i < queue->nr_hw_queues; i++, j++) {
+ if (j == queue->nr_hw_queues)
+ j = 0;
+ hctx2 = queue->queue_hw_ctx[j];
+ if (hctx2->tags == tags &&
+ blk_mq_sched_restart_hctx(hctx2))
+ break;
+ }
+done:
+ rcu_read_unlock();
+ } else {
+ blk_mq_sched_restart_hctx(hctx);
+ }
+}
+
+/*
+ * Add flush/fua to the queue. If we fail getting a driver tag, then
+ * punt to the requeue list. Requeue will re-invoke us from a context
+ * that's safe to block from.
+ */
+static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, bool can_block)
+{
+ if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
+ blk_insert_flush(rq);
+ blk_mq_run_hw_queue(hctx, true);
+ } else
+ blk_mq_add_to_requeue_list(rq, false, true);
}
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
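The list_for_each_entry_rcu_rr() walk restored above starts at @skip->next and stops once it comes back around to @skip, so queues sharing a tag set are restarted in turn instead of the same queue always being picked first. A plain, non-RCU userspace rendition of that round-robin traversal (hypothetical node type; it also omits the extra ->next->next hop the kernel macro needs to step over the list head):

#include <stdio.h>

struct node { int id; struct node *next; };

/* visit every node except 'skip', starting at skip->next */
#define for_each_node_rr(pos, skip) \
        for ((pos) = (skip)->next; (pos) != (skip); (pos) = (pos)->next)

int main(void)
{
        struct node a = { 0 }, b = { 1 }, c = { 2 }, *pos;

        /* circular list: a -> b -> c -> a */
        a.next = &b; b.next = &c; c.next = &a;

        for_each_node_rr(pos, &b)       /* visits c, then a, stops at b */
                printf("visiting node %d\n", pos->id);
        return 0;
}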
@@ -370,15 +470,12 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
- /* flush rq in flush machinery need to be dispatched directly */
- if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
- blk_insert_flush(rq);
- goto run;
+ if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
+ blk_mq_sched_insert_flush(hctx, rq, can_block);
+ return;
}
- WARN_ON(e && (rq->tag != -1));
-
- if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
+ if (e && blk_mq_sched_bypass_insert(hctx, rq))
goto run;
if (e && e->type->ops.mq.insert_requests) {
@@ -404,6 +501,23 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
struct elevator_queue *e = hctx->queue->elevator;
+ if (e) {
+ struct request *rq, *next;
+
+ /*
+ * We bypass requests that already have a driver tag assigned,
+ * which should only be flushes. Flushes are only ever inserted
+ * as single requests, so we shouldn't ever hit the
+ * WARN_ON_ONCE() below (but let's handle it just in case).
+ */
+ list_for_each_entry_safe(rq, next, list, queuelist) {
+ if (WARN_ON_ONCE(rq->tag != -1)) {
+ list_del_init(&rq->queuelist);
+ blk_mq_sched_bypass_insert(hctx, rq);
+ }
+ }
+ }
+
if (e && e->type->ops.mq.insert_requests)
e->type->ops.mq.insert_requests(hctx, list, false);
else
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -22,7 +22,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async);
-void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -653,8 +653,6 @@ static void __blk_mq_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
- blk_mq_put_driver_tag(rq);
-
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->issue_stat);
blk_mq_sched_requeue_request(rq);
@@ -1004,6 +1002,62 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
return rq->tag != -1;
}
+static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
+{
+ blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+ rq->tag = -1;
+
+ if (rq->rq_flags & RQF_MQ_INFLIGHT) {
+ rq->rq_flags &= ~RQF_MQ_INFLIGHT;
+ atomic_dec(&hctx->nr_active);
+ }
+}
+
+static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
+{
+ if (rq->tag == -1 || rq->internal_tag == -1)
+ return;
+
+ __blk_mq_put_driver_tag(hctx, rq);
+}
+
+static void blk_mq_put_driver_tag(struct request *rq)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ if (rq->tag == -1 || rq->internal_tag == -1)
+ return;
+
+ hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+ __blk_mq_put_driver_tag(hctx, rq);
+}
+
+/*
+ * If we fail getting a driver tag because all the driver tags are already
+ * assigned and on the dispatch list, BUT the first entry does not have a
+ * tag, then we could deadlock. For that case, move entries with assigned
+ * driver tags to the front, leaving the set of tagged requests in the
+ * same order, and the untagged set in the same order.
+ */
+static bool reorder_tags_to_front(struct list_head *list)
+{
+ struct request *rq, *tmp, *first = NULL;
+
+ list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
+ if (rq == first)
+ break;
+ if (rq->tag != -1) {
+ list_move(&rq->queuelist, list);
+ if (!first)
+ first = rq;
+ }
+ }
+
+ return first != NULL;
+}
+
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
void *key)
{
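reorder_tags_to_front() above is a stable partition: walking the list back to front and moving each tagged request to the head leaves every tagged request in front of every untagged one, with both groups keeping their relative order. A self-contained userspace model with toy list helpers (not <linux/list.h>; tag -1 means untagged, as in blk-mq):

#include <stdio.h>

struct rq { int tag; struct rq *prev, *next; };

static void list_add(struct rq *n, struct rq *head)     /* add at front */
{
        n->prev = head; n->next = head->next;
        head->next->prev = n; head->next = n;
}

static void list_del(struct rq *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void list_move(struct rq *n, struct rq *head)
{
        list_del(n);
        list_add(n, head);
}

int main(void)
{
        struct rq head, r[4] = { {-1}, {7}, {-1}, {9} }, *pos, *tmp;
        struct rq *first = NULL;

        head.prev = head.next = &head;
        for (int i = 3; i >= 0; i--)
                list_add(&r[i], &head);         /* list: -1 7 -1 9 */

        /* reverse walk, moving tagged entries to the front; 'first'
         * remembers the first moved entry so the walk stops early */
        for (pos = head.prev, tmp = pos->prev;
             pos != &head && pos != first;
             pos = tmp, tmp = pos->prev)
                if (pos->tag != -1) {
                        list_move(pos, &head);
                        if (!first)
                                first = pos;
                }

        for (pos = head.next; pos != &head; pos = pos->next)
                printf("%d ", pos->tag);        /* prints: 7 9 -1 -1 */
        printf("\n");
        return 0;
}

In the restored blk_mq_dispatch_rq_list() this runs only when the request at the head of the list cannot get a driver tag and nothing has been dispatched yet, which is exactly the situation where an untagged head could otherwise deadlock behind its own list.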
@@ -1046,7 +1100,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
bool got_budget)
{
struct blk_mq_hw_ctx *hctx;
- struct request *rq, *nxt;
+ struct request *rq;
int errors, queued;
if (list_empty(list))
@@ -1064,6 +1118,9 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+ if (!queued && reorder_tags_to_front(list))
+ continue;
+
/*
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
@@ -1086,8 +1143,13 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
}
}
- if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
- break;
+ if (!got_budget) {
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE)
+ break;
+ if (ret != BLK_STS_OK)
+ goto fail_rq;
+ }
list_del_init(&rq->queuelist);
@@ -1100,25 +1162,21 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
if (list_empty(list))
bd.last = true;
else {
+ struct request *nxt;
+
nxt = list_first_entry(list, struct request, queuelist);
bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
}
ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE) {
- /*
- * If an I/O scheduler has been configured and we got a
- * driver tag for the next request already, free it again.
- */
- if (!list_empty(list)) {
- nxt = list_first_entry(list, struct request, queuelist);
- blk_mq_put_driver_tag(nxt);
- }
+ blk_mq_put_driver_tag_hctx(hctx, rq);
list_add(&rq->queuelist, list);
__blk_mq_requeue_request(rq);
break;
}
+ fail_rq:
if (unlikely(ret != BLK_STS_OK)) {
errors++;
blk_mq_end_request(rq, BLK_STS_IOERR);
@@ -1135,6 +1193,13 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* that is where we will continue on next queue run.
*/
if (!list_empty(list)) {
+ /*
+ * If an I/O scheduler has been configured and we got a driver
+ * tag for the next request already, free it again.
+ */
+ rq = list_first_entry(list, struct request, queuelist);
+ blk_mq_put_driver_tag(rq);
+
spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
@@ -1170,6 +1235,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
int srcu_idx;
+ bool run_queue;
/*
* We should be running this queue from one of the CPUs that
@@ -1186,15 +1252,18 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
- blk_mq_sched_dispatch_requests(hctx);
+ run_queue = blk_mq_sched_dispatch_requests(hctx);
rcu_read_unlock();
} else {
might_sleep();
srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
- blk_mq_sched_dispatch_requests(hctx);
+ run_queue = blk_mq_sched_dispatch_requests(hctx);
srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
+
+ if (run_queue)
+ blk_mq_run_hw_queue(hctx, true);
}
/*
@@ -1440,7 +1509,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
+void blk_mq_request_bypass_insert(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
@@ -1449,8 +1518,7 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
- if (run_queue)
- blk_mq_run_hw_queue(hctx, false);
+ blk_mq_run_hw_queue(hctx, false);
}
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
@@ -1584,10 +1652,12 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (!blk_mq_get_driver_tag(rq, NULL, false))
goto insert;
- if (!blk_mq_get_dispatch_budget(hctx)) {
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE) {
blk_mq_put_driver_tag(rq);
goto insert;
- }
+ } else if (ret != BLK_STS_OK)
+ goto fail_rq;
new_cookie = request_to_qc_t(hctx, rq);
@@ -1605,6 +1675,7 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
__blk_mq_requeue_request(rq);
goto insert;
default:
+ fail_rq:
*cookie = BLK_QC_T_NONE;
blk_mq_end_request(rq, ret);
return;
@@ -1678,10 +1749,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
if (unlikely(is_flush_fua)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
-
- /* bypass scheduler for flush rq */
- blk_insert_flush(rq);
- blk_mq_run_hw_queue(data.hctx, true);
+ if (q->elevator) {
+ blk_mq_sched_insert_request(rq, false, true, true,
+ true);
+ } else {
+ blk_insert_flush(rq);
+ blk_mq_run_hw_queue(data.hctx, true);
+ }
} else if (plug && q->nr_hw_queues == 1) {
struct request *last = NULL;
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -2,7 +2,6 @@
#define INT_BLK_MQ_H
#include "blk-stat.h"
-#include "blk-mq-tag.h"
struct blk_mq_tag_set;
@@ -57,7 +56,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
+void blk_mq_request_bypass_insert(struct request *rq);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
@@ -148,45 +147,14 @@ static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
q->mq_ops->put_budget(hctx);
}
-static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+static inline blk_status_t blk_mq_get_dispatch_budget(
+ struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
if (q->mq_ops->get_budget)
return q->mq_ops->get_budget(hctx);
- return true;
-}
-
-static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
- rq->tag = -1;
-
- if (rq->rq_flags & RQF_MQ_INFLIGHT) {
- rq->rq_flags &= ~RQF_MQ_INFLIGHT;
- atomic_dec(&hctx->nr_active);
- }
-}
-
-static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- __blk_mq_put_driver_tag(hctx, rq);
-}
-
-static inline void blk_mq_put_driver_tag(struct request *rq)
-{
- struct blk_mq_hw_ctx *hctx;
-
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
- __blk_mq_put_driver_tag(hctx, rq);
+ return BLK_STS_OK;
}
#endif
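With blk_mq_get_dispatch_budget() returning a blk_status_t again, a driver-side .get_budget()/.put_budget() pair reserves device capacity before a request is dequeued and releases it if the request never ships. A hedged userspace sketch of such a pair under the restored contract (my_get_budget(), QUEUE_DEPTH and the busy counter are invented for illustration; the reverted SCSI code gated on sdev->device_busy and a device reference instead, and the status values are stand-ins):

#include <stdatomic.h>
#include <stdio.h>

typedef int blk_status_t;               /* stand-ins for the kernel types */
#define BLK_STS_OK       0
#define BLK_STS_RESOURCE 9

#define QUEUE_DEPTH 2
static atomic_int device_busy;          /* in-flight commands */

/* .get_budget(): claim one unit of device capacity, or report pressure */
static blk_status_t my_get_budget(void)
{
        if (atomic_fetch_add(&device_busy, 1) >= QUEUE_DEPTH) {
                atomic_fetch_sub(&device_busy, 1);
                return BLK_STS_RESOURCE; /* caller reruns the queue later */
        }
        return BLK_STS_OK;
}

/* .put_budget(): undo the claim when no request was dispatched */
static void my_put_budget(void)
{
        atomic_fetch_sub(&device_busy, 1);
}

int main(void)
{
        blk_status_t a = my_get_budget();       /* BLK_STS_OK */
        blk_status_t b = my_get_budget();       /* BLK_STS_OK */
        blk_status_t c = my_get_budget();       /* BLK_STS_RESOURCE */

        printf("%d %d %d\n", a, b, c);
        my_put_budget();
        my_put_budget();
        return 0;
}

A NULL .get_budget() hook keeps the old behaviour: the restored inline wrapper above simply returns BLK_STS_OK.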
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1940,33 +1940,6 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
blk_mq_complete_request(cmd->request);
}
-static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
-{
- struct request_queue *q = hctx->queue;
- struct scsi_device *sdev = q->queuedata;
-
- atomic_dec(&sdev->device_busy);
- put_device(&sdev->sdev_gendev);
-}
-
-static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
-{
- struct request_queue *q = hctx->queue;
- struct scsi_device *sdev = q->queuedata;
-
- if (!get_device(&sdev->sdev_gendev))
- goto out;
- if (!scsi_dev_queue_ready(q, sdev))
- goto out_put_device;
-
- return true;
-
-out_put_device:
- put_device(&sdev->sdev_gendev);
-out:
- return false;
-}
-
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -1980,11 +1953,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
ret = prep_to_mq(scsi_prep_state_check(sdev, req));
if (ret != BLK_STS_OK)
- goto out_put_budget;
+ goto out;
ret = BLK_STS_RESOURCE;
+ if (!get_device(&sdev->sdev_gendev))
+ goto out;
+
+ if (!scsi_dev_queue_ready(q, sdev))
+ goto out_put_device;
if (!scsi_target_queue_ready(shost, sdev))
- goto out_put_budget;
+ goto out_dec_device_busy;
if (!scsi_host_queue_ready(q, shost, sdev))
goto out_dec_target_busy;
@@ -2015,12 +1993,15 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
out_dec_host_busy:
- atomic_dec(&shost->host_busy);
+ atomic_dec(&shost->host_busy);
out_dec_target_busy:
if (scsi_target(sdev)->can_queue > 0)
atomic_dec(&scsi_target(sdev)->target_busy);
-out_put_budget:
- scsi_mq_put_budget(hctx);
+out_dec_device_busy:
+ atomic_dec(&sdev->device_busy);
+out_put_device:
+ put_device(&sdev->sdev_gendev);
+out:
switch (ret) {
case BLK_STS_OK:
break;
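The restored scsi_queue_rq() acquires its resources in a fixed order (device reference, device busy count, target busy count, host busy count) and releases them in reverse through a chain of labels, so every early exit undoes exactly what was taken so far. A generic sketch of that goto-unwind idiom with invented resource names:

#include <stdbool.h>
#include <stdio.h>

static bool get_device(void) { puts("get device"); return true; }
static void put_device(void) { puts("put device"); }
static bool get_target(void) { puts("get target"); return true; }
static void put_target(void) { puts("put target"); }
static bool get_host(void)   { puts("get host (fails)"); return false; }

static int queue_rq(void)
{
        if (!get_device())
                goto out;
        if (!get_target())
                goto out_put_device;
        if (!get_host())
                goto out_put_target;

        /* dispatch would happen here; success keeps every reference */
        return 0;

out_put_target:
        put_target();
out_put_device:
        put_device();
out:
        return -1;              /* BLK_STS_RESOURCE analogue */
}

int main(void)
{
        printf("queue_rq() = %d\n", queue_rq());
        return 0;
}

The reverted .get_budget() change had hoisted the first two acquisitions out of queue_rq(); this revert folds them back into the ladder, which is why out_put_budget above becomes out_dec_device_busy and out_put_device.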
@@ -2224,8 +2205,6 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
}
static const struct blk_mq_ops scsi_mq_ops = {
- .get_budget = scsi_mq_get_budget,
- .put_budget = scsi_mq_put_budget,
.queue_rq = scsi_queue_rq,
.complete = scsi_softirq_done,
.timeout = scsi_timeout,
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -92,7 +92,7 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *);
-typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
+typedef blk_status_t (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
Since we are close to the merge window and since it is too late for
reworking patches and algorithms that have turned out to be flawed,
revert multiple recent patches. Revert "blk-mq: don't handle TAG_SHARED
in restart" and all patches that conflict with that revert, because that
patch is based on an incorrect assumption, namely that the SCSI starved
list can be used as a replacement for the blk-mq queue restarting
mechanism. Revert "scsi: implement .get_budget and .put_budget for
blk-mq" because it triggers an infinite loop on my test setup
("scsi 9:0:0:0: rejecting I/O to dead device" + RCU lockup complaint in
blk_mq_sched_dispatch_requests()).

The list of patches that are reverted by this patch is as follows:

blk-mq: don't allocate driver tag upfront for flush rq
blk-mq: move blk_mq_put_driver_tag*() into blk-mq.h
blk-mq-sched: decide how to handle flush rq via RQF_FLUSH_SEQ
blk-flush: use blk_mq_request_bypass_insert()
block: pass 'run_queue' to blk_mq_request_bypass_insert
blk-flush: don't run queue for requests bypassing flush
blk-mq: put the driver tag of nxt rq before first one is requeued
blk-mq: don't handle failure in .get_budget
SCSI: don't get target/host busy_count in scsi_mq_get_budget()
blk-mq: don't restart queue when .get_budget returns BLK_STS_RESOURCE
blk-mq: don't handle TAG_SHARED in restart
scsi: implement .get_budget and .put_budget for blk-mq

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ming Lei <ming.lei@redhat.com>
---
 block/blk-core.c        |   2 +-
 block/blk-flush.c       |  37 +++------
 block/blk-mq-sched.c    | 200 +++++++++++++++++++++++++++++++++++++-----------
 block/blk-mq-sched.h    |   2 +-
 block/blk-mq.c          | 122 +++++++++++++++++++++++------
 block/blk-mq.h          |  40 +---------
 drivers/scsi/scsi_lib.c |  47 ++++--------
 include/linux/blk-mq.h  |   2 +-
 8 files changed, 285 insertions(+), 167 deletions(-)