@@ -417,9 +417,9 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
blk_mq_run_hw_queue(hctx, run_queue_async);
}

-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx)
{
if (hctx->sched_tags) {
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
@@ -428,9 +428,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
}
}

-static int blk_mq_sched_alloc_tags(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx)
{
struct blk_mq_tag_set *set = q->tag_set;
int ret;
@@ -450,14 +450,52 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
return ret;
}

+static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
+ unsigned int nr)
+{
+ if (!hctx->tags)
+ return -EINVAL;
+
+ return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
+}
+
+static int blk_mq_set_queues_depth(struct request_queue *q,
+ unsigned int nr)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i, j, ret;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ ret = blk_mq_set_queue_depth(hctx, nr);
+ if (ret)
+ goto recovery;
+ }
+ return 0;
+
+ recovery:
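+ /* restore the previous depth on the hctxs already updated before the failure */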
+ queue_for_each_hw_ctx(q, hctx, j) {
+ if (j >= i)
+ break;
+ blk_mq_tag_update_depth(hctx, &hctx->tags,
+ q->act_hw_queue_depth,
+ false);
+ }
+ return ret;
+}
+
static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
int i;

- queue_for_each_hw_ctx(q, hctx, i)
+ queue_for_each_hw_ctx(q, hctx, i) {
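+ /* if hw tags were used for scheduling, restore the saved hw queue depth */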
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+ }
blk_mq_sched_free_tags(set, hctx, i);
+ }
}

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
@@ -504,12 +542,28 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

+/*
+ * If this queue has enough hardware tags and doesn't share tags with
+ * other queues, just use hw tag directly for scheduling.
+ */
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
+{
+ if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
+ return false;
+
+ if (q->act_hw_queue_depth < q->nr_requests)
+ return false;
+
+ return true;
+}
+
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
struct elevator_queue *eq;
unsigned int i;
int ret;
+ bool auto_hw_tag;

if (!e) {
q->elevator = NULL;
@@ -522,7 +576,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
*/
q->nr_requests = 2 * BLKDEV_MAX_RQ;
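+ /*
+ * If the hw tag space is big enough, schedule with hw tags: save the
+ * current hw queue depth and trim every hctx down to nr_requests;
+ * if that fails, fall back to the regular sched tags.
+ */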
+ auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+ if (auto_hw_tag) {
+ q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+ if (blk_mq_set_queues_depth(q, q->nr_requests))
+ auto_hw_tag = false;
+ }
+
queue_for_each_hw_ctx(q, hctx, i) {
+ if (auto_hw_tag)
+ hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+ else
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+
ret = blk_mq_sched_alloc_tags(q, hctx, i);
if (ret)
goto err;
@@ -25,6 +25,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
@@ -35,6 +36,13 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
int blk_mq_sched_init(struct request_queue *q);

+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx);
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx);
+
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
@@ -2150,6 +2150,34 @@ int blk_mq_get_queue_depth(struct request_queue *q)
return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
}

+static void blk_mq_update_sched_flag(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ if (!q->elevator)
+ return;
+
+ if (!blk_mq_sched_may_use_hw_tag(q))
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+ }
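+ /* switching back to sched tags: allocate them if missing */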
+ if (!hctx->sched_tags) {
+ if (blk_mq_sched_alloc_tags(q, hctx, i))
+ goto force_use_hw_tag;
+ }
+ }
+ else
+ force_use_hw_tag:
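+ /* also reached via goto when sched tag allocation fails above */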
+ queue_for_each_hw_ctx(q, hctx, i) {
+ hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+ if (hctx->sched_tags)
+ blk_mq_sched_free_tags(q->tag_set, hctx, i);
+ }
+}
+
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
@@ -2366,7 +2394,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/*
* Do this after blk_queue_make_request() overrides it...
*/
- q->nr_requests = set->queue_depth;
+ q->act_hw_queue_depth = q->nr_requests = set->queue_depth;

/*
* Default to classic polling
@@ -2689,8 +2717,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
break;
}

- if (!ret)
+ if (!ret) {
q->nr_requests = nr;
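+ /* refresh the saved hw depth and re-check whether hw tags can still back the scheduler */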
+ q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+ blk_mq_update_sched_flag(q);
+ }

blk_mq_unfreeze_queue(q);
@@ -428,6 +428,14 @@ struct request_queue {
unsigned int nr_hw_queues;

/*
+ * Save the active hw queue depth before using hw tags for scheduling;
+ * this needs to be revisited if per-hw-queue depth is supported.
+ *
+ * Only used by blk-mq-sched.
+ */
+ unsigned int act_hw_queue_depth;
+
+ /*
* Dispatch queue sorting
*/
sector_t end_sector;
When the tag space of a device is big enough, use the hw tags directly for
I/O scheduling. For now the decision is made when the hw queue depth is not
less than q->nr_requests and the tag set isn't shared.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq-sched.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++-----
 block/blk-mq-sched.h   |  8 +++++
 block/blk-mq.c         | 35 ++++++++++++++++++++--
 include/linux/blkdev.h |  8 +++++
 4 files changed, 122 insertions(+), 9 deletions(-)
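For illustration (my reading of the patch, not part of the commit message):
with the default set in blk_mq_init_sched(), q->nr_requests is
2 * BLKDEV_MAX_RQ = 256, so an unshared tag set needs a hw queue depth of at
least 256 before BLK_MQ_F_SCHED_USE_HW_TAG is set. A typical NVMe I/O queue
(depth on the order of 1024) qualifies, while an AHCI device with an NCQ
depth of 31 keeps using separate scheduler tags. If a later
blk_mq_update_nr_requests() call leaves nr_requests above the hw queue depth,
blk_mq_update_sched_flag() switches the queue back to scheduler tags.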