From patchwork Wed Jun 10 17:29:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598477 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 84DAC913 for ; Wed, 10 Jun 2020 17:33:32 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 760BC2072E for ; Wed, 10 Jun 2020 17:33:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726257AbgFJRdb (ORCPT ); Wed, 10 Jun 2020 13:33:31 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5804 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726327AbgFJRdb (ORCPT ); Wed, 10 Jun 2020 13:33:31 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id D8E569FF5E15F0A2D922; Thu, 11 Jun 2020 01:33:26 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:18 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 01/12] blk-mq: rename BLK_MQ_F_TAG_SHARED as BLK_MQ_F_TAG_QUEUE_SHARED Date: Thu, 11 Jun 2020 01:29:08 +0800 Message-ID: <1591810159-240929-2-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Ming Lei BLK_MQ_F_TAG_SHARED actually means that tags are shared among request queues, all of which should belong to LUNs 
attached to same HBA. So rename it to make the point explicitly. Suggested-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: John Garry --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-tag.c | 2 +- block/blk-mq-tag.h | 4 ++-- block/blk-mq.c | 20 ++++++++++---------- include/linux/blk-mq.h | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 15df3a36e9fa..52d11f8422a7 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -237,7 +237,7 @@ static const char *const alloc_policy_name[] = { #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(SHOULD_MERGE), - HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 96a39d0724a2..85aa1690cbcf 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -65,7 +65,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, { unsigned int depth, users; - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return true; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d38e48f2a0a4..c810a346db8e 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -56,7 +56,7 @@ extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return false; return __blk_mq_tag_busy(hctx); @@ -64,7 +64,7 @@ static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) 
return; __blk_mq_tag_idle(hctx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 9a36ac1c1fa1..d255c485ca5f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -281,7 +281,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { + if (data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) { rq_flags = RQF_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); } @@ -1116,7 +1116,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait_queue_entry_t *wait; bool ret; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { blk_mq_sched_mark_restart_hctx(hctx); /* @@ -1282,7 +1282,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * For non-shared tags, the RESTART check * will suffice. */ - if (hctx->flags & BLK_MQ_F_TAG_SHARED) + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) no_tag = true; break; } @@ -2579,7 +2579,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); @@ -2796,9 +2796,9 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) queue_for_each_hw_ctx(q, hctx, i) { if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; + hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } @@ -2824,7 +2824,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) list_del_rcu(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ - set->flags &= ~BLK_MQ_F_TAG_SHARED; + set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, false); } @@ -2841,12 
+2841,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, * Check to see if we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && - !(set->flags & BLK_MQ_F_TAG_SHARED)) { - set->flags |= BLK_MQ_F_TAG_SHARED; + !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { + set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, true); } - if (set->flags & BLK_MQ_F_TAG_SHARED) + if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail_rcu(&q->tag_set_list, &set->tag_list); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d6fcae17da5a..233209e8030d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -392,7 +392,7 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: From patchwork Wed Jun 10 17:29:09 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598495 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0E805739 for ; Wed, 10 Jun 2020 17:33:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id EDA402072E for ; Wed, 10 Jun 2020 17:33:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726988AbgFJRdh (ORCPT ); Wed, 10 Jun 2020 13:33:37 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5813 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726524AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 
3A5C1CE5B94ED033FB87; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:18 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , Hannes Reinecke , John Garry Subject: [PATCH RFC v7 02/12] blk-mq: rename blk_mq_update_tag_set_depth() Date: Thu, 11 Jun 2020 01:29:09 +0800 Message-ID: <1591810159-240929-3-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke The function does not set the depth, but rather transitions from shared to non-shared queues and vice versa. So rename it to blk_mq_update_tag_set_shared() to better reflect its purpose. 
Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Signed-off-by: Hannes Reinecke Signed-off-by: John Garry --- block/blk-mq-tag.c | 18 ++++++++++-------- block/blk-mq.c | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 85aa1690cbcf..bedddf168253 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -454,24 +454,22 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, node); } -static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, - int node, int alloc_policy) +static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, + int node, int alloc_policy) { unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) - goto free_tags; + return -ENOMEM; if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, node)) goto free_bitmap_tags; - return tags; + return 0; free_bitmap_tags: sbitmap_queue_free(&tags->bitmap_tags); -free_tags: - kfree(tags); - return NULL; + return -ENOMEM; } struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, @@ -492,7 +490,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; - return blk_mq_init_bitmap_tags(tags, node, alloc_policy); + if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { + kfree(tags); + tags = NULL; + } + return tags; } void blk_mq_free_tags(struct blk_mq_tags *tags) diff --git a/block/blk-mq.c b/block/blk-mq.c index d255c485ca5f..c20d75c851f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2802,8 +2802,8 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, - bool shared) +static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; 
@@ -2826,7 +2826,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) /* just transitioned to unshared */ set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, false); + blk_mq_update_tag_set_shared(set, false); } mutex_unlock(&set->tag_list_lock); INIT_LIST_HEAD(&q->tag_set_list); @@ -2844,7 +2844,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, true); + blk_mq_update_tag_set_shared(set, true); } if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); From patchwork Wed Jun 10 17:29:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598515 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DAE8614E3 for ; Wed, 10 Jun 2020 17:33:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CD17820734 for ; Wed, 10 Jun 2020 17:33:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726965AbgFJRdl (ORCPT ); Wed, 10 Jun 2020 13:33:41 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5812 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726637AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 30CF63BDA679831812F4; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:18 +0800 From: John Garry To: , , , , , , , , , , CC: 
, , , , , John Garry Subject: [PATCH RFC v7 03/12] blk-mq: Use pointers for blk_mq_tags bitmap tags Date: Thu, 11 Jun 2020 01:29:10 +0800 Message-ID: <1591810159-240929-4-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Introduce pointers for the blk_mq_tags regular and reserved bitmap tags, with the goal of later being able to use a common shared tag bitmap across all HW contexts in a set. Reviewed-by: Hannes Reinecke Signed-off-by: John Garry --- block/bfq-iosched.c | 4 ++-- block/blk-mq-debugfs.c | 8 ++++---- block/blk-mq-tag.c | 41 ++++++++++++++++++++++------------------- block/blk-mq-tag.h | 7 +++++-- block/blk-mq.c | 4 ++-- block/kyber-iosched.c | 4 ++-- 6 files changed, 37 insertions(+), 31 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 50c8f034c01c..a1123d4d586d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6372,8 +6372,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) struct blk_mq_tags *tags = hctx->sched_tags; unsigned int min_shallow; - min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); - sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); + min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags); + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow); } static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 52d11f8422a7..a400b6698dff 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -450,11 +450,11 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, atomic_read(&tags->active_queues)); seq_puts(m, 
"\nbitmap_tags:\n"); - sbitmap_queue_show(&tags->bitmap_tags, m); + sbitmap_queue_show(tags->bitmap_tags, m); if (tags->nr_reserved_tags) { seq_puts(m, "\nbreserved_tags:\n"); - sbitmap_queue_show(&tags->breserved_tags, m); + sbitmap_queue_show(tags->breserved_tags, m); } } @@ -485,7 +485,7 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) if (res) goto out; if (hctx->tags) - sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); + sbitmap_bitmap_show(&hctx->tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: @@ -519,7 +519,7 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) if (res) goto out; if (hctx->sched_tags) - sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); + sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index bedddf168253..be39db3c88d7 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -35,9 +35,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) */ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) { - sbitmap_queue_wake_all(&tags->bitmap_tags); + sbitmap_queue_wake_all(tags->bitmap_tags); if (include_reserve) - sbitmap_queue_wake_all(&tags->breserved_tags); + sbitmap_queue_wake_all(tags->breserved_tags); } /* @@ -113,10 +113,10 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) WARN_ON_ONCE(1); return BLK_MQ_NO_TAG; } - bt = &tags->breserved_tags; + bt = tags->breserved_tags; tag_offset = 0; } else { - bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; tag_offset = tags->nr_reserved_tags; } @@ -162,9 +162,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) data->ctx); tags = blk_mq_tags_from_data(data); if (data->flags & BLK_MQ_REQ_RESERVED) - bt = &tags->breserved_tags; + bt = tags->breserved_tags; else - bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; /* * If destination hw queue is changed, fake wake up on @@ -198,10 +198,10 @@ 
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); - sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); + sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu); } else { BUG_ON(tag >= tags->nr_reserved_tags); - sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); + sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu); } } @@ -325,9 +325,9 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED); if (tags->nr_reserved_tags) - bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, + bt_tags_for_each(tags, tags->breserved_tags, fn, priv, flags | BT_TAG_ITER_RESERVED); - bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags); + bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, flags); } /** @@ -441,8 +441,8 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, continue; if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); + bt_for_each(hctx, tags->breserved_tags, fn, priv, true); + bt_for_each(hctx, tags->bitmap_tags, fn, priv, false); } blk_queue_exit(q); } @@ -460,15 +460,18 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; - if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) + if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node)) return -ENOMEM; - if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, - node)) + if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags, + round_robin, node)) goto free_bitmap_tags; + tags->bitmap_tags = &tags->__bitmap_tags; + tags->breserved_tags = &tags->__breserved_tags; + return 0; free_bitmap_tags: - sbitmap_queue_free(&tags->bitmap_tags); + 
sbitmap_queue_free(&tags->__bitmap_tags); return -ENOMEM; } @@ -499,8 +502,8 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, void blk_mq_free_tags(struct blk_mq_tags *tags) { - sbitmap_queue_free(&tags->bitmap_tags); - sbitmap_queue_free(&tags->breserved_tags); + sbitmap_queue_free(&tags->__bitmap_tags); + sbitmap_queue_free(&tags->__breserved_tags); kfree(tags); } @@ -550,7 +553,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, * Don't need (or can't) update reserved tags here, they * remain static and should never need resizing. */ - sbitmap_queue_resize(&tags->bitmap_tags, + sbitmap_queue_resize(tags->bitmap_tags, tdepth - tags->nr_reserved_tags); } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index c810a346db8e..cebf7a4b280a 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -13,8 +13,11 @@ struct blk_mq_tags { atomic_t active_queues; - struct sbitmap_queue bitmap_tags; - struct sbitmap_queue breserved_tags; + struct sbitmap_queue *bitmap_tags; + struct sbitmap_queue *breserved_tags; + + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct request **rqs; struct request **static_rqs; diff --git a/block/blk-mq.c b/block/blk-mq.c index c20d75c851f2..90b645c3092c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1093,7 +1093,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, struct sbitmap_queue *sbq; list_del_init(&wait->entry); - sbq = &hctx->tags->bitmap_tags; + sbq = hctx->tags->bitmap_tags; atomic_dec(&sbq->ws_active); } spin_unlock(&hctx->dispatch_wait_lock); @@ -1111,7 +1111,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; + struct sbitmap_queue *sbq = hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; diff --git a/block/kyber-iosched.c 
b/block/kyber-iosched.c index a38c5ab103d1..075e99c207ef 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -359,7 +359,7 @@ static unsigned int kyber_sched_tags_shift(struct request_queue *q) * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. */ - return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) @@ -502,7 +502,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) khd->batching = 0; hctx->sched_data = khd; - sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags, + sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags, kqd->async_depth); return 0; From patchwork Wed Jun 10 17:29:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598517 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2A0D8739 for ; Wed, 10 Jun 2020 17:33:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 159A52072E for ; Wed, 10 Jun 2020 17:33:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727084AbgFJRdn (ORCPT ); Wed, 10 Jun 2020 13:33:43 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5811 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726651AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 26B5DDBF4E63B636A098; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with 
Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:18 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 04/12] blk-mq: Facilitate a shared sbitmap per tagset Date: Thu, 11 Jun 2020 01:29:11 +0800 Message-ID: <1591810159-240929-5-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Some SCSI HBAs (such as HPSA, megaraid, mpt3sas, hisi_sas_v3 ..) support multiple reply queues with single hostwide tags. In addition, these drivers want to use interrupt assignment in pci_alloc_irq_vectors(PCI_IRQ_AFFINITY). However, as discussed in [0], CPU hotplug may cause in-flight IO completion to not be serviced when an interrupt is shut down. That problem is solved in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"). However, to take advantage of that blk-mq feature, the HBA HW queues are required to be mapped to those of the blk-mq hctx's; to do that, the HBA HW queues need to be exposed to the upper layer. In making that transition, the per-SCSI command request tags are no longer unique per Scsi host - they are just unique per hctx. As such, the HBA LLDD would have to generate this tag internally, which has a certain performance overhead. However, another problem is that blk-mq assumes the host may accept (Scsi_host.can_queue * #hw queue) commands. In commit 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq"), the Scsi host busy counter was removed, which would stop the LLDD being sent more than .can_queue commands; however, it should still be ensured that the block layer does not issue more than .can_queue commands to the Scsi host. 
To solve this problem, introduce a shared sbitmap per blk_mq_tag_set, which may be requested at init time. New flag BLK_MQ_F_TAG_HCTX_SHARED should be set when requesting the tagset to indicate whether the shared sbitmap should be used. Even when BLK_MQ_F_TAG_HCTX_SHARED is set, a full set of tags and requests are still allocated per hctx; the reason for this is that if tags and requests were only allocated for a single hctx - like hctx0 - it may break block drivers which expect a request be associated with a specific hctx, i.e. not always hctx0. This will introduce extra memory usage. This change is based on work originally from Ming Lei in [1] and from Bart's suggestion in [2]. [0] https://lore.kernel.org/linux-block/alpine.DEB.2.21.1904051331270.1802@nanos.tec.linutronix.de/ [1] https://lore.kernel.org/linux-block/20190531022801.10003-1-ming.lei@redhat.com/ [2] https://lore.kernel.org/linux-block/ff77beff-5fd9-9f05-12b6-826922bace1f@huawei.com/T/#m3db0a602f095cbcbff27e9c884d6b4ae826144be Signed-off-by: John Garry --- block/blk-mq-tag.c | 39 +++++++++++++++++++++++++++++++++++++-- block/blk-mq-tag.h | 10 +++++++++- block/blk-mq.c | 24 +++++++++++++++++++++++- block/blk-mq.h | 5 +++++ include/linux/blk-mq.h | 6 ++++++ 5 files changed, 80 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index be39db3c88d7..92843e3e1a2a 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -228,7 +228,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * We can hit rq == NULL here, because the tagging functions * test and set the bit before assigning ->rqs[]. 
*/ - if (rq && rq->q == hctx->queue) + if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx) return iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } @@ -466,6 +466,7 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, round_robin, node)) goto free_bitmap_tags; + /* We later overwrite these in case of per-set shared sbitmap */ tags->bitmap_tags = &tags->__bitmap_tags; tags->breserved_tags = &tags->__breserved_tags; @@ -475,7 +476,32 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, return -ENOMEM; } -struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, +bool blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *tag_set) +{ + unsigned int depth = tag_set->queue_depth - tag_set->reserved_tags; + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(tag_set->flags); + bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; + int node = tag_set->numa_node; + + if (bt_alloc(&tag_set->__bitmap_tags, depth, round_robin, node)) + return false; + if (bt_alloc(&tag_set->__breserved_tags, tag_set->reserved_tags, + round_robin, node)) + goto free_bitmap_tags; + return true; +free_bitmap_tags: + sbitmap_queue_free(&tag_set->__bitmap_tags); + return false; +} + +void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set) +{ + sbitmap_queue_free(&tag_set->__bitmap_tags); + sbitmap_queue_free(&tag_set->__breserved_tags); +} + +struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set, + unsigned int total_tags, unsigned int reserved_tags, int node, int alloc_policy) { @@ -502,6 +528,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, void blk_mq_free_tags(struct blk_mq_tags *tags) { + /* + * Do not free tags->{bitmap, breserved}_tags, as this may point to + * shared sbitmap + */ sbitmap_queue_free(&tags->__bitmap_tags); sbitmap_queue_free(&tags->__breserved_tags); kfree(tags); @@ -560,6 +590,11 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, return 0; } +void blk_mq_tag_resize_shared_sbitmap(struct 
blk_mq_tag_set *set, unsigned int size) +{ + sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags); +} + /** * blk_mq_unique_tag() - return a tag that is unique queue-wide * @rq: request for which to compute a unique tag diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index cebf7a4b280a..cf39dd13a24d 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -25,7 +25,12 @@ struct blk_mq_tags { }; -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy); +extern bool blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *tag_set); +extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set); +extern struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *tag_set, + unsigned int nr_tags, + unsigned int reserved_tags, + int node, int alloc_policy); extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); @@ -34,6 +39,9 @@ extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); +extern void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, + unsigned int size); + extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv); diff --git a/block/blk-mq.c b/block/blk-mq.c index 90b645c3092c..77120dd4e4d5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2229,7 +2229,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, if (node == NUMA_NO_NODE) node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, node, + tags = blk_mq_init_tags(set, nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; @@ -3349,11 +3349,28 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (ret) goto 
out_free_mq_map; + if (blk_mq_is_sbitmap_shared(set)) { + if (!blk_mq_init_shared_sbitmap(set)) { + ret = -ENOMEM; + goto out_free_mq_rq_maps; + } + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + tags->bitmap_tags = &set->__bitmap_tags; + tags->breserved_tags = &set->__breserved_tags; + } + } + mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; +out_free_mq_rq_maps: + for (i = 0; i < set->nr_hw_queues; i++) + blk_mq_free_rq_map(set->tags[i]); out_free_mq_map: for (i = 0; i < set->nr_maps; i++) { kfree(set->map[i].mq_map); @@ -3372,6 +3389,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) blk_mq_free_map_and_requests(set, i); + if (blk_mq_is_sbitmap_shared(set)) + blk_mq_exit_shared_sbitmap(set); + for (j = 0; j < set->nr_maps; j++) { kfree(set->map[j].mq_map); set->map[j].mq_map = NULL; @@ -3408,6 +3428,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!hctx->sched_tags) { ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); + if (!ret && blk_mq_is_sbitmap_shared(set)) + blk_mq_tag_resize_shared_sbitmap(set, nr); } else { ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); diff --git a/block/blk-mq.h b/block/blk-mq.h index a139b0631817..1a283c707215 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -158,6 +158,11 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; +static inline bool blk_mq_is_sbitmap_shared(struct blk_mq_tag_set *tag_set) +{ + return tag_set->flags & BLK_MQ_F_TAG_HCTX_SHARED; +} + static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->flags & BLK_MQ_REQ_INTERNAL) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 233209e8030d..7b31cdb92a71 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -231,6 +231,9 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. 
* @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @__bitmap_tags: A shared tags sbitmap, used over all hctx's + * @__breserved_tags: + * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @tag_list_lock: Serializes tag_list accesses. @@ -250,6 +253,8 @@ struct blk_mq_tag_set { unsigned int flags; void *driver_data; + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; struct mutex tag_list_lock; @@ -398,6 +403,7 @@ enum { * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, + BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, From patchwork Wed Jun 10 17:29:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598511 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2381614E3 for ; Wed, 10 Jun 2020 17:33:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1206A20734 for ; Wed, 10 Jun 2020 17:33:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726924AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5807 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726393AbgFJRde (ORCPT ); Wed, 10 Jun 2020 13:33:34 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 023A14CF78F34D365122; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 
Jun 2020 01:33:19 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 05/12] blk-mq: Record nr_active_requests per queue for when using shared sbitmap Date: Thu, 11 Jun 2020 01:29:12 +0800 Message-ID: <1591810159-240929-6-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org The per-hctx nr_active value can no longer be used to fairly assign a share of tag depth per request queue for when using a shared sbitmap, as it does not consider that the tags are shared tags over all hctx's. For this case, record the nr_active_requests per request_queue, and make the judgment based on that value. Also introduce a debugfs version of per-hctx blk_mq_debugfs_attr, omitting hctx_active_show() (as blk_mq_hw_ctx.nr_active is no longer maintained for the case of shared sbitmap) and other entries which we can add which would be revised specifically for when using a shared sbitmap. 
Co-developed-with: Kashyap Desai Signed-off-by: John Garry --- block/blk-core.c | 2 ++ block/blk-mq-debugfs.c | 23 ++++++++++++++++++++++- block/blk-mq-tag.c | 10 ++++++---- block/blk-mq.c | 6 +++--- block/blk-mq.h | 28 +++++++++++++++++++++++++++- include/linux/blkdev.h | 2 ++ 6 files changed, 62 insertions(+), 9 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 03252af8c82c..c622453c1363 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -529,6 +529,8 @@ struct request_queue *__blk_alloc_queue(int node_id) q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->node = node_id; + atomic_set(&q->nr_active_requests_shared_sbitmap, 0); + timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a400b6698dff..0fa3af41ab65 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -796,6 +796,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {}, }; +static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs[] = { + {"state", 0400, hctx_state_show}, + {"flags", 0400, hctx_flags_show}, + {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"busy", 0400, hctx_busy_show}, + {"ctx_map", 0400, hctx_ctx_map_show}, + {"sched_tags", 0400, hctx_sched_tags_show}, + {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, + {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, + {"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write}, + {"queued", 0600, hctx_queued_show, hctx_queued_write}, + {"run", 0600, hctx_run_show, hctx_run_write}, + {"active", 0400, hctx_active_show}, + {"dispatch_busy", 0400, hctx_dispatch_busy_show}, + {} +}; + static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops}, {"read_rq_list", 0400, .seq_ops = 
&ctx_read_rq_list_seq_ops}, @@ -878,13 +895,17 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; + struct blk_mq_tag_set *set = q->tag_set; char name[20]; int i; snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); - debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs); + if (blk_mq_is_sbitmap_shared(set)) + debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_shared_sbitmap_attrs); + else + debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs); hctx_for_each_ctx(hctx, ctx, i) blk_mq_debugfs_register_ctx(hctx, ctx); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 92843e3e1a2a..7db16e49f6f6 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -60,9 +60,11 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) * For shared tag users, we track the number of currently active users * and attempt to provide a fair share of the tag depth for each of them. 
*/ -static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, +static inline bool hctx_may_queue(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt) { + struct blk_mq_hw_ctx *hctx = data->hctx; + struct request_queue *q = data->q; unsigned int depth, users; if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) @@ -84,15 +86,15 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, * Allow at least some tags */ depth = max((bt->sb.depth + users - 1) / users, 4U); - return atomic_read(&hctx->nr_active) < depth; + return __blk_mq_active_requests(hctx, q) < depth; } static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt) { if (!(data->flags & BLK_MQ_REQ_INTERNAL) && - !hctx_may_queue(data->hctx, bt)) - return BLK_MQ_NO_TAG; + !hctx_may_queue(data, bt)) + return BLK_MQ_NO_TAG; if (data->shallow_depth) return __sbitmap_queue_get_shallow(bt, data->shallow_depth); else diff --git a/block/blk-mq.c b/block/blk-mq.c index 77120dd4e4d5..0f7e062a1665 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -283,7 +283,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, } else { if (data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) { rq_flags = RQF_MQ_INFLIGHT; - atomic_inc(&data->hctx->nr_active); + __blk_mq_inc_active_requests(data->hctx, data->q); } rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; @@ -527,7 +527,7 @@ void blk_mq_free_request(struct request *rq) ctx->rq_completed[rq_is_sync(rq)]++; if (rq->rq_flags & RQF_MQ_INFLIGHT) - atomic_dec(&hctx->nr_active); + __blk_mq_dec_active_requests(hctx, q); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); @@ -1073,7 +1073,7 @@ bool blk_mq_get_driver_tag(struct request *rq) if (rq->tag >= 0) { if (shared) { rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&data.hctx->nr_active); + __blk_mq_inc_active_requests(rq->mq_hctx, rq->q); } data.hctx->tags->rqs[rq->tag] = rq; } diff --git a/block/blk-mq.h b/block/blk-mq.h index 
1a283c707215..9c1e612c2298 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -202,6 +202,32 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) return true; } +static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx, + struct request_queue *q) +{ + if (blk_mq_is_sbitmap_shared(q->tag_set)) + atomic_inc(&q->nr_active_requests_shared_sbitmap); + else + atomic_inc(&hctx->nr_active); +} + +static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx, + struct request_queue *q) +{ + if (blk_mq_is_sbitmap_shared(q->tag_set)) + atomic_dec(&q->nr_active_requests_shared_sbitmap); + else + atomic_dec(&hctx->nr_active); +} + +static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx, + struct request_queue *q) +{ + if (blk_mq_is_sbitmap_shared(q->tag_set)) + return atomic_read(&q->nr_active_requests_shared_sbitmap); + return atomic_read(&hctx->nr_active); +} + static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { @@ -210,7 +236,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, if (rq->rq_flags & RQF_MQ_INFLIGHT) { rq->rq_flags &= ~RQF_MQ_INFLIGHT; - atomic_dec(&hctx->nr_active); + __blk_mq_dec_active_requests(hctx, rq->q); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..c536278bec9e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -488,6 +488,8 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; + atomic_t nr_active_requests_shared_sbitmap; + struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); From patchwork Wed Jun 10 17:29:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598489 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 416AC739 for ; Wed, 10 Jun 2020 17:33:37 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3357F20734 for ; Wed, 10 Jun 2020 17:33:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726393AbgFJRdg (ORCPT ); Wed, 10 Jun 2020 13:33:36 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5810 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726525AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 1D2E7B0A4BC7E47E04F1; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:19 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 06/12] blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap Date: Thu, 11 Jun 2020 01:29:13 +0800 Message-ID: <1591810159-240929-7-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org When a shared sbitmap is in use, the number of active request queues per hctx should no longer be relied on when judging how to share the tag bitmap. Instead, maintain the number of active request queues per tag_set and make the judgment based on that. And since blk_mq_tags.active_queues is no longer maintained in that case, do not show it in debugfs. 
Originally-from: Kashyap Desai Signed-off-by: John Garry --- block/blk-mq-debugfs.c | 25 ++++++++++++++++++++-- block/blk-mq-tag.c | 47 ++++++++++++++++++++++++++++++++---------- block/blk-mq.c | 2 ++ include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + 5 files changed, 63 insertions(+), 13 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 0fa3af41ab65..05b4be0c03d9 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -458,17 +458,37 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, } } +static void blk_mq_debugfs_tags_shared_sbitmap_show(struct seq_file *m, + struct blk_mq_tags *tags) +{ + seq_printf(m, "nr_tags=%u\n", tags->nr_tags); + seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags); + + seq_puts(m, "\nbitmap_tags:\n"); + sbitmap_queue_show(tags->bitmap_tags, m); + + if (tags->nr_reserved_tags) { + seq_puts(m, "\nbreserved_tags:\n"); + sbitmap_queue_show(tags->breserved_tags, m); + } +} + static int hctx_tags_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; int res; res = mutex_lock_interruptible(&q->sysfs_lock); if (res) goto out; - if (hctx->tags) - blk_mq_debugfs_tags_show(m, hctx->tags); + if (hctx->tags) { + if (blk_mq_is_sbitmap_shared(set)) + blk_mq_debugfs_tags_shared_sbitmap_show(m, hctx->tags); + else + blk_mq_debugfs_tags_show(m, hctx->tags); + } mutex_unlock(&q->sysfs_lock); out: @@ -802,6 +822,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, {"busy", 0400, hctx_busy_show}, {"ctx_map", 0400, hctx_ctx_map_show}, + {"tags", 0400, hctx_tags_show}, {"sched_tags", 0400, hctx_sched_tags_show}, {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 
7db16e49f6f6..6ca06b1c3a99 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -23,9 +23,19 @@ */ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && - !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - atomic_inc(&hctx->tags->active_queues); + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + + if (blk_mq_is_sbitmap_shared(set)) { + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) && + !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + atomic_inc(&set->active_queues_shared_sbitmap); + + } else { + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && + !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + atomic_inc(&hctx->tags->active_queues); + } return true; } @@ -47,11 +57,19 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->tags; - - if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return; - - atomic_dec(&tags->active_queues); + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + + if (blk_mq_is_sbitmap_shared(set)) { + if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE, + &q->queue_flags)) + return; + atomic_dec(&set->active_queues_shared_sbitmap); + } else { + if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return; + atomic_dec(&tags->active_queues); + } blk_mq_tag_wakeup_all(tags, false); } @@ -65,12 +83,11 @@ static inline bool hctx_may_queue(struct blk_mq_alloc_data *data, { struct blk_mq_hw_ctx *hctx = data->hctx; struct request_queue *q = data->q; + struct blk_mq_tag_set *set = q->tag_set; unsigned int depth, users; if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return true; /* * Don't try dividing an ant @@ -78,7 +95,15 @@ static inline bool hctx_may_queue(struct blk_mq_alloc_data *data, 
if (bt->sb.depth == 1) return true; - users = atomic_read(&hctx->tags->active_queues); + if (blk_mq_is_sbitmap_shared(q->tag_set)) { + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + return true; + users = atomic_read(&set->active_queues_shared_sbitmap); + } else { + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return true; + users = atomic_read(&hctx->tags->active_queues); + } if (!users) return true; diff --git a/block/blk-mq.c b/block/blk-mq.c index 0f7e062a1665..f73a2f9c58bd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3350,6 +3350,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) goto out_free_mq_map; if (blk_mq_is_sbitmap_shared(set)) { + atomic_set(&set->active_queues_shared_sbitmap, 0); + if (!blk_mq_init_shared_sbitmap(set)) { ret = -ENOMEM; goto out_free_mq_rq_maps; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7b31cdb92a71..66711c7234db 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -252,6 +252,7 @@ struct blk_mq_tag_set { unsigned int timeout; unsigned int flags; void *driver_data; + atomic_t active_queues_shared_sbitmap; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c536278bec9e..1b0087e8d01a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -619,6 +619,7 @@ struct request_queue { #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ +#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP)) From patchwork Wed Jun 10 17:29:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598499 Return-Path: Received: from 
mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 662F1913 for ; Wed, 10 Jun 2020 17:33:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 568C920734 for ; Wed, 10 Jun 2020 17:33:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726987AbgFJRdg (ORCPT ); Wed, 10 Jun 2020 13:33:36 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5809 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726713AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 142185FA3A0285FBF2DE; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:19 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 07/12] blk-mq: Add support in hctx_tags_bitmap_show() for a shared sbitmap Date: Thu, 11 Jun 2020 01:29:14 +0800 Message-ID: <1591810159-240929-8-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Since a set-wide shared tag sbitmap may be used, it is no longer valid to examine the per-hctx tagset for getting the active requests for a hctx (when a shared sbitmap is used). As such, add support for the shared sbitmap by using an intermediate sbitmap per hctx, iterating all active tags for the specific hctx in the shared sbitmap. 
Originally-by: Bart Van Assche Reviewed-by: Hannes Reinecke #earlier version Signed-off-by: John Garry --- block/blk-mq-debugfs.c | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 05b4be0c03d9..4da7e54adf3b 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -495,6 +495,67 @@ static int hctx_tags_show(void *data, struct seq_file *m) return res; } +struct hctx_sb_data { + struct sbitmap *sb; /* output bitmap */ + struct blk_mq_hw_ctx *hctx; /* input hctx */ +}; + +static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req, + void *priv, bool reserved) +{ + struct hctx_sb_data *hctx_sb_data = priv; + + if (hctx == hctx_sb_data->hctx) + sbitmap_set_bit(hctx_sb_data->sb, req->tag); + + return true; +} + +static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx) +{ + struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx }; + + blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data); +} + +static int hctx_tags_shared_sbitmap_bitmap_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + struct sbitmap shared_sb, *sb; + int res; + + if (!set) + return 0; + + /* + * We could use the allocated sbitmap for that hctx here, but + * that would mean that we would need to clean it prior to use. 
+ */ + res = sbitmap_init_node(&shared_sb, + set->__bitmap_tags.sb.depth, + set->__bitmap_tags.sb.shift, + GFP_KERNEL, NUMA_NO_NODE); + if (res) + return res; + sb = &shared_sb; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->tags) { + hctx_filter_sb(sb, hctx); + sbitmap_bitmap_show(sb, m); + } + + mutex_unlock(&q->sysfs_lock); + +out: + sbitmap_free(&shared_sb); + return res; +} + static int hctx_tags_bitmap_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; @@ -823,6 +884,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs {"busy", 0400, hctx_busy_show}, {"ctx_map", 0400, hctx_ctx_map_show}, {"tags", 0400, hctx_tags_show}, + {"tags_bitmap", 0400, hctx_tags_shared_sbitmap_bitmap_show}, {"sched_tags", 0400, hctx_sched_tags_show}, {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, From patchwork Wed Jun 10 17:29:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598501 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2382C913 for ; Wed, 10 Jun 2020 17:33:40 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 116D420734 for ; Wed, 10 Jun 2020 17:33:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727043AbgFJRdj (ORCPT ); Wed, 10 Jun 2020 13:33:39 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5814 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726794AbgFJRdg (ORCPT ); Wed, 10 Jun 2020 13:33:36 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 4331E5A829A4F59AED6D; Thu, 11 Jun 2020 
01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:20 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 08/12] scsi: Add template flag 'host_tagset' Date: Thu, 11 Jun 2020 01:29:15 +0800 Message-ID: <1591810159-240929-9-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Add a host template flag 'host_tagset' so hostwide tagset can be shared on multiple reply queues after the SCSI device's reply queue is converted to blk-mq hw queue. Signed-off-by: Hannes Reinecke jpg: Update comment on can_queue Signed-off-by: John Garry --- drivers/scsi/scsi_lib.c | 2 ++ include/scsi/scsi_host.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0ba7a65e7c8d..0652acdcec22 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1894,6 +1894,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) tag_set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); tag_set->driver_data = shost; + if (shost->hostt->host_tagset) + tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; return blk_mq_alloc_tag_set(tag_set); } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 46ef8cccc982..9b7e333a681d 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -436,6 +436,9 @@ struct scsi_host_template { /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; + /* True if the host uses host-wide 
tagspace */ + unsigned host_tagset:1; + /* * Countdown for host blocking with no commands outstanding. */ @@ -603,7 +606,8 @@ struct Scsi_Host { * * Note: it is assumed that each hardware queue has a queue depth of * can_queue. In other words, the total queue depth per host - * is nr_hw_queues * can_queue. + * is nr_hw_queues * can_queue. However, for when host_tagset is set, + * the total queue depth is can_queue. */ unsigned nr_hw_queues; unsigned active_mode:2; From patchwork Wed Jun 10 17:29:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598483 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 870F4739 for ; Wed, 10 Jun 2020 17:33:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 712852072E for ; Wed, 10 Jun 2020 17:33:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726482AbgFJRdb (ORCPT ); Wed, 10 Jun 2020 13:33:31 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5806 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726313AbgFJRdb (ORCPT ); Wed, 10 Jun 2020 13:33:31 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id ECC13802EAF99330BCCA; Thu, 11 Jun 2020 01:33:26 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:20 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 09/12] scsi: hisi_sas: Switch v3 hw to MQ Date: Thu, 11 Jun 2020 01:29:16 +0800 Message-ID: <1591810159-240929-10-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: 
<1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Now that the block layer provides a shared tag, we can switch the driver to expose all HW queues. Signed-off-by: John Garry --- drivers/scsi/hisi_sas/hisi_sas.h | 3 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 36 ++++++----- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 87 +++++++++++--------------- 3 files changed, 56 insertions(+), 70 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 2bdd64648ef0..e6acbf940712 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -8,6 +8,8 @@ #define _HISI_SAS_H_ #include +#include +#include #include #include #include @@ -431,7 +433,6 @@ struct hisi_hba { u32 intr_coal_count; /* Interrupt count to coalesce */ int cq_nvecs; - unsigned int *reply_map; /* bist */ enum sas_linkrate debugfs_bist_linkrate; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 11caa4b0d797..7ed4eaedb7ca 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -417,6 +417,7 @@ static int hisi_sas_task_prep(struct sas_task *task, struct device *dev = hisi_hba->dev; int dlvry_queue_slot, dlvry_queue, rc, slot_idx; int n_elem = 0, n_elem_dif = 0, n_elem_req = 0; + struct scsi_cmnd *scmd = NULL; struct hisi_sas_dq *dq; unsigned long flags; int wr_q_index; @@ -432,10 +433,23 @@ static int hisi_sas_task_prep(struct sas_task *task, return -ECOMM; } - if (hisi_hba->reply_map) { - int cpu = raw_smp_processor_id(); - unsigned int dq_index = hisi_hba->reply_map[cpu]; + if (task->uldd_task) { + struct ata_queued_cmd *qc; + if (dev_is_sata(device)) { + qc = task->uldd_task; + scmd = qc->scsicmd; 
+ } else { + scmd = task->uldd_task; + } + } + + if (scmd) { + unsigned int dq_index; + u32 blk_tag; + + blk_tag = blk_mq_unique_tag(scmd->request); + dq_index = blk_mq_unique_tag_to_hwq(blk_tag); *dq_pointer = dq = &hisi_hba->dq[dq_index]; } else { *dq_pointer = dq = sas_dev->dq; @@ -464,21 +478,9 @@ static int hisi_sas_task_prep(struct sas_task *task, if (hisi_hba->hw->slot_index_alloc) rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device); - else { - struct scsi_cmnd *scsi_cmnd = NULL; - - if (task->uldd_task) { - struct ata_queued_cmd *qc; + else + rc = hisi_sas_slot_index_alloc(hisi_hba, scmd); - if (dev_is_sata(device)) { - qc = task->uldd_task; - scsi_cmnd = qc->scsicmd; - } else { - scsi_cmnd = task->uldd_task; - } - } - rc = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd); - } if (rc < 0) goto err_out_dif_dma_unmap; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3e6b78a1f993..e22231403bbb 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2360,68 +2360,36 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p) return IRQ_WAKE_THREAD; } -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs) +static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) { - const struct cpumask *mask; - int queue, cpu; + int vectors; + int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = BASE_VECTORS_V3_HW, + }; - for (queue = 0; queue < nvecs; queue++) { - struct hisi_sas_cq *cq = &hisi_hba->cq[queue]; + min_msi = MIN_AFFINE_VECTORS_V3_HW; + vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, + min_msi, max_msi, + PCI_IRQ_MSI | + PCI_IRQ_AFFINITY, + &desc); + if (vectors < 0) + return -ENOENT; - mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue + - BASE_VECTORS_V3_HW); - if (!mask) - goto fallback; - cq->irq_mask = mask; - for_each_cpu(cpu, mask) - 
hisi_hba->reply_map[cpu] = queue; - } - return; -fallback: - for_each_possible_cpu(cpu) - hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count; - /* Don't clean all CQ masks */ + hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + shost->nr_hw_queues = hisi_hba->cq_nvecs; + + return 0; } static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; struct pci_dev *pdev = hisi_hba->pci_dev; - int vectors, rc, i; - int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; - - if (auto_affine_msi_experimental) { - struct irq_affinity desc = { - .pre_vectors = BASE_VECTORS_V3_HW, - }; - - dev_info(dev, "Enable MSI auto-affinity\n"); - - min_msi = MIN_AFFINE_VECTORS_V3_HW; - - hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids, - sizeof(unsigned int), - GFP_KERNEL); - if (!hisi_hba->reply_map) - return -ENOMEM; - vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, - min_msi, max_msi, - PCI_IRQ_MSI | - PCI_IRQ_AFFINITY, - &desc); - if (vectors < 0) - return -ENOENT; - setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW); - } else { - min_msi = max_msi; - vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi, - max_msi, PCI_IRQ_MSI); - if (vectors < 0) - return vectors; - } - - hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + int rc, i; rc = devm_request_irq(dev, pci_irq_vector(pdev, 1), int_phy_up_down_bcast_v3_hw, 0, @@ -3070,6 +3038,15 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable) return 0; } +static int hisi_sas_map_queues(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + + return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev, + BASE_VECTORS_V3_HW); +} + static struct scsi_host_template sht_v3_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3079,6 +3056,7 @@ static struct scsi_host_template sht_v3_hw = { .slave_configure = hisi_sas_slave_configure, .scan_finished = 
hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, + .map_queues = hisi_sas_map_queues, .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, @@ -3095,6 +3073,7 @@ static struct scsi_host_template sht_v3_hw = { .shost_attrs = host_attrs_v3_hw, .tag_alloc_policy = BLK_TAG_ALLOC_RR, .host_reset = hisi_sas_host_reset, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v3_hw = { @@ -3266,6 +3245,10 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (hisi_sas_debugfs_enable) hisi_sas_debugfs_init(hisi_hba); + rc = interrupt_preinit_v3_hw(hisi_hba); + if (rc) + goto err_out_ha; + dev_err(dev, "%d hw qeues\n", shost->nr_hw_queues); rc = scsi_add_host(shost, dev); if (rc) goto err_out_ha; From patchwork Wed Jun 10 17:29:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598521 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 152B8913 for ; Wed, 10 Jun 2020 17:33:47 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 051DA2072E for ; Wed, 10 Jun 2020 17:33:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726651AbgFJRdq (ORCPT ); Wed, 10 Jun 2020 13:33:46 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5815 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726524AbgFJRdq (ORCPT ); Wed, 10 Jun 2020 13:33:46 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 4B41AC78CC531FA84610; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 
2020 01:33:20 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , John Garry Subject: [PATCH RFC v7 10/12] megaraid_sas: switch fusion adapters to MQ Date: Thu, 11 Jun 2020 01:29:17 +0800 Message-ID: <1591810159-240929-11-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Fusion adapters can steer completions to individual queues, and we now have support for shared host-wide tags. So we can enable multiqueue support for fusion adapters and drop the hand-crafted interrupt affinity settings. Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Reported-by: Long Li Signed-off-by: Ming Lei Signed-off-by: Ming Lei --- drivers/scsi/megaraid/megaraid_sas.h | 1 - drivers/scsi/megaraid/megaraid_sas_base.c | 59 +++++++-------------- drivers/scsi/megaraid/megaraid_sas_fusion.c | 24 +++++---- 3 files changed, 32 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index af2c7a2a9565..b27a34a5f5de 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2261,7 +2261,6 @@ enum MR_PERF_MODE { struct megasas_instance { - unsigned int *reply_map; __le32 *producer; dma_addr_t producer_h; __le32 *consumer; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 00668335c2af..e6bb2a64d51c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -3115,6 +3116,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, 
return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance *)shost->hostdata; + + if (!instance->smp_affinity_enable) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3423,8 +3437,10 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, + .host_tagset = 1, }; /** @@ -5708,34 +5724,6 @@ megasas_setup_jbod_map(struct megasas_instance *instance) instance->use_seqnum_jbod_fp = false; } -static void megasas_setup_reply_map(struct megasas_instance *instance) -{ - const struct cpumask *mask; - unsigned int queue, cpu, low_latency_index_start; - - low_latency_index_start = instance->low_latency_index_start; - - for (queue = low_latency_index_start; queue < instance->msix_vectors; queue++) { - mask = pci_irq_get_affinity(instance->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - instance->reply_map[cpu] = queue; - } - return; - -fallback: - queue = low_latency_index_start; - for_each_possible_cpu(cpu) { - instance->reply_map[cpu] = queue; - if (queue == (instance->msix_vectors - 1)) - queue = low_latency_index_start; - else - queue++; - } -} - /** * megasas_get_device_list - Get the PD and LD device list from FW. 
* @instance: Adapter soft state @@ -6158,8 +6146,6 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_init_adapter; } - megasas_setup_reply_map(instance); - dev_info(&instance->pdev->dev, "current msix/online cpus\t: (%d/%d)\n", instance->msix_vectors, (unsigned int)num_online_cpus()); @@ -6793,6 +6779,9 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_id = MEGASAS_MAX_DEV_PER_CHANNEL; host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + if (instance->adapter_type != MFI_SERIES && instance->msix_vectors > 0) + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; /* * Notify the mid-layer about the new controller @@ -6960,11 +6949,6 @@ static inline int megasas_alloc_mfi_ctrl_mem(struct megasas_instance *instance) */ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) { - instance->reply_map = kcalloc(nr_cpu_ids, sizeof(unsigned int), - GFP_KERNEL); - if (!instance->reply_map) - return -ENOMEM; - switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) @@ -6981,8 +6965,6 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) return 0; fail: - kfree(instance->reply_map); - instance->reply_map = NULL; return -ENOMEM; } @@ -6995,7 +6977,6 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) */ static inline void megasas_free_ctrl_mem(struct megasas_instance *instance) { - kfree(instance->reply_map); if (instance->adapter_type == MFI_SERIES) { if (instance->producer) dma_free_coherent(&instance->pdev->dev, sizeof(u32), @@ -7683,8 +7664,6 @@ megasas_resume(struct pci_dev *pdev) goto fail_reenable_msix; } - megasas_setup_reply_map(instance); - if (instance->adapter_type != MFI_SERIES) { megasas_reset_reply_desc(instance); if (megasas_ioc_init_fusion(instance)) { diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 319f241da4b6..8e25b700988e 
100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -373,24 +373,24 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* nr_hw_queue = 1 for MegaRAID */ - struct blk_mq_hw_ctx *hctx = - scmd->device->request_queue->queue_hw_ctx[0]; + struct blk_mq_hw_ctx *hctx = scmd->request->mq_hctx; sdev_busy = atomic_read(&hctx->nr_active); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - else if (instance->msix_load_balance) + } else if (instance->msix_load_balance) { cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - else - cmd->request_desc->SCSIIO.MSIxIndex = - instance->reply_map[raw_smp_processor_id()]; + } else { + u32 tag = blk_mq_unique_tag(scmd->request); + + cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + instance->low_latency_index_start; + } } /** @@ -3326,7 +3326,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, { struct megasas_cmd_fusion *cmd, *r1_cmd = NULL; union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; - u32 index; + u32 index, blk_tag, unique_tag; if ((megasas_cmd_type(scmd) == READ_WRITE_LDIO) && instance->ldio_threshold && @@ -3342,7 +3342,9 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, return SCSI_MLQUEUE_HOST_BUSY; } - cmd = megasas_get_cmd_fusion(instance, scmd->request->tag); + unique_tag = blk_mq_unique_tag(scmd->request); + blk_tag = blk_mq_unique_tag_to_tag(unique_tag); + cmd = megasas_get_cmd_fusion(instance, blk_tag); if (!cmd) { atomic_dec(&instance->fw_outstanding); @@ -3383,7 +3385,7 @@ 
megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, */ if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) { r1_cmd = megasas_get_cmd_fusion(instance, - (scmd->request->tag + instance->max_fw_cmds)); + (blk_tag + instance->max_fw_cmds)); megasas_prepare_secondRaid1_IO(instance, cmd, r1_cmd); } From patchwork Wed Jun 10 17:29:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598485 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DF5F8913 for ; Wed, 10 Jun 2020 17:33:35 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CB23320734 for ; Wed, 10 Jun 2020 17:33:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726823AbgFJRdf (ORCPT ); Wed, 10 Jun 2020 13:33:35 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5808 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726332AbgFJRde (ORCPT ); Wed, 10 Jun 2020 13:33:34 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 0BBDD1831338785E6BCC; Thu, 11 Jun 2020 01:33:27 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:21 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , Hannes Reinecke Subject: [PATCH RFC v7 11/12] smartpqi: enable host tagset Date: Thu, 11 Jun 2020 01:29:18 +0800 Message-ID: <1591810159-240929-12-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 
X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Enable host tagset for smartpqi; with this we can use the request tag to look up the command in the pool, avoiding the list iteration in the hot path. Signed-off-by: Hannes Reinecke --- drivers/scsi/smartpqi/smartpqi_init.c | 38 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index cd157f11eb22..1f4de4c2d876 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -575,17 +575,29 @@ static inline void pqi_reinit_io_request(struct pqi_io_request *io_request) } static struct pqi_io_request *pqi_alloc_io_request( - struct pqi_ctrl_info *ctrl_info) + struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) { struct pqi_io_request *io_request; + unsigned int limit = PQI_RESERVED_IO_SLOTS; u16 i = ctrl_info->next_io_request_slot; /* benignly racy */ - while (1) { + if (scmd) { + u32 blk_tag = blk_mq_unique_tag(scmd->request); + + i = blk_mq_unique_tag_to_tag(blk_tag) + limit; io_request = &ctrl_info->io_request_pool[i]; - if (atomic_inc_return(&io_request->refcount) == 1) - break; - atomic_dec(&io_request->refcount); - i = (i + 1) % ctrl_info->max_io_slots; + if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) { + atomic_dec(&io_request->refcount); + return NULL; + } + } else { + while (1) { + io_request = &ctrl_info->io_request_pool[i]; + if (atomic_inc_return(&io_request->refcount) == 1) + break; + atomic_dec(&io_request->refcount); + i = (i + 1) % limit; + } } /* benignly racy */ @@ -4075,7 +4087,7 @@ static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info, atomic_inc(&ctrl_info->sync_cmds_outstanding); - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info,
NULL); put_unaligned_le16(io_request->index, &(((struct pqi_raid_path_request *)request)->request_id)); @@ -5032,7 +5044,9 @@ static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, { struct pqi_io_request *io_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request, device, scmd, queue_group); @@ -5230,7 +5244,10 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request; struct pqi_aio_path_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; + io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = raid_bypass; @@ -5657,7 +5674,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, DECLARE_COMPLETION_ONSTACK(wait); struct pqi_task_management_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); io_request->io_complete_callback = pqi_lun_reset_complete; io_request->context = &wait; @@ -6504,6 +6521,7 @@ static struct scsi_host_template pqi_driver_template = { .map_queues = pqi_map_queues, .sdev_attrs = pqi_sdev_attrs, .shost_attrs = pqi_shost_attrs, + .host_tagset = 1, }; static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info) From patchwork Wed Jun 10 17:29:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11598503 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9DA5514E3 for ; Wed, 10 Jun 2020 17:33:40 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org 
[23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8F633207C3 for ; Wed, 10 Jun 2020 17:33:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727059AbgFJRdj (ORCPT ); Wed, 10 Jun 2020 13:33:39 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:5816 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726965AbgFJRdi (ORCPT ); Wed, 10 Jun 2020 13:33:38 -0400 Received: from DGGEMS409-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id E823267E54BE6431EE5A; Thu, 11 Jun 2020 01:33:31 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS409-HUB.china.huawei.com (10.3.19.209) with Microsoft SMTP Server id 14.3.487.0; Thu, 11 Jun 2020 01:33:21 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , Hannes Reinecke Subject: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ Date: Thu, 11 Jun 2020 01:29:19 +0800 Message-ID: <1591810159-240929-13-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1591810159-240929-1-git-send-email-john.garry@huawei.com> References: <1591810159-240929-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke The smart array HBAs can steer interrupt completion, so this patch switches the implementation to use multiqueue and enables 'host_tagset' as the HBA has a shared host-wide tagset. 
Signed-off-by: Hannes Reinecke Tested-by: Don Brace Reviewed-by: Don Brace --- drivers/scsi/hpsa.c | 44 +++++++------------------------------------- drivers/scsi/hpsa.h | 1 - 2 files changed, 7 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 1e9302e99d05..f807f9bdae85 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { .shost_attrs = hpsa_shost_attrs, .max_sectors = 2048, .no_write_same = 1, + .host_tagset = 1, }; static inline u32 next_command(struct ctlr_info *h, u8 q) @@ -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, static void __enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c, int reply_queue) { + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); + dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); if (c->device) atomic_inc(&c->device->commands_outstanding); - reply_queue = h->reply_map[raw_smp_processor_id()]; + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* Get the ptr to our adapter structure out of cmd->host. */ h = sdev_to_hba(cmd->device); - BUG_ON(cmd->request->tag < 0); - dev = cmd->device->hostdata; if (!dev) { cmd->result = DID_NO_CONNECT << 16; @@ -5830,7 +5831,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) sh->hostdata[0] = (unsigned long) h; sh->irq = pci_irq_vector(h->pdev, 0); sh->unique_id = sh->irq; - + sh->nr_hw_queues = h->msix_vectors > 0 ? 
h->msix_vectors : 1; h->scsi_host = sh; return 0; } @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) */ static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) { - int idx = scmd->request->tag; + u32 blk_tag = blk_mq_unique_tag(scmd->request); + int idx = blk_mq_unique_tag_to_tag(blk_tag); if (idx < 0) return idx; @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < h->msix_vectors; queue++) { - mask = pci_irq_get_affinity(h->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - h->reply_map[cpu] = queue; - } - return; - -fallback: - for_each_possible_cpu(cpu) - h->reply_map[cpu] = 0; -} - /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) if (err) goto clean1; - /* setup mapping between CPU and reply queue */ - hpsa_setup_reply_map(h); - err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8613,7 +8592,6 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, static void hpda_free_ctlr_info(struct ctlr_info *h) { - kfree(h->reply_map); kfree(h); } @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) struct ctlr_info *h; h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); - if (!h->reply_map) { - kfree(h); - return NULL; - } return h; } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index f8c88fc7b80a..ea4a609e3eb7 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -161,7 +161,6 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { - unsigned int *reply_map; 
int ctlr; char devname[8]; char *product_name;