From patchwork Wed Aug 19 15:20:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724445 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AC45F739 for ; Wed, 19 Aug 2020 15:26:01 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9CA5B208A9 for ; Wed, 19 Aug 2020 15:26:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728741AbgHSPZw (ORCPT ); Wed, 19 Aug 2020 11:25:52 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:38874 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728762AbgHSPZt (ORCPT ); Wed, 19 Aug 2020 11:25:49 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 9BE5B110EB0C1973CD99; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:47 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 01/18] blk-mq: Rename BLK_MQ_F_TAG_SHARED as BLK_MQ_F_TAG_QUEUE_SHARED Date: Wed, 19 Aug 2020 23:20:19 +0800 Message-ID: <1597850436-116171-2-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Ming Lei BLK_MQ_F_TAG_SHARED actually means that tags is shared among request queues, all of which should belong to LUNs attached to same HBA. So rename it to make the point explicitly. Suggested-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei [jpg: rebase a few times, add rnbd-clt.c change] Signed-off-by: John Garry --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-tag.h | 6 +++--- block/blk-mq.c | 20 ++++++++++---------- drivers/block/rnbd/rnbd-clt.c | 2 +- include/linux/blk-mq.h | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3f09bcb8a6fd..a4597febff69 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -240,7 +240,7 @@ static const char *const alloc_policy_name[] = { #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(SHOULD_MERGE), - HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index b1acac518c4e..918e2cee4f43 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -56,7 +56,7 @@ extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return false; return __blk_mq_tag_busy(hctx); @@ -64,7 +64,7 @@ static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return; __blk_mq_tag_idle(hctx); @@ -79,7 +79,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, { unsigned int depth, users; - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return true; diff --git a/block/blk-mq.c b/block/blk-mq.c index 0015a1892153..8f95cc443527 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1124,7 +1124,7 @@ static bool blk_mq_get_driver_tag(struct request *rq) if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) return false; - if ((hctx->flags & BLK_MQ_F_TAG_SHARED) && + if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && !(rq->rq_flags & RQF_MQ_INFLIGHT)) { rq->rq_flags |= RQF_MQ_INFLIGHT; atomic_inc(&hctx->nr_active); @@ -1168,7 +1168,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait_queue_entry_t *wait; bool ret; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { blk_mq_sched_mark_restart_hctx(hctx); /* @@ -1420,7 +1420,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, bool needs_restart; /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && - (hctx->flags & BLK_MQ_F_TAG_SHARED); + (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; blk_mq_release_budgets(q, nr_budgets); @@ -2657,7 +2657,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); @@ -2874,9 +2874,9 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) queue_for_each_hw_ctx(q, hctx, i) { if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; + hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } @@ -2902,7 +2902,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) list_del(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ - set->flags &= ~BLK_MQ_F_TAG_SHARED; + set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, false); } @@ -2919,12 +2919,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, * Check to see if we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && - !(set->flags & BLK_MQ_F_TAG_SHARED)) { - set->flags |= BLK_MQ_F_TAG_SHARED; + !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { + set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, true); } - if (set->flags & BLK_MQ_F_TAG_SHARED) + if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail(&q->tag_set_list, &set->tag_list); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index cc6a4e2587ae..77b0e0c62c36 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1180,7 +1180,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->queue_depth = sess->queue_depth; tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_TAG_SHARED; + BLK_MQ_F_TAG_QUEUE_SHARED; tag_set->cmd_size = sizeof(struct rnbd_iu); tag_set->nr_hw_queues = num_online_cpus(); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9d2d5ad367a4..95aca7aa8957 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -378,7 +378,7 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: From patchwork Wed Aug 19 15:20:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724471 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1AFC6739 for ; Wed, 19 Aug 2020 15:26:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 01BAD208A9 for ; Wed, 19 Aug 2020 15:26:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728868AbgHSP0a (ORCPT ); Wed, 19 Aug 2020 11:26:30 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39652 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728735AbgHSPZW (ORCPT ); Wed, 19 Aug 2020 11:25:22 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id D749C98872B15E6472CE; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:48 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 02/18] blk-mq: Rename blk_mq_update_tag_set_depth() Date: Wed, 19 Aug 2020 23:20:20 +0800 Message-ID: <1597850436-116171-3-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke The function does not set the depth, but rather transitions from shared to non-shared queues and vice versa. So rename it to blk_mq_update_tag_set_shared() to better reflect its purpose. Signed-off-by: Hannes Reinecke [jpg: take out some unrelated changes in blk_mq_init_bitmap_tags()] Signed-off-by: John Garry --- block/blk-mq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8f95cc443527..50de9eb60466 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2880,8 +2880,8 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, - bool shared) +static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; @@ -2904,7 +2904,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) /* just transitioned to unshared */ set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, false); + blk_mq_update_tag_set_shared(set, false); } mutex_unlock(&set->tag_list_lock); INIT_LIST_HEAD(&q->tag_set_list); @@ -2922,7 +2922,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, true); + blk_mq_update_tag_set_shared(set, true); } if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); From patchwork Wed Aug 19 15:20:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724479 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E5B4715E4 for ; Wed, 19 Aug 2020 15:26:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D6E0E20897 for ; Wed, 19 Aug 2020 15:26:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728767AbgHSP0p (ORCPT ); Wed, 19 Aug 2020 11:26:45 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:38868 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726747AbgHSP0m (ORCPT ); Wed, 19 Aug 2020 11:26:42 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 73BF6DEDDFE7CC69C53B; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:48 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 03/18] blk-mq: Free tags in blk_mq_init_tags() upon error Date: Wed, 19 Aug 2020 23:20:21 +0800 Message-ID: <1597850436-116171-4-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Since the tags are allocated in blk_mq_init_tags(), it's better practice to free in that same function upon error, rather than a callee which is to init the bitmap tags (blk_mq_init_tags()). Signed-off-by: Hannes Reinecke [jpg: Split from an earlier patch with a new commit message] Signed-off-by: John Garry --- block/blk-mq-tag.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 32d82e23b095..c2c9f369d777 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -429,24 +429,22 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, node); } -static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, - int node, int alloc_policy) +static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, + int node, int alloc_policy) { unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) - goto free_tags; + return -ENOMEM; if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, node)) goto free_bitmap_tags; - return tags; + return 0; free_bitmap_tags: sbitmap_queue_free(&tags->bitmap_tags); -free_tags: - kfree(tags); - return NULL; + return -ENOMEM; } struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, @@ -467,7 +465,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; - return blk_mq_init_bitmap_tags(tags, node, alloc_policy); + if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { + kfree(tags); + return NULL; + } + return tags; } void blk_mq_free_tags(struct blk_mq_tags *tags) From patchwork Wed Aug 19 15:20:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724455 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8ADED739 for ; Wed, 19 Aug 2020 15:26:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 75F91208A9 for ; Wed, 19 Aug 2020 15:26:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728816AbgHSP0E (ORCPT ); Wed, 19 Aug 2020 11:26:04 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39910 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728774AbgHSPZb (ORCPT ); Wed, 19 Aug 2020 11:25:31 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id C8B062969AEDCC96C3A5; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:48 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 04/18] blk-mq: Pass flags for tag init/free Date: Wed, 19 Aug 2020 23:20:22 +0800 Message-ID: <1597850436-116171-5-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Pass hctx/tagset flags argument down to blk_mq_init_tags() and blk_mq_free_tags() for selective init/free. For now, make it include the alloc policy flag, which can be evaluated when needed (in blk_mq_init_tags()). Signed-off-by: John Garry --- block/blk-mq-sched.c | 11 ++++++++--- block/blk-mq-tag.c | 12 +++++++----- block/blk-mq-tag.h | 7 ++++--- block/blk-mq.c | 23 +++++++++++++---------- block/blk-mq.h | 5 +++-- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index a19cdf159b75..ee970d848e62 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -607,9 +607,11 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + unsigned int flags = set->flags; + if (hctx->sched_tags) { blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); - blk_mq_free_rq_map(hctx->sched_tags); + blk_mq_free_rq_map(hctx->sched_tags, flags); hctx->sched_tags = NULL; } } @@ -619,10 +621,11 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, unsigned int hctx_idx) { struct blk_mq_tag_set *set = q->tag_set; + unsigned int flags = set->flags; int ret; hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, - set->reserved_tags); + set->reserved_tags, flags); if (!hctx->sched_tags) return -ENOMEM; @@ -640,8 +643,10 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q) int i; queue_for_each_hw_ctx(q, hctx, i) { + unsigned int flags = hctx->flags; + if (hctx->sched_tags) { - blk_mq_free_rq_map(hctx->sched_tags); + blk_mq_free_rq_map(hctx->sched_tags, flags); hctx->sched_tags = NULL; } } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c2c9f369d777..0426f01e06a6 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -449,8 +449,9 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, unsigned int reserved_tags, - int node, int alloc_policy) + int node, unsigned int flags) { + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags); struct blk_mq_tags *tags; if (total_tags > BLK_MQ_TAG_MAX) { @@ -472,7 +473,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, return tags; } -void blk_mq_free_tags(struct blk_mq_tags *tags) +void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags) { sbitmap_queue_free(&tags->bitmap_tags); sbitmap_queue_free(&tags->breserved_tags); @@ -494,6 +495,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, */ if (tdepth > tags->nr_tags) { struct blk_mq_tag_set *set = hctx->queue->tag_set; + unsigned int flags = set->flags; struct blk_mq_tags *new; bool ret; @@ -508,17 +510,17 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, return -EINVAL; new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, - tags->nr_reserved_tags); + tags->nr_reserved_tags, flags); if (!new) return -ENOMEM; ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); if (ret) { - blk_mq_free_rq_map(new); + blk_mq_free_rq_map(new, flags); return -ENOMEM; } blk_mq_free_rqs(set, *tagsptr, hctx->queue_num); - blk_mq_free_rq_map(*tagsptr); + blk_mq_free_rq_map(*tagsptr, flags); *tagsptr = new; } else { /* diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 918e2cee4f43..391f68d32fa1 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -21,9 +21,10 @@ struct blk_mq_tags { struct list_head page_list; }; - -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy); -extern void blk_mq_free_tags(struct blk_mq_tags *tags); +extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, + unsigned int reserved_tags, + int node, unsigned int flags); +extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, diff --git a/block/blk-mq.c b/block/blk-mq.c index 50de9eb60466..376930061452 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2285,20 +2285,21 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, } } -void blk_mq_free_rq_map(struct blk_mq_tags *tags) +void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags) { kfree(tags->rqs); tags->rqs = NULL; kfree(tags->static_rqs); tags->static_rqs = NULL; - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); } struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int nr_tags, - unsigned int reserved_tags) + unsigned int reserved_tags, + unsigned int flags) { struct blk_mq_tags *tags; int node; @@ -2307,8 +2308,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, if (node == NUMA_NO_NODE) node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, node, - BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); + tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags); if (!tags) return NULL; @@ -2316,7 +2316,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!tags->rqs) { - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); return NULL; } @@ -2325,7 +2325,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, node); if (!tags->static_rqs) { kfree(tags->rqs); - blk_mq_free_tags(tags); + blk_mq_free_tags(tags, flags); return NULL; } @@ -2734,10 +2734,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, int hctx_idx) { + unsigned int flags = set->flags; int ret = 0; set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, - set->queue_depth, set->reserved_tags); + set->queue_depth, set->reserved_tags, flags); if (!set->tags[hctx_idx]) return false; @@ -2746,7 +2747,7 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, if (!ret) return true; - blk_mq_free_rq_map(set->tags[hctx_idx]); + blk_mq_free_rq_map(set->tags[hctx_idx], flags); set->tags[hctx_idx] = NULL; return false; } @@ -2754,9 +2755,11 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, unsigned int hctx_idx) { + unsigned int flags = set->flags; + if (set->tags && set->tags[hctx_idx]) { blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); - blk_mq_free_rq_map(set->tags[hctx_idx]); + blk_mq_free_rq_map(set->tags[hctx_idx], flags); set->tags[hctx_idx] = NULL; } } diff --git a/block/blk-mq.h b/block/blk-mq.h index 863a2f3346d4..b86e7fe47789 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -53,11 +53,12 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, */ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx); -void blk_mq_free_rq_map(struct blk_mq_tags *tags); +void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags); struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int nr_tags, - unsigned int reserved_tags); + unsigned int reserved_tags, + unsigned int flags); int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx, unsigned int depth); From patchwork Wed Aug 19 15:20:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724441 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2D66316B1 for ; Wed, 19 Aug 2020 15:25:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1A38620B1F for ; Wed, 19 Aug 2020 15:25:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728801AbgHSPZj (ORCPT ); Wed, 19 Aug 2020 11:25:39 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:38884 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728259AbgHSPZd (ORCPT ); Wed, 19 Aug 2020 11:25:33 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id A6135B0D07DE32A285FB; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:49 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 05/18] blk-mq: Use pointers for blk_mq_tags bitmap tags Date: Wed, 19 Aug 2020 23:20:23 +0800 Message-ID: <1597850436-116171-6-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Introduce pointers for the blk_mq_tags regular and reserved bitmap tags, with the goal of later being able to use a common shared tag bitmap across all HW contexts in a set. Tested-by: Don Brace #SCSI resv cmds patches used Reviewed-by: Hannes Reinecke Signed-off-by: John Garry --- block/bfq-iosched.c | 4 ++-- block/blk-mq-debugfs.c | 8 ++++---- block/blk-mq-tag.c | 41 ++++++++++++++++++++++------------------- block/blk-mq-tag.h | 7 +++++-- block/blk-mq.c | 8 ++++---- block/kyber-iosched.c | 4 ++-- 6 files changed, 39 insertions(+), 33 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index a4c0bec920cb..88f0dfa545d7 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6372,8 +6372,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) struct blk_mq_tags *tags = hctx->sched_tags; unsigned int min_shallow; - min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); - sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); + min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags); + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow); } static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a4597febff69..645b7f800cb8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -452,11 +452,11 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, atomic_read(&tags->active_queues)); seq_puts(m, "\nbitmap_tags:\n"); - sbitmap_queue_show(&tags->bitmap_tags, m); + sbitmap_queue_show(tags->bitmap_tags, m); if (tags->nr_reserved_tags) { seq_puts(m, "\nbreserved_tags:\n"); - sbitmap_queue_show(&tags->breserved_tags, m); + sbitmap_queue_show(tags->breserved_tags, m); } } @@ -487,7 +487,7 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) if (res) goto out; if (hctx->tags) - sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); + sbitmap_bitmap_show(&hctx->tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: @@ -521,7 +521,7 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) if (res) goto out; if (hctx->sched_tags) - sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); + sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 0426f01e06a6..77fbeae05745 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -35,9 +35,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) */ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) { - sbitmap_queue_wake_all(&tags->bitmap_tags); + sbitmap_queue_wake_all(tags->bitmap_tags); if (include_reserve) - sbitmap_queue_wake_all(&tags->breserved_tags); + sbitmap_queue_wake_all(tags->breserved_tags); } /* @@ -82,10 +82,10 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) WARN_ON_ONCE(1); return BLK_MQ_NO_TAG; } - bt = &tags->breserved_tags; + bt = tags->breserved_tags; tag_offset = 0; } else { - bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; tag_offset = tags->nr_reserved_tags; } @@ -131,9 +131,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) data->ctx); tags = blk_mq_tags_from_data(data); if (data->flags & BLK_MQ_REQ_RESERVED) - bt = &tags->breserved_tags; + bt = tags->breserved_tags; else - bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; /* * If destination hw queue is changed, fake wake up on @@ -167,10 +167,10 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); - sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); + sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu); } else { BUG_ON(tag >= tags->nr_reserved_tags); - sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); + sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu); } } @@ -298,9 +298,9 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED); if (tags->nr_reserved_tags) - bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, + bt_tags_for_each(tags, tags->breserved_tags, fn, priv, flags | BT_TAG_ITER_RESERVED); - bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags); + bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, flags); } /** @@ -416,8 +416,8 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, continue; if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); + bt_for_each(hctx, tags->breserved_tags, fn, priv, true); + bt_for_each(hctx, tags->bitmap_tags, fn, priv, false); } blk_queue_exit(q); } @@ -435,15 +435,18 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; - if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) + if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node)) return -ENOMEM; - if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, - node)) + if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags, + round_robin, node)) goto free_bitmap_tags; + tags->bitmap_tags = &tags->__bitmap_tags; + tags->breserved_tags = &tags->__breserved_tags; + return 0; free_bitmap_tags: - sbitmap_queue_free(&tags->bitmap_tags); + sbitmap_queue_free(&tags->__bitmap_tags); return -ENOMEM; } @@ -475,8 +478,8 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags) { - sbitmap_queue_free(&tags->bitmap_tags); - sbitmap_queue_free(&tags->breserved_tags); + sbitmap_queue_free(&tags->__bitmap_tags); + sbitmap_queue_free(&tags->__breserved_tags); kfree(tags); } @@ -527,7 +530,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, * Don't need (or can't) update reserved tags here, they * remain static and should never need resizing. */ - sbitmap_queue_resize(&tags->bitmap_tags, + sbitmap_queue_resize(tags->bitmap_tags, tdepth - tags->nr_reserved_tags); } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 391f68d32fa1..5b6f25bd1966 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -13,8 +13,11 @@ struct blk_mq_tags { atomic_t active_queues; - struct sbitmap_queue bitmap_tags; - struct sbitmap_queue breserved_tags; + struct sbitmap_queue *bitmap_tags; + struct sbitmap_queue *breserved_tags; + + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct request **rqs; struct request **static_rqs; diff --git a/block/blk-mq.c b/block/blk-mq.c index 376930061452..ce712ec818e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1096,14 +1096,14 @@ static inline unsigned int queued_to_index(unsigned int queued) static bool __blk_mq_get_driver_tag(struct request *rq) { - struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; + struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; int tag; blk_mq_tag_busy(rq->mq_hctx); if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { - bt = &rq->mq_hctx->tags->breserved_tags; + bt = rq->mq_hctx->tags->breserved_tags; tag_offset = 0; } @@ -1145,7 +1145,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, struct sbitmap_queue *sbq; list_del_init(&wait->entry); - sbq = &hctx->tags->bitmap_tags; + sbq = hctx->tags->bitmap_tags; atomic_dec(&sbq->ws_active); } spin_unlock(&hctx->dispatch_wait_lock); @@ -1163,7 +1163,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; + struct sbitmap_queue *sbq = hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index a38c5ab103d1..075e99c207ef 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -359,7 +359,7 @@ static unsigned int kyber_sched_tags_shift(struct request_queue *q) * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. */ - return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) @@ -502,7 +502,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) khd->batching = 0; hctx->sched_data = khd; - sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags, + sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags, kqd->async_depth); return 0; From patchwork Wed Aug 19 15:20:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724475 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B5B451744 for ; Wed, 19 Aug 2020 15:26:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A577C208DB for ; Wed, 19 Aug 2020 15:26:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728849AbgHSP03 (ORCPT ); Wed, 19 Aug 2020 11:26:29 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39600 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728729AbgHSPZZ (ORCPT ); Wed, 19 Aug 2020 11:25:25 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 7CFA469B6EBA2B998A65; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:49 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 06/18] blk-mq: Facilitate a shared sbitmap per tagset Date: Wed, 19 Aug 2020 23:20:24 +0800 Message-ID: <1597850436-116171-7-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Some SCSI HBAs (such as HPSA, megaraid, mpt3sas, hisi_sas_v3 ..) support multiple reply queues with single hostwide tags. In addition, these drivers want to use interrupt assignment in pci_alloc_irq_vectors(PCI_IRQ_AFFINITY). However, as discussed in [0], CPU hotplug may cause in-flight IO completion to not be serviced when an interrupt is shutdown. That problem is solved in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"). However, to take advantage of that blk-mq feature, the HBA HW queuess are required to be mapped to that of the blk-mq hctx's; to do that, the HBA HW queues need to be exposed to the upper layer. In making that transition, the per-SCSI command request tags are no longer unique per Scsi host - they are just unique per hctx. As such, the HBA LLDD would have to generate this tag internally, which has a certain performance overhead. However another problem is that blk-mq assumes the host may accept (Scsi_host.can_queue * #hw queue) commands. In commit 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq"), the Scsi host busy counter was removed, which would stop the LLDD being sent more than .can_queue commands; however, it should still be ensured that the block layer does not issue more than .can_queue commands to the Scsi host. To solve this problem, introduce a shared sbitmap per blk_mq_tag_set, which may be requested at init time. New flag BLK_MQ_F_TAG_HCTX_SHARED should be set when requesting the tagset to indicate whether the shared sbitmap should be used. Even when BLK_MQ_F_TAG_HCTX_SHARED is set, a full set of tags and requests are still allocated per hctx; the reason for this is that if tags and requests were only allocated for a single hctx - like hctx0 - it may break block drivers which expect a request be associated with a specific hctx, i.e. not always hctx0. This will introduce extra memory usage. This change is based on work originally from Ming Lei in [1] and from Bart's suggestion in [2]. [0] https://lore.kernel.org/linux-block/alpine.DEB.2.21.1904051331270.1802@nanos.tec.linutronix.de/ [1] https://lore.kernel.org/linux-block/20190531022801.10003-1-ming.lei@redhat.com/ [2] https://lore.kernel.org/linux-block/ff77beff-5fd9-9f05-12b6-826922bace1f@huawei.com/T/#m3db0a602f095cbcbff27e9c884d6b4ae826144be Tested-by: Don Brace #SCSI resv cmds patches used Signed-off-by: John Garry --- block/blk-mq-sched.c | 8 ++++--- block/blk-mq-tag.c | 51 ++++++++++++++++++++++++++++++++++++++---- block/blk-mq-tag.h | 7 ++++++ block/blk-mq.c | 15 +++++++++++++ block/blk-mq.h | 5 +++++ include/linux/blk-mq.h | 6 +++++ 6 files changed, 85 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ee970d848e62..777be967c4f2 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -607,7 +607,7 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - unsigned int flags = set->flags; + unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED; if (hctx->sched_tags) { blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); @@ -621,7 +621,8 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, unsigned int hctx_idx) { struct blk_mq_tag_set *set = q->tag_set; - unsigned int flags = set->flags; + /* Clear HCTX_SHARED so tags are init'ed */ + unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED; int ret; hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, @@ -643,7 +644,8 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q) int i; queue_for_each_hw_ctx(q, hctx, i) { - unsigned int flags = hctx->flags; + /* Clear HCTX_SHARED so tags are freed */ + unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED; if (hctx->sched_tags) { blk_mq_free_rq_map(hctx->sched_tags, flags); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 77fbeae05745..c6d7ebc62bdb 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -197,7 +197,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * We can hit rq == NULL here, because the tagging functions * test and set the bit before assigning ->rqs[]. */ - if (rq && rq->q == hctx->queue) + if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx) return iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } @@ -450,6 +450,38 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, return -ENOMEM; } +int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags) +{ + unsigned int depth = set->queue_depth - set->reserved_tags; + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags); + bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; + int i, node = set->numa_node; + + if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node)) + return -ENOMEM; + if (bt_alloc(&set->__breserved_tags, set->reserved_tags, + round_robin, node)) + goto free_bitmap_tags; + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + tags->bitmap_tags = &set->__bitmap_tags; + tags->breserved_tags = &set->__breserved_tags; + } + + return 0; +free_bitmap_tags: + sbitmap_queue_free(&set->__bitmap_tags); + return -ENOMEM; +} + +void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set) +{ + sbitmap_queue_free(&set->__bitmap_tags); + sbitmap_queue_free(&set->__breserved_tags); +} + struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, unsigned int reserved_tags, int node, unsigned int flags) @@ -469,6 +501,9 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; + if (flags & BLK_MQ_F_TAG_HCTX_SHARED) + return tags; + if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { kfree(tags); return NULL; @@ -478,8 +513,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags) { - sbitmap_queue_free(&tags->__bitmap_tags); - sbitmap_queue_free(&tags->__breserved_tags); + if (!(flags & BLK_MQ_F_TAG_HCTX_SHARED)) { + sbitmap_queue_free(tags->bitmap_tags); + sbitmap_queue_free(tags->breserved_tags); + } kfree(tags); } @@ -498,7 +535,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, */ if (tdepth > tags->nr_tags) { struct blk_mq_tag_set *set = hctx->queue->tag_set; - unsigned int flags = set->flags; + /* Only sched tags can grow, so clear HCTX_SHARED flag */ + unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED; struct blk_mq_tags *new; bool ret; @@ -537,6 +575,11 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, return 0; } +void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size) +{ + sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags); +} + /** * blk_mq_unique_tag() - return a tag that is unique queue-wide * @rq: request for which to compute a unique tag diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 5b6f25bd1966..8f4cce52ba2d 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -29,12 +29,19 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, int node, unsigned int flags); extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags); +extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, + unsigned int flags); +extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set); + extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); +extern void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, + unsigned int size); + extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv); diff --git a/block/blk-mq.c b/block/blk-mq.c index ce712ec818e4..a500d1dfa1bd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3430,11 +3430,21 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (ret) goto out_free_mq_map; + if (blk_mq_is_sbitmap_shared(set->flags)) { + if (blk_mq_init_shared_sbitmap(set, set->flags)) { + ret = -ENOMEM; + goto out_free_mq_rq_maps; + } + } + mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; +out_free_mq_rq_maps: + for (i = 0; i < set->nr_hw_queues; i++) + blk_mq_free_map_and_requests(set, i); out_free_mq_map: for (i = 0; i < set->nr_maps; i++) { kfree(set->map[i].mq_map); @@ -3453,6 +3463,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) blk_mq_free_map_and_requests(set, i); + if (blk_mq_is_sbitmap_shared(set->flags)) + blk_mq_exit_shared_sbitmap(set); + for (j = 0; j < set->nr_maps; j++) { kfree(set->map[j].mq_map); set->map[j].mq_map = NULL; @@ -3489,6 +3502,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!hctx->sched_tags) { ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); + if (!ret && blk_mq_is_sbitmap_shared(set->flags)) + blk_mq_tag_resize_shared_sbitmap(set, nr); } else { ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); diff --git a/block/blk-mq.h b/block/blk-mq.h index b86e7fe47789..3acdb56065e1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -159,6 +159,11 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; +static inline bool blk_mq_is_sbitmap_shared(unsigned int flags) +{ + return flags & BLK_MQ_F_TAG_HCTX_SHARED; +} + static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->q->elevator) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 95aca7aa8957..03b1f39d5a72 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -231,6 +231,9 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @__bitmap_tags: A shared tags sbitmap, used over all hctx's + * @__breserved_tags: + * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @tag_list_lock: Serializes tag_list accesses. @@ -250,6 +253,8 @@ struct blk_mq_tag_set { unsigned int flags; void *driver_data; + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; struct mutex tag_list_lock; @@ -384,6 +389,7 @@ enum { * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, + BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, From patchwork Wed Aug 19 15:20:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724459 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3C31215E4 for ; Wed, 19 Aug 2020 15:26:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2EA4D208A9 for ; Wed, 19 Aug 2020 15:26:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728763AbgHSP0D (ORCPT ); Wed, 19 Aug 2020 11:26:03 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39924 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728782AbgHSPZb (ORCPT ); Wed, 19 Aug 2020 11:25:31 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id B0A14375EA32A0D143C2; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:49 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 07/18] blk-mq: Relocate hctx_may_queue() Date: Wed, 19 Aug 2020 23:20:25 +0800 Message-ID: <1597850436-116171-8-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org blk-mq.h and blk-mq-tag.h include on each other, which is less than ideal. Locate hctx_may_queue() to blk-mq.h, as it is not really tag specific code. In this way, we can drop the blk-mq-tag.h include of blk-mq.h Signed-off-by: John Garry --- block/blk-mq-tag.h | 33 --------------------------------- block/blk-mq.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 8f4cce52ba2d..7d3e6b333a4a 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -2,8 +2,6 @@ #ifndef INT_BLK_MQ_TAG_H #define INT_BLK_MQ_TAG_H -#include "blk-mq.h" - /* * Tag address space map. */ @@ -81,37 +79,6 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) __blk_mq_tag_idle(hctx); } -/* - * For shared tag users, we track the number of currently active users - * and attempt to provide a fair share of the tag depth for each of them. - */ -static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, - struct sbitmap_queue *bt) -{ - unsigned int depth, users; - - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) - return true; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return true; - - /* - * Don't try dividing an ant - */ - if (bt->sb.depth == 1) - return true; - - users = atomic_read(&hctx->tags->active_queues); - if (!users) - return true; - - /* - * Allow at least some tags - */ - depth = max((bt->sb.depth + users - 1) / users, 4U); - return atomic_read(&hctx->nr_active) < depth; -} - static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { diff --git a/block/blk-mq.h b/block/blk-mq.h index 3acdb56065e1..56dc37c21908 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -259,4 +259,36 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q, return NULL; } +/* + * For shared tag users, we track the number of currently active users + * and attempt to provide a fair share of the tag depth for each of them. + */ +static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, + struct sbitmap_queue *bt) +{ + unsigned int depth, users; + + if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) + return true; + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return true; + + /* + * Don't try dividing an ant + */ + if (bt->sb.depth == 1) + return true; + + users = atomic_read(&hctx->tags->active_queues); + if (!users) + return true; + + /* + * Allow at least some tags + */ + depth = max((bt->sb.depth + users - 1) / users, 4U); + return atomic_read(&hctx->nr_active) < depth; +} + + #endif From patchwork Wed Aug 19 15:20:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724435 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A764015E4 for ; Wed, 19 Aug 2020 15:25:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 987F620897 for ; Wed, 19 Aug 2020 15:25:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728814AbgHSPZn (ORCPT ); Wed, 19 Aug 2020 11:25:43 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:40096 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728789AbgHSPZm (ORCPT ); Wed, 19 Aug 2020 11:25:42 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 5579C411A4F08A245081; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:50 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 08/18] blk-mq: Record nr_active_requests per queue for when using shared sbitmap Date: Wed, 19 Aug 2020 23:20:26 +0800 Message-ID: <1597850436-116171-9-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org The per-hctx nr_active value can no longer be used to fairly assign a share of tag depth per request queue for when using a shared sbitmap, as it does not consider that the tags are shared tags over all hctx's. For this case, record the nr_active_requests per request_queue, and make the judgement based on that value. Tested-by: Don Brace #SCSI resv cmds patches used Co-developed-with: Kashyap Desai Signed-off-by: John Garry --- block/blk-core.c | 2 ++ block/blk-mq.c | 4 ++-- block/blk-mq.h | 26 ++++++++++++++++++++++++-- include/linux/blkdev.h | 2 ++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index d9d632639bd1..360975255a2a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -542,6 +542,8 @@ struct request_queue *blk_alloc_queue(int node_id) q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->node = node_id; + atomic_set(&q->nr_active_requests_shared_sbitmap, 0); + timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); diff --git a/block/blk-mq.c b/block/blk-mq.c index a500d1dfa1bd..ebb72a59b433 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -519,7 +519,7 @@ void blk_mq_free_request(struct request *rq) ctx->rq_completed[rq_is_sync(rq)]++; if (rq->rq_flags & RQF_MQ_INFLIGHT) - atomic_dec(&hctx->nr_active); + __blk_mq_dec_active_requests(hctx); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->backing_dev_info); @@ -1127,7 +1127,7 @@ static bool blk_mq_get_driver_tag(struct request *rq) if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && !(rq->rq_flags & RQF_MQ_INFLIGHT)) { rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&hctx->nr_active); + __blk_mq_inc_active_requests(hctx); } hctx->tags->rqs[rq->tag] = rq; return true; diff --git a/block/blk-mq.h b/block/blk-mq.h index 56dc37c21908..25ec73078e95 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -199,6 +199,28 @@ static inline bool blk_mq_get_dispatch_budget(struct request_queue *q) return true; } +static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) +{ + if (blk_mq_is_sbitmap_shared(hctx->flags)) + atomic_inc(&hctx->queue->nr_active_requests_shared_sbitmap); + else + atomic_inc(&hctx->nr_active); +} + +static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) +{ + if (blk_mq_is_sbitmap_shared(hctx->flags)) + atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap); + else + atomic_dec(&hctx->nr_active); +} + +static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) +{ + if (blk_mq_is_sbitmap_shared(hctx->flags)) + return atomic_read(&hctx->queue->nr_active_requests_shared_sbitmap); + return atomic_read(&hctx->nr_active); +} static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { @@ -207,7 +229,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, if (rq->rq_flags & RQF_MQ_INFLIGHT) { rq->rq_flags &= ~RQF_MQ_INFLIGHT; - atomic_dec(&hctx->nr_active); + __blk_mq_dec_active_requests(hctx); } } @@ -287,7 +309,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, * Allow at least some tags */ depth = max((bt->sb.depth + users - 1) / users, 4U); - return atomic_read(&hctx->nr_active) < depth; + return __blk_mq_active_requests(hctx) < depth; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb5636cc17b9..1d85235611e1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -484,6 +484,8 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; + atomic_t nr_active_requests_shared_sbitmap; + struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); From patchwork Wed Aug 19 15:20:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724449 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BAA89739 for ; Wed, 19 Aug 2020 15:26:04 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AE502208A9 for ; Wed, 19 Aug 2020 15:26:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728836AbgHSP0D (ORCPT ); Wed, 19 Aug 2020 11:26:03 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39912 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728776AbgHSPZa (ORCPT ); Wed, 19 Aug 2020 11:25:30 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id BB1DD5363F1B61B21628; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:50 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 09/18] blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap Date: Wed, 19 Aug 2020 23:20:27 +0800 Message-ID: <1597850436-116171-10-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org For when using a shared sbitmap, no longer should the number of active request queues per hctx be relied on for when judging how to share the tag bitmap. Instead maintain the number of active request queues per tag_set, and make the judgement based on that. Tested-by: Don Brace #SCSI resv cmds patches used Originally-from: Kashyap Desai Signed-off-by: John Garry --- block/blk-mq-tag.c | 33 +++++++++++++++++++++++++-------- block/blk-mq.c | 2 ++ block/blk-mq.h | 16 +++++++++++++--- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c6d7ebc62bdb..c31c4a0478a5 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -23,9 +23,18 @@ */ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && - !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - atomic_inc(&hctx->tags->active_queues); + if (blk_mq_is_sbitmap_shared(hctx->flags)) { + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) && + !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + atomic_inc(&set->active_queues_shared_sbitmap); + } else { + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && + !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + atomic_inc(&hctx->tags->active_queues); + } return true; } @@ -47,11 +56,19 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->tags; - - if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return; - - atomic_dec(&tags->active_queues); + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + + if (blk_mq_is_sbitmap_shared(hctx->flags)) { + if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE, + &q->queue_flags)) + return; + atomic_dec(&set->active_queues_shared_sbitmap); + } else { + if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return; + atomic_dec(&tags->active_queues); + } blk_mq_tag_wakeup_all(tags, false); } diff --git a/block/blk-mq.c b/block/blk-mq.c index ebb72a59b433..457b43829a4f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3431,6 +3431,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) goto out_free_mq_map; if (blk_mq_is_sbitmap_shared(set->flags)) { + atomic_set(&set->active_queues_shared_sbitmap, 0); + if (blk_mq_init_shared_sbitmap(set, set->flags)) { ret = -ENOMEM; goto out_free_mq_rq_maps; diff --git a/block/blk-mq.h b/block/blk-mq.h index 25ec73078e95..a52703c98b77 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -292,8 +292,6 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return true; /* * Don't try dividing an ant @@ -301,7 +299,19 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, if (bt->sb.depth == 1) return true; - users = atomic_read(&hctx->tags->active_queues); + if (blk_mq_is_sbitmap_shared(hctx->flags)) { + struct request_queue *q = hctx->queue; + struct blk_mq_tag_set *set = q->tag_set; + + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags)) + return true; + users = atomic_read(&set->active_queues_shared_sbitmap); + } else { + if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return true; + users = atomic_read(&hctx->tags->active_queues); + } + if (!users) return true; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 03b1f39d5a72..a4b35ec60faf 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -252,6 +252,7 @@ struct blk_mq_tag_set { unsigned int timeout; unsigned int flags; void *driver_data; + atomic_t active_queues_shared_sbitmap; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d85235611e1..67b39596436c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -617,6 +617,7 @@ struct request_queue { #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ +#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP)) From patchwork Wed Aug 19 15:20:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724499 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3925815E4 for ; Wed, 19 Aug 2020 15:27:35 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2BCFF208DB for ; Wed, 19 Aug 2020 15:27:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726747AbgHSP1P (ORCPT ); Wed, 19 Aug 2020 11:27:15 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:38872 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728203AbgHSPZM (ORCPT ); Wed, 19 Aug 2020 11:25:12 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 60254424BC119C6AF290; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:51 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 10/18] blk-mq, elevator: Count requests per hctx to improve performance Date: Wed, 19 Aug 2020 23:20:28 +0800 Message-ID: <1597850436-116171-11-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Kashyap Desai High CPU utilization on "native_queued_spin_lock_slowpath" due to lock contention is possible for mq-deadline and bfq IO schedulers when nr_hw_queues is more than one. It is because kblockd work queue can submit IO from all online CPUs (through blk_mq_run_hw_queues()) even though only one hctx has pending commands. The elevator callback .has_work for mq-deadline and bfq scheduler considers pending work if there are any IOs on request queue but it does not account hctx context. Add a per-hctx 'elevator_queued' count to the hctx to avoid triggering the elevator even though there are no requests queued. Signed-off-by: Kashyap Desai Signed-off-by: Hannes Reinecke [jpg: Relocated atomic_dec() in dd_dispatch_request(), update commit message per Kashyap] Signed-off-by: John Garry --- block/bfq-iosched.c | 5 +++++ block/blk-mq.c | 1 + block/mq-deadline.c | 6 ++++++ include/linux/blk-mq.h | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 88f0dfa545d7..4650012f1e55 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4640,6 +4640,9 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + if (!atomic_read(&hctx->elevator_queued)) + return false; + /* * Avoiding lock: a race on bfqd->busy_queues should cause at * most a call to dispatch for nothing @@ -5554,6 +5557,7 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); bfq_insert_request(hctx, rq, at_head); + atomic_inc(&hctx->elevator_queued); } } @@ -5933,6 +5937,7 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); bfq_finish_requeue_request_body(bfqq); + atomic_dec(&rq->mq_hctx->elevator_queued); spin_unlock_irqrestore(&bfqd->lock, flags); } else { diff --git a/block/blk-mq.c b/block/blk-mq.c index 457b43829a4f..361fb9fe1dc5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2649,6 +2649,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, goto free_hctx; atomic_set(&hctx->nr_active, 0); + atomic_set(&hctx->elevator_queued, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index b57470e154c8..800ac902809b 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -386,6 +386,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) spin_lock(&dd->lock); rq = __dd_dispatch_request(dd); spin_unlock(&dd->lock); + if (rq) + atomic_dec(&rq->mq_hctx->elevator_queued); return rq; } @@ -533,6 +535,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); dd_insert_request(hctx, rq, at_head); + atomic_inc(&hctx->elevator_queued); } spin_unlock(&dd->lock); } @@ -579,6 +582,9 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; + if (!atomic_read(&hctx->elevator_queued)) + return false; + return !list_empty_careful(&dd->dispatch) || !list_empty_careful(&dd->fifo_list[0]) || !list_empty_careful(&dd->fifo_list[1]); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a4b35ec60faf..2f3ba31a1658 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -139,6 +139,10 @@ struct blk_mq_hw_ctx { * shared across request queues. */ atomic_t nr_active; + /** + * @elevator_queued: Number of queued requests on hctx. + */ + atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; From patchwork Wed Aug 19 15:20:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724463 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C394E739 for ; Wed, 19 Aug 2020 15:26:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AAF64208A9 for ; Wed, 19 Aug 2020 15:26:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728704AbgHSP0D (ORCPT ); Wed, 19 Aug 2020 11:26:03 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39926 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728785AbgHSPZd (ORCPT ); Wed, 19 Aug 2020 11:25:33 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 6A4E5B11DB585D6BBB3D; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:51 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 11/18] null_blk: Support shared tag bitmap Date: Wed, 19 Aug 2020 23:20:29 +0800 Message-ID: <1597850436-116171-12-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Support a shared tag bitmap, whereby request tags are unique over all submission queues, and not just per submission queue. As such, per device total queue depth is normally hw_queue_depth * submit_queues, but hw_queue_depth when set. And a similar story for when shared_tags is set, where that is the queue depth over all null blk devices. Signed-off-by: John Garry --- drivers/block/null_blk_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 47a9dad880af..19c04e074880 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -164,6 +164,10 @@ static bool shared_tags; module_param(shared_tags, bool, 0444); MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); +static bool g_shared_tag_bitmap; +module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444); +MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq"); + static int g_irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) @@ -1692,6 +1696,8 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->flags = BLK_MQ_F_SHOULD_MERGE; if (g_no_sched) set->flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; set->driver_data = NULL; if ((nullb && nullb->dev->blocking) || g_blocking) From patchwork Wed Aug 19 15:20:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724447 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 012FA16B1 for ; Wed, 19 Aug 2020 15:26:04 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E3952208C7 for ; Wed, 19 Aug 2020 15:26:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727932AbgHSPZ3 (ORCPT ); Wed, 19 Aug 2020 11:25:29 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39650 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728758AbgHSPZV (ORCPT ); Wed, 19 Aug 2020 11:25:21 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id E2FC13F36854AB68B766; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:51 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , Hannes Reinecke , John Garry Subject: [PATCH v8 12/18] scsi: Add host and host template flag 'host_tagset' Date: Wed, 19 Aug 2020 23:20:30 +0800 Message-ID: <1597850436-116171-13-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Add Host and host template flag 'host_tagset' so hostwide tagset can be shared on multiple reply queues after the SCSI device's reply queue is converted to blk-mq hw queue. Tested-by: Don Brace #SCSI resv cmds patches used Signed-off-by: Hannes Reinecke [jpg: Update comment on .can_queue and add Scsi_Host.host_tagset] Signed-off-by: John Garry --- drivers/scsi/hosts.c | 1 + drivers/scsi/scsi_lib.c | 2 ++ include/scsi/scsi_host.h | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 37d1c5565d90..2f162603876f 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -421,6 +421,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->no_write_same = sht->no_write_same; + shost->host_tagset = sht->host_tagset; if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler) shost->eh_deadline = -1; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7c6dd6f75190..cdef5d1777cb 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1891,6 +1891,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) tag_set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); tag_set->driver_data = shost; + if (shost->host_tagset) + tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; return blk_mq_alloc_tag_set(tag_set); } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 46ef8cccc982..701f178b20ae 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -436,6 +436,9 @@ struct scsi_host_template { /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; + /* True if the host uses host-wide tagspace */ + unsigned host_tagset:1; + /* * Countdown for host blocking with no commands outstanding. */ @@ -603,7 +606,8 @@ struct Scsi_Host { * * Note: it is assumed that each hardware queue has a queue depth of * can_queue. In other words, the total queue depth per host - * is nr_hw_queues * can_queue. + * is nr_hw_queues * can_queue. However, for when host_tagset is set, + * the total queue depth is can_queue. */ unsigned nr_hw_queues; unsigned active_mode:2; @@ -634,6 +638,9 @@ struct Scsi_Host { /* The controller does not support WRITE SAME */ unsigned no_write_same:1; + /* True if the host uses host-wide tagspace */ + unsigned host_tagset:1; + /* Host responded with short (<36 bytes) INQUIRY result */ unsigned short_inquiry:1; From patchwork Wed Aug 19 15:20:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724469 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BC61415E4 for ; Wed, 19 Aug 2020 15:26:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A400F208A9 for ; Wed, 19 Aug 2020 15:26:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728873AbgHSP0a (ORCPT ); Wed, 19 Aug 2020 11:26:30 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:39602 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728692AbgHSPZV (ORCPT ); Wed, 19 Aug 2020 11:25:21 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 928F0B1CB9DFE1F7A85A; Wed, 19 Aug 2020 23:24:58 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:51 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 13/18] scsi: core: Show nr_hw_queues in sysfs Date: Wed, 19 Aug 2020 23:20:31 +0800 Message-ID: <1597850436-116171-14-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org So that we don't use a value of 0 for when Scsi_Host.nr_hw_queues is unset, use the tag_set->nr_hw_queues (which holds 1 for this case). Signed-off-by: John Garry --- drivers/scsi/scsi_sysfs.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 163dbcb741c1..d6e344fa33ad 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -393,6 +393,16 @@ show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL); +static ssize_t +show_nr_hw_queues(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct blk_mq_tag_set *tag_set = &shost->tag_set; + + return snprintf(buf, 20, "%d\n", tag_set->nr_hw_queues); +} +static DEVICE_ATTR(nr_hw_queues, S_IRUGO, show_nr_hw_queues, NULL); + static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_use_blk_mq.attr, &dev_attr_unique_id.attr, @@ -411,6 +421,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_prot_guard_type.attr, &dev_attr_host_reset.attr, &dev_attr_eh_deadline.attr, + &dev_attr_nr_hw_queues.attr, NULL }; From patchwork Wed Aug 19 15:20:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724485 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1E66C739 for ; Wed, 19 Aug 2020 15:27:01 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0B48E20897 for ; Wed, 19 Aug 2020 15:27:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728677AbgHSPZS (ORCPT ); Wed, 19 Aug 2020 11:25:18 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:9780 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727932AbgHSPZI (ORCPT ); Wed, 19 Aug 2020 11:25:08 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 636322665BD9170CBC7F; Wed, 19 Aug 2020 23:25:03 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:52 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 14/18] scsi: hisi_sas: Switch v3 hw to MQ Date: Wed, 19 Aug 2020 23:20:32 +0800 Message-ID: <1597850436-116171-15-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Now that the block layer provides a shared tag, we can switch the driver to expose all HW queues. Signed-off-by: John Garry --- drivers/scsi/hisi_sas/hisi_sas.h | 3 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 36 ++++++----- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 87 +++++++++++--------------- 3 files changed, 56 insertions(+), 70 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 2bdd64648ef0..e6acbf940712 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -8,6 +8,8 @@ #define _HISI_SAS_H_ #include +#include +#include #include #include #include @@ -431,7 +433,6 @@ struct hisi_hba { u32 intr_coal_count; /* Interrupt count to coalesce */ int cq_nvecs; - unsigned int *reply_map; /* bist */ enum sas_linkrate debugfs_bist_linkrate; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 11caa4b0d797..7ed4eaedb7ca 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -417,6 +417,7 @@ static int hisi_sas_task_prep(struct sas_task *task, struct device *dev = hisi_hba->dev; int dlvry_queue_slot, dlvry_queue, rc, slot_idx; int n_elem = 0, n_elem_dif = 0, n_elem_req = 0; + struct scsi_cmnd *scmd = NULL; struct hisi_sas_dq *dq; unsigned long flags; int wr_q_index; @@ -432,10 +433,23 @@ static int hisi_sas_task_prep(struct sas_task *task, return -ECOMM; } - if (hisi_hba->reply_map) { - int cpu = raw_smp_processor_id(); - unsigned int dq_index = hisi_hba->reply_map[cpu]; + if (task->uldd_task) { + struct ata_queued_cmd *qc; + if (dev_is_sata(device)) { + qc = task->uldd_task; + scmd = qc->scsicmd; + } else { + scmd = task->uldd_task; + } + } + + if (scmd) { + unsigned int dq_index; + u32 blk_tag; + + blk_tag = blk_mq_unique_tag(scmd->request); + dq_index = blk_mq_unique_tag_to_hwq(blk_tag); *dq_pointer = dq = &hisi_hba->dq[dq_index]; } else { *dq_pointer = dq = sas_dev->dq; @@ -464,21 +478,9 @@ static int hisi_sas_task_prep(struct sas_task *task, if (hisi_hba->hw->slot_index_alloc) rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device); - else { - struct scsi_cmnd *scsi_cmnd = NULL; - - if (task->uldd_task) { - struct ata_queued_cmd *qc; + else + rc = hisi_sas_slot_index_alloc(hisi_hba, scmd); - if (dev_is_sata(device)) { - qc = task->uldd_task; - scsi_cmnd = qc->scsicmd; - } else { - scsi_cmnd = task->uldd_task; - } - } - rc = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd); - } if (rc < 0) goto err_out_dif_dma_unmap; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 60adf5c32143..b6f75a1764df 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2362,68 +2362,36 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p) return IRQ_WAKE_THREAD; } -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs) +static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) { - const struct cpumask *mask; - int queue, cpu; + int vectors; + int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = BASE_VECTORS_V3_HW, + }; - for (queue = 0; queue < nvecs; queue++) { - struct hisi_sas_cq *cq = &hisi_hba->cq[queue]; + min_msi = MIN_AFFINE_VECTORS_V3_HW; + vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, + min_msi, max_msi, + PCI_IRQ_MSI | + PCI_IRQ_AFFINITY, + &desc); + if (vectors < 0) + return -ENOENT; - mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue + - BASE_VECTORS_V3_HW); - if (!mask) - goto fallback; - cq->irq_mask = mask; - for_each_cpu(cpu, mask) - hisi_hba->reply_map[cpu] = queue; - } - return; -fallback: - for_each_possible_cpu(cpu) - hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count; - /* Don't clean all CQ masks */ + hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + shost->nr_hw_queues = hisi_hba->cq_nvecs; + + return 0; } static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; struct pci_dev *pdev = hisi_hba->pci_dev; - int vectors, rc, i; - int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; - - if (auto_affine_msi_experimental) { - struct irq_affinity desc = { - .pre_vectors = BASE_VECTORS_V3_HW, - }; - - dev_info(dev, "Enable MSI auto-affinity\n"); - - min_msi = MIN_AFFINE_VECTORS_V3_HW; - - hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids, - sizeof(unsigned int), - GFP_KERNEL); - if (!hisi_hba->reply_map) - return -ENOMEM; - vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, - min_msi, max_msi, - PCI_IRQ_MSI | - PCI_IRQ_AFFINITY, - &desc); - if (vectors < 0) - return -ENOENT; - setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW); - } else { - min_msi = max_msi; - vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi, - max_msi, PCI_IRQ_MSI); - if (vectors < 0) - return vectors; - } - - hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + int rc, i; rc = devm_request_irq(dev, pci_irq_vector(pdev, 1), int_phy_up_down_bcast_v3_hw, 0, @@ -3072,6 +3040,15 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable) return 0; } +static int hisi_sas_map_queues(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + + return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev, + BASE_VECTORS_V3_HW); +} + static struct scsi_host_template sht_v3_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3082,6 +3059,7 @@ static struct scsi_host_template sht_v3_hw = { .slave_configure = hisi_sas_slave_configure, .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, + .map_queues = hisi_sas_map_queues, .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, @@ -3098,6 +3076,7 @@ static struct scsi_host_template sht_v3_hw = { .shost_attrs = host_attrs_v3_hw, .tag_alloc_policy = BLK_TAG_ALLOC_RR, .host_reset = hisi_sas_host_reset, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v3_hw = { @@ -3269,6 +3248,10 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (hisi_sas_debugfs_enable) hisi_sas_debugfs_init(hisi_hba); + rc = interrupt_preinit_v3_hw(hisi_hba); + if (rc) + goto err_out_ha; + dev_err(dev, "%d hw queues\n", shost->nr_hw_queues); rc = scsi_add_host(shost, dev); if (rc) goto err_out_ha; From patchwork Wed Aug 19 15:20:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724425 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5C7A215E4 for ; Wed, 19 Aug 2020 15:25:08 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 4C7F8208A9 for ; Wed, 19 Aug 2020 15:25:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726187AbgHSPZG (ORCPT ); Wed, 19 Aug 2020 11:25:06 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:9781 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726894AbgHSPZG (ORCPT ); Wed, 19 Aug 2020 11:25:06 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 6DC731556ACDD80C64D7; Wed, 19 Aug 2020 23:25:03 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:52 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 15/18] scsi: scsi_debug: Support host tagset Date: Wed, 19 Aug 2020 23:20:33 +0800 Message-ID: <1597850436-116171-16-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org When host_max_queue is set (> 0), set the Scsi_Host.host_tagset such that blk-mq will use a hostwide tagset over all SCSI host submission queues. This means that we may expose all submission queues and always use the hwq chosen by blk-mq. And since if sdebug_host_max_queue is set, sdebug_max_queue is fixed to the same value, we can simplify how sdebug_driver_template.can_queue is set. Signed-off-by: John Garry --- drivers/scsi/scsi_debug.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 064ed680c053..4238baa2b34b 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4698,19 +4698,14 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd) { u16 hwq; + u32 tag = blk_mq_unique_tag(cmnd->request); - if (sdebug_host_max_queue) { - /* Provide a simple method to choose the hwq */ - hwq = smp_processor_id() % submit_queues; - } else { - u32 tag = blk_mq_unique_tag(cmnd->request); + hwq = blk_mq_unique_tag_to_hwq(tag); - hwq = blk_mq_unique_tag_to_hwq(tag); + pr_debug("tag=%#x, hwq=%d\n", tag, hwq); + if (WARN_ON_ONCE(hwq >= submit_queues)) + hwq = 0; - pr_debug("tag=%#x, hwq=%d\n", tag, hwq); - if (WARN_ON_ONCE(hwq >= submit_queues)) - hwq = 0; - } return sdebug_q_arr + hwq; } @@ -7347,10 +7342,7 @@ static int sdebug_driver_probe(struct device *dev) sdbg_host = to_sdebug_host(dev); - if (sdebug_host_max_queue) - sdebug_driver_template.can_queue = sdebug_host_max_queue; - else - sdebug_driver_template.can_queue = sdebug_max_queue; + sdebug_driver_template.can_queue = sdebug_max_queue; if (!sdebug_clustering) sdebug_driver_template.dma_boundary = PAGE_SIZE - 1; @@ -7367,11 +7359,11 @@ static int sdebug_driver_probe(struct device *dev) } /* * Decide whether to tell scsi subsystem that we want mq. The - * following should give the same answer for each host. If the host - * has a limit of hostwide max commands, then do not set. + * following should give the same answer for each host. */ - if (!sdebug_host_max_queue) - hpnt->nr_hw_queues = submit_queues; + hpnt->nr_hw_queues = submit_queues; + if (sdebug_host_max_queue) + hpnt->host_tagset = 1; sdbg_host->shost = hpnt; *((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host; From patchwork Wed Aug 19 15:20:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724487 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6850E739 for ; Wed, 19 Aug 2020 15:27:03 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5A9E5208C7 for ; Wed, 19 Aug 2020 15:27:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728681AbgHSPZO (ORCPT ); Wed, 19 Aug 2020 11:25:14 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:9782 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728312AbgHSPZI (ORCPT ); Wed, 19 Aug 2020 11:25:08 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 76875C8A035CB46D41DC; Wed, 19 Aug 2020 23:25:03 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:53 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 16/18] hpsa: enable host_tagset and switch to MQ Date: Wed, 19 Aug 2020 23:20:34 +0800 Message-ID: <1597850436-116171-17-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke The smart array HBAs can steer interrupt completion, so this patch switches the implementation to use multiqueue and enables 'host_tagset' as the HBA has a shared host-wide tagset. Reviewed-by: Don Brace Signed-off-by: Hannes Reinecke Signed-off-by: John Garry --- drivers/scsi/hpsa.c | 44 +++++++------------------------------------- drivers/scsi/hpsa.h | 1 - 2 files changed, 7 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 91794a50b31f..eaddcd3e9caf 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -984,6 +984,7 @@ static struct scsi_host_template hpsa_driver_template = { .shost_attrs = hpsa_shost_attrs, .max_sectors = 2048, .no_write_same = 1, + .host_tagset = 1, }; static inline u32 next_command(struct ctlr_info *h, u8 q) @@ -1148,12 +1149,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, static void __enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c, int reply_queue) { + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); + dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); if (c->device) atomic_inc(&c->device->commands_outstanding); - reply_queue = h->reply_map[raw_smp_processor_id()]; + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -5676,8 +5679,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* Get the ptr to our adapter structure out of cmd->host. */ h = sdev_to_hba(cmd->device); - BUG_ON(cmd->request->tag < 0); - dev = cmd->device->hostdata; if (!dev) { cmd->result = DID_NO_CONNECT << 16; @@ -5853,7 +5854,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) sh->hostdata[0] = (unsigned long) h; sh->irq = pci_irq_vector(h->pdev, 0); sh->unique_id = sh->irq; - + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; h->scsi_host = sh; return 0; } @@ -5879,7 +5880,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) */ static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) { - int idx = scmd->request->tag; + u32 blk_tag = blk_mq_unique_tag(scmd->request); + int idx = blk_mq_unique_tag_to_tag(blk_tag); if (idx < 0) return idx; @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < h->msix_vectors; queue++) { - mask = pci_irq_get_affinity(h->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - h->reply_map[cpu] = queue; - } - return; - -fallback: - for_each_possible_cpu(cpu) - h->reply_map[cpu] = 0; -} - /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) if (err) goto clean1; - /* setup mapping between CPU and reply queue */ - hpsa_setup_reply_map(h); - err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8613,7 +8592,6 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, static void hpda_free_ctlr_info(struct ctlr_info *h) { - kfree(h->reply_map); kfree(h); } @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) struct ctlr_info *h; h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); - if (!h->reply_map) { - kfree(h); - return NULL; - } return h; } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 6b87d9815b35..9c6a161f2367 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -161,7 +161,6 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { - unsigned int *reply_map; int ctlr; char devname[8]; char *product_name; From patchwork Wed Aug 19 15:20:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724495 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 555AD16B1 for ; Wed, 19 Aug 2020 15:27:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 43E5720897 for ; Wed, 19 Aug 2020 15:27:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728772AbgHSP1P (ORCPT ); Wed, 19 Aug 2020 11:27:15 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:9783 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726636AbgHSPZI (ORCPT ); Wed, 19 Aug 2020 11:25:08 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 7F5A234F37896C527B4D; Wed, 19 Aug 2020 23:25:03 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:53 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , Hannes Reinecke , John Garry Subject: [PATCH v8 17/18] scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug Date: Wed, 19 Aug 2020 23:20:35 +0800 Message-ID: <1597850436-116171-18-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Kashyap Desai Fusion adapters can steer completions to individual queues, and we now have support for shared host-wide tags. So we can enable multiqueue support for fusion adapters. Once driver enable shared host-wide tags, cpu hotplug feature is also supported as it was enabled using below patchsets - commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline") Currently driver has provision to disable host-wide tags using "host_tagset_enable" module parameter. Once we do not have any major performance regression using host-wide tags, we will drop the hand-crafted interrupt affinity settings. Performance is also meeting the expecatation - (used both none and mq-deadline scheduler) 24 Drive SSD on Aero with/without this patch can get 3.1M IOPs 3 VDs consist of 8 SAS SSD on Aero with/without this patch can get 3.1M IOPs. Signed-off-by: Kashyap Desai Signed-off-by: Hannes Reinecke Signed-off-by: John Garry --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 +++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 ++++++++------- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 861f7140f52e..6960922d0d7f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,10 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); +int host_tagset_enable = 1; +module_param(host_tagset_enable, int, 0444); +MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3119,6 +3124,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance *)shost->hostdata; + + if (shost->nr_hw_queues == 1) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3427,6 +3445,7 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6808,6 +6827,26 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + /* Use shared host tagset only for fusion adaptors + * if there are managed interrupts (smp affinity enabled case). + * Single msix_vectors in kdump, so shared host tag is also disabled. + */ + + host->host_tagset = 0; + host->nr_hw_queues = 1; + + if ((instance->adapter_type != MFI_SERIES) && + (instance->msix_vectors > instance->low_latency_index_start) && + host_tagset_enable && + instance->smp_affinity_enable) { + host->host_tagset = 1; + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; + } + + dev_info(&instance->pdev->dev, + "Max firmware commands: %d shared with nr_hw_queues = %d\n", + instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 0824410f78f8..a4251121f173 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,24 +359,29 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* nr_hw_queue = 1 for MegaRAID */ - struct blk_mq_hw_ctx *hctx = - scmd->device->request_queue->queue_hw_ctx[0]; - - sdev_busy = atomic_read(&hctx->nr_active); + /* TBD - if sml remove device_busy in future, driver + * should track counter in internal structure. + */ + sdev_busy = atomic_read(&scmd->device->device_busy); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - else if (instance->msix_load_balance) + } else if (instance->msix_load_balance) { cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - else + } else if (instance->host->nr_hw_queues > 1) { + u32 tag = blk_mq_unique_tag(scmd->request); + + cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + + instance->low_latency_index_start; + } else { cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; + } } /** @@ -956,9 +961,6 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; - dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", - instance->max_fw_cmds); - /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1102,8 +1104,9 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode)); + dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode), + instance->low_latency_index_start); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; From patchwork Wed Aug 19 15:20:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 11724489 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9AD92739 for ; Wed, 19 Aug 2020 15:27:04 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8E7A7208A9 for ; Wed, 19 Aug 2020 15:27:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728732AbgHSP07 (ORCPT ); Wed, 19 Aug 2020 11:26:59 -0400 Received: from szxga04-in.huawei.com ([45.249.212.190]:9851 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726894AbgHSPZQ (ORCPT ); Wed, 19 Aug 2020 11:25:16 -0400 Received: from DGGEMS414-HUB.china.huawei.com (unknown [172.30.72.59]) by Forcepoint Email with ESMTP id 97823174EF24E6297F9B; Wed, 19 Aug 2020 23:25:03 +0800 (CST) Received: from localhost.localdomain (10.69.192.58) by DGGEMS414-HUB.china.huawei.com (10.3.19.214) with Microsoft SMTP Server id 14.3.487.0; Wed, 19 Aug 2020 23:24:54 +0800 From: John Garry To: , , , , , , , , , , CC: , , , , , , , , John Garry Subject: [PATCH v8 18/18] smartpqi: enable host tagset Date: Wed, 19 Aug 2020 23:20:36 +0800 Message-ID: <1597850436-116171-19-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1597850436-116171-1-git-send-email-john.garry@huawei.com> References: <1597850436-116171-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-CFilter-Loop: Reflected Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Enable host tagset for smartpqi; with this we can use the request tag to look command from the pool avoiding the list iteration in the hot path. Signed-off-by: Hannes Reinecke [jpg: Mod ctrl_info->next_io_request_slot calc] Signed-off-by: John Garry --- drivers/scsi/smartpqi/smartpqi_init.c | 45 ++++++++++++++++++--------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index bd38c8cea56e..870ed1400a9e 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -575,22 +575,33 @@ static inline void pqi_reinit_io_request(struct pqi_io_request *io_request) } static struct pqi_io_request *pqi_alloc_io_request( - struct pqi_ctrl_info *ctrl_info) + struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) { struct pqi_io_request *io_request; - u16 i = ctrl_info->next_io_request_slot; /* benignly racy */ + unsigned int limit = PQI_RESERVED_IO_SLOTS; + u16 i; - while (1) { + if (scmd) { + u32 blk_tag = blk_mq_unique_tag(scmd->request); + + i = blk_mq_unique_tag_to_tag(blk_tag) + limit; io_request = &ctrl_info->io_request_pool[i]; - if (atomic_inc_return(&io_request->refcount) == 1) - break; - atomic_dec(&io_request->refcount); - i = (i + 1) % ctrl_info->max_io_slots; + if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) { + atomic_dec(&io_request->refcount); + return NULL; + } + } else { + i = ctrl_info->next_io_request_slot; /* benignly racy */ + while (1) { + io_request = &ctrl_info->io_request_pool[i]; + if (atomic_inc_return(&io_request->refcount) == 1) + break; + atomic_dec(&io_request->refcount); + i = (i + 1) % limit; + } + ctrl_info->next_io_request_slot = (i + 1) % limit; } - /* benignly racy */ - ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots; - pqi_reinit_io_request(io_request); return io_request; @@ -4075,7 +4086,7 @@ static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info, atomic_inc(&ctrl_info->sync_cmds_outstanding); - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); put_unaligned_le16(io_request->index, &(((struct pqi_raid_path_request *)request)->request_id)); @@ -5032,7 +5043,9 @@ static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, { struct pqi_io_request *io_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request, device, scmd, queue_group); @@ -5230,7 +5243,10 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request; struct pqi_aio_path_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; + io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = raid_bypass; @@ -5657,7 +5673,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, DECLARE_COMPLETION_ONSTACK(wait); struct pqi_task_management_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); io_request->io_complete_callback = pqi_lun_reset_complete; io_request->context = &wait; @@ -6504,6 +6520,7 @@ static struct scsi_host_template pqi_driver_template = { .map_queues = pqi_map_queues, .sdev_attrs = pqi_sdev_attrs, .shost_attrs = pqi_shost_attrs, + .host_tagset = 1, }; static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info)