From patchwork Tue Nov 2 11:27:33 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 12598561 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 04C23C433F5 for ; Tue, 2 Nov 2021 11:32:52 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DC72D60F36 for ; Tue, 2 Nov 2021 11:32:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230411AbhKBLfZ (ORCPT ); Tue, 2 Nov 2021 07:35:25 -0400 Received: from frasgout.his.huawei.com ([185.176.79.56]:4049 "EHLO frasgout.his.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230349AbhKBLfX (ORCPT ); Tue, 2 Nov 2021 07:35:23 -0400 Received: from fraeml704-chm.china.huawei.com (unknown [172.18.147.206]) by frasgout.his.huawei.com (SkyGuard) with ESMTP id 4Hk71N6Ng7z6H6rk; Tue, 2 Nov 2021 19:27:52 +0800 (CST) Received: from lhreml724-chm.china.huawei.com (10.201.108.75) by fraeml704-chm.china.huawei.com (10.206.15.53) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id 15.1.2308.15; Tue, 2 Nov 2021 12:32:46 +0100 Received: from localhost.localdomain (10.69.192.58) by lhreml724-chm.china.huawei.com (10.201.108.75) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2308.15; Tue, 2 Nov 2021 11:32:44 +0000 From: John Garry To: CC: , , , , , "John Garry" Subject: [PATCH RFT 1/3] blk-mq: Drop busy_iter_fn blk_mq_hw_ctx argument Date: Tue, 2 Nov 2021 19:27:33 +0800 Message-ID: <1635852455-39935-2-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1635852455-39935-1-git-send-email-john.garry@huawei.com> References: 
<1635852455-39935-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-ClientProxiedBy: dggems703-chm.china.huawei.com (10.3.19.180) To lhreml724-chm.china.huawei.com (10.201.108.75) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org The only user of busy_iter_fn blk_mq_hw_ctx argument is blk_mq_rq_inflight(). Function blk_mq_rq_inflight() uses the hctx to find the associated request queue to match against the request. However this same check is already done in caller bt_iter(), so drop this check. With that change there are no more users of busy_iter_fn blk_mq_hw_ctx argument, so drop the argument. Signed-off-by: John Garry Reviewed-by: Ming Lei --- block/blk-mq-tag.c | 2 +- block/blk-mq.c | 17 ++++++++--------- include/linux/blk-mq.h | 3 +-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 995336abee33..0d773c44a7ec 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -254,7 +254,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) return true; if (rq->q == hctx->queue && rq->mq_hctx == hctx) - ret = iter_data->fn(hctx, rq, iter_data->data, reserved); + ret = iter_data->fn(rq, iter_data->data, reserved); blk_mq_put_rq_ref(rq); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 07eb1412760b..c26e6c9301dd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -126,8 +126,7 @@ struct mq_inflight { unsigned int inflight[2]; }; -static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, - struct request *rq, void *priv, +static bool blk_mq_check_inflight(struct request *rq, void *priv, bool reserved) { struct mq_inflight *mi = priv; @@ -1136,14 +1135,15 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, - void *priv, bool reserved) 
+static bool blk_mq_rq_inflight(struct request *rq, void *priv, + bool reserved) { /* - * If we find a request that isn't idle and the queue matches, - * we know the queue is busy. Return false to stop the iteration. + * If we find a request that isn't idle we know the queue is busy + * as it's checked in the iter. + * Return false to stop the iteration. */ - if (blk_mq_request_started(rq) && rq->q == hctx->queue) { + if (blk_mq_request_started(rq)) { bool *busy = priv; *busy = true; @@ -1205,8 +1205,7 @@ void blk_mq_put_rq_ref(struct request *rq) __blk_mq_free_request(rq); } -static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, - struct request *rq, void *priv, bool reserved) +static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved) { unsigned long *next = priv; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8682663e7368..da8de0d6f99b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -479,8 +479,7 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, - bool); +typedef bool (busy_iter_fn)(struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** From patchwork Tue Nov 2 11:27:34 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 12598565 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id F04FAC433FE for ; Tue, 2 Nov 2021 11:32:52 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D3A1660F36 for ; Tue, 2 Nov 2021 11:32:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231219AbhKBLfZ (ORCPT ); Tue, 2 Nov 2021 
07:35:25 -0400 Received: from frasgout.his.huawei.com ([185.176.79.56]:4050 "EHLO frasgout.his.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230114AbhKBLfZ (ORCPT ); Tue, 2 Nov 2021 07:35:25 -0400 Received: from fraeml703-chm.china.huawei.com (unknown [172.18.147.200]) by frasgout.his.huawei.com (SkyGuard) with ESMTP id 4Hk71y0pz8z67v1F; Tue, 2 Nov 2021 19:28:22 +0800 (CST) Received: from lhreml724-chm.china.huawei.com (10.201.108.75) by fraeml703-chm.china.huawei.com (10.206.15.52) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id 15.1.2308.15; Tue, 2 Nov 2021 12:32:48 +0100 Received: from localhost.localdomain (10.69.192.58) by lhreml724-chm.china.huawei.com (10.201.108.75) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2308.15; Tue, 2 Nov 2021 11:32:46 +0000 From: John Garry To: CC: , , , , , "John Garry" Subject: [PATCH RFT 2/3] blk-mq: Delete busy_iter_fn Date: Tue, 2 Nov 2021 19:27:34 +0800 Message-ID: <1635852455-39935-3-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1635852455-39935-1-git-send-email-john.garry@huawei.com> References: <1635852455-39935-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-ClientProxiedBy: dggems703-chm.china.huawei.com (10.3.19.180) To lhreml724-chm.china.huawei.com (10.201.108.75) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Typedefs busy_iter_fn and busy_tag_iter_fn are now identical, so delete busy_iter_fn to reduce duplication. It would be nicer to delete busy_tag_iter_fn, as the name busy_iter_fn is less specific. However busy_tag_iter_fn is used in many different parts of the tree, unlike busy_iter_fn which is just used in block/, so just take the straightforward path now, so that we could rename later treewide. 
Signed-off-by: John Garry Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-mq-tag.c | 6 +++--- block/blk-mq-tag.h | 2 +- include/linux/blk-mq.h | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 0d773c44a7ec..bc233ea92adf 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -215,7 +215,7 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags) struct bt_iter_data { struct blk_mq_hw_ctx *hctx; - busy_iter_fn *fn; + busy_tag_iter_fn *fn; void *data; bool reserved; }; @@ -274,7 +274,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * bitmap_tags member of struct blk_mq_tags. */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, - busy_iter_fn *fn, void *data, bool reserved) + busy_tag_iter_fn *fn, void *data, bool reserved) { struct bt_iter_data iter_data = { .hctx = hctx, @@ -457,7 +457,7 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request); * called for all requests on all queues that share that tag set and not only * for requests associated with @q. 
*/ -void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, void *priv) { struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index df787b5a23bd..5668e28be0b7 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -28,7 +28,7 @@ extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); -void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, void *priv); void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index da8de0d6f99b..2344c68bff35 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -479,7 +479,6 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_iter_fn)(struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); /** From patchwork Tue Nov 2 11:27:35 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Garry X-Patchwork-Id: 12598567 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 93332C433EF for ; Tue, 2 Nov 2021 11:32:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7EEC3603E5 for ; Tue, 2 Nov 2021 11:32:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231303AbhKBLf2 (ORCPT ); Tue, 2 Nov 2021 07:35:28 -0400 Received: from frasgout.his.huawei.com 
([185.176.79.56]:4051 "EHLO frasgout.his.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230349AbhKBLf1 (ORCPT ); Tue, 2 Nov 2021 07:35:27 -0400 Received: from fraeml702-chm.china.huawei.com (unknown [172.18.147.200]) by frasgout.his.huawei.com (SkyGuard) with ESMTP id 4Hk7201Fdsz67v1F; Tue, 2 Nov 2021 19:28:24 +0800 (CST) Received: from lhreml724-chm.china.huawei.com (10.201.108.75) by fraeml702-chm.china.huawei.com (10.206.15.51) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id 15.1.2308.15; Tue, 2 Nov 2021 12:32:50 +0100 Received: from localhost.localdomain (10.69.192.58) by lhreml724-chm.china.huawei.com (10.201.108.75) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2308.15; Tue, 2 Nov 2021 11:32:48 +0000 From: John Garry To: CC: , , , , , "John Garry" Subject: [PATCH RFT 3/3] blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags Date: Tue, 2 Nov 2021 19:27:35 +0800 Message-ID: <1635852455-39935-4-git-send-email-john.garry@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1635852455-39935-1-git-send-email-john.garry@huawei.com> References: <1635852455-39935-1-git-send-email-john.garry@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.69.192.58] X-ClientProxiedBy: dggems703-chm.china.huawei.com (10.3.19.180) To lhreml724-chm.china.huawei.com (10.201.108.75) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Kashyap reports high CPU usage in blk_mq_queue_tag_busy_iter() and callees using megaraid SAS RAID card since moving to shared tags [0]. Previously, when shared tags was shared sbitmap, this function was less than optimum since we would iterate through all tags for all hctx's, yet only ever match up to tagset depth number of rqs. Since the change to shared tags, things are even less efficient if we have parallel callers of blk_mq_queue_tag_busy_iter(). 
This is because in bt_iter() -> blk_mq_find_and_get_req() there would be more contention on accessing each request ref and tags->lock since they are now shared among all HW queues. Optimise by having separate calls to bt_for_each() for when we're using shared tags. In this case no longer pass a hctx, as it is no longer relevant, and teach bt_iter() about this. Ming suggested something along the lines of this change, apart from a different implementation. [0] https://lore.kernel.org/linux-block/e4e92abbe9d52bcba6b8cc6c91c442cc@mail.gmail.com/ Signed-off-by: John Garry Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei --- block/blk-mq-tag.c | 52 +++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index bc233ea92adf..00515933c8a8 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -215,6 +215,7 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags) struct bt_iter_data { struct blk_mq_hw_ctx *hctx; + struct request_queue *q; busy_tag_iter_fn *fn; void *data; bool reserved; @@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { struct bt_iter_data *iter_data = data; struct blk_mq_hw_ctx *hctx = iter_data->hctx; - struct blk_mq_tags *tags = hctx->tags; + struct request_queue *q = iter_data->q; bool reserved = iter_data->reserved; + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_tags *tags; struct request *rq; bool ret = true; + if (blk_mq_is_shared_tags(set->flags)) + tags = set->shared_tags; + else + tags = hctx->tags; + if (!reserved) bitnr += tags->nr_reserved_tags; /* @@ -253,7 +261,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) if (!rq) return true; - if (rq->q == hctx->queue && rq->mq_hctx == hctx) + if (rq->q == q && (!hctx || rq->mq_hctx == hctx)) ret = iter_data->fn(rq, iter_data->data, reserved); blk_mq_put_rq_ref(rq); return ret; @@ -274,13 
+282,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * bitmap_tags member of struct blk_mq_tags. */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, - busy_tag_iter_fn *fn, void *data, bool reserved) + busy_tag_iter_fn *fn, void *data, bool reserved, + struct request_queue *q) { struct bt_iter_data iter_data = { .hctx = hctx, .fn = fn, .data = data, .reserved = reserved, + .q = q, }; sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data); @@ -460,9 +470,6 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, void *priv) { - struct blk_mq_hw_ctx *hctx; - int i; - /* * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx * while the queue is frozen. So we can use q_usage_counter to avoid @@ -471,19 +478,30 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, if (!percpu_ref_tryget(&q->q_usage_counter)) return; - queue_for_each_hw_ctx(q, hctx, i) { - struct blk_mq_tags *tags = hctx->tags; - - /* - * If no software queues are currently mapped to this - * hardware queue, there's nothing to check - */ - if (!blk_mq_hw_queue_mapped(hctx)) - continue; + if (blk_mq_is_shared_tags(q->tag_set->flags)) { + struct blk_mq_tags *tags = q->tag_set->shared_tags; if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); + bt_for_each(NULL, &tags->breserved_tags, fn, priv, true, q); + bt_for_each(NULL, &tags->bitmap_tags, fn, priv, false, q); + } else { + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_tags *tags = hctx->tags; + + /* + * If no software queues are currently mapped to this + * hardware queue, there's nothing to check + */ + if (!blk_mq_hw_queue_mapped(hctx)) + continue; + + if (tags->nr_reserved_tags) + bt_for_each(hctx, &tags->breserved_tags, fn, priv, 
true, q); + bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false, q); + } } blk_queue_exit(q); }