From patchwork Mon Dec 2 15:39:04 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269273 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 63B64930 for ; Mon, 2 Dec 2019 15:39:29 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 4C1D621774 for ; Mon, 2 Dec 2019 15:39:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727569AbfLBPj2 (ORCPT ); Mon, 2 Dec 2019 10:39:28 -0500 Received: from mx2.suse.de ([195.135.220.15]:44706 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727547AbfLBPj2 (ORCPT ); Mon, 2 Dec 2019 10:39:28 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 6E342C1A6; Mon, 2 Dec 2019 15:39:25 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org Subject: [PATCH 01/11] blk-mq: Remove some unused function arguments Date: Mon, 2 Dec 2019 16:39:04 +0100 Message-Id: <20191202153914.84722-2-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: John Garry The struct blk_mq_hw_ctx * argument in blk_mq_put_tag(), blk_mq_poll_nsecs(), and blk_mq_poll_hybrid_sleep() is unused, so remove it. 
Reviewed-by: Hannes Reinecke Signed-off-by: John Garry --- block/blk-mq-tag.c | 4 ++-- block/blk-mq-tag.h | 4 ++-- block/blk-mq.c | 10 ++++------ block/blk-mq.h | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 008388e82b5c..53b4a9414fbd 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -191,8 +191,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag) +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 61deab0b5a5a..66d04dea0bdb 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, diff --git a/block/blk-mq.c b/block/blk-mq.c index 6e3b15f70cd7..16aa20d23b67 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -499,9 +499,9 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_put_tag(hctx->sched_tags, ctx, 
sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -3354,7 +3354,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) } static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { unsigned long ret = 0; @@ -3387,7 +3386,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, } static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { struct hrtimer_sleeper hs; @@ -3407,7 +3405,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (q->poll_nsec > 0) nsecs = q->poll_nsec; else - nsecs = blk_mq_poll_nsecs(q, hctx, rq); + nsecs = blk_mq_poll_nsecs(q, rq); if (!nsecs) return false; @@ -3462,7 +3460,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return false; } - return blk_mq_poll_hybrid_sleep(q, hctx, rq); + return blk_mq_poll_hybrid_sleep(q, rq); } /** diff --git a/block/blk-mq.h b/block/blk-mq.h index 32c62c64e6c2..78d38b5f2793 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -208,7 +208,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; if (rq->rq_flags & RQF_MQ_INFLIGHT) { From patchwork Mon Dec 2 15:39:05 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269283 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 64EF2109A for ; Mon, 2 Dec 2019 15:39:31 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 4D9E02146E for ; Mon, 2 Dec 2019 15:39:31 +0000 (UTC) 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727562AbfLBPj2 (ORCPT ); Mon, 2 Dec 2019 10:39:28 -0500 Received: from mx2.suse.de ([195.135.220.15]:44698 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727545AbfLBPj2 (ORCPT ); Mon, 2 Dec 2019 10:39:28 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 6D16AC1A3; Mon, 2 Dec 2019 15:39:25 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org Subject: [PATCH 02/11] blk-mq: rename BLK_MQ_F_TAG_SHARED as BLK_MQ_F_TAG_QUEUE_SHARED Date: Mon, 2 Dec 2019 16:39:05 +0100 Message-Id: <20191202153914.84722-3-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Ming Lei BLK_MQ_F_TAG_SHARED actually means that the tags are shared among request queues, all of which should belong to LUNs attached to the same HBA. So rename it to make the point explicit. 
Suggested-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: John Garry --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-tag.c | 2 +- block/blk-mq-tag.h | 4 ++-- block/blk-mq.c | 20 ++++++++++---------- include/linux/blk-mq.h | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b3f2ba483992..33a40ae1d60f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -236,7 +236,7 @@ static const char *const alloc_policy_name[] = { #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(SHOULD_MERGE), - HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), }; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 53b4a9414fbd..d7aa23c82dbf 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -73,7 +73,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, { unsigned int depth, users; - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return true; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 66d04dea0bdb..6c0f7c9ce9f6 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -55,7 +55,7 @@ extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return false; return __blk_mq_tag_busy(hctx); @@ -63,7 +63,7 @@ static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return; __blk_mq_tag_idle(hctx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 
16aa20d23b67..6b39cf0efdcd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -302,7 +302,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->tag = -1; rq->internal_tag = tag; } else { - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { + if (data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) { rq_flags = RQF_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); } @@ -1118,7 +1118,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait_queue_entry_t *wait; bool ret; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { blk_mq_sched_mark_restart_hctx(hctx); /* @@ -1249,7 +1249,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * For non-shared tags, the RESTART check * will suffice. */ - if (hctx->flags & BLK_MQ_F_TAG_SHARED) + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) no_tag = true; break; } @@ -2358,7 +2358,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); @@ -2575,9 +2575,9 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) queue_for_each_hw_ctx(q, hctx, i) { if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; + hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } @@ -2603,7 +2603,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) list_del_rcu(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ - set->flags &= ~BLK_MQ_F_TAG_SHARED; + set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, false); } @@ -2620,12 +2620,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, * Check to see if 
we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && - !(set->flags & BLK_MQ_F_TAG_SHARED)) { - set->flags |= BLK_MQ_F_TAG_SHARED; + !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { + set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, true); } - if (set->flags & BLK_MQ_F_TAG_SHARED) + if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail_rcu(&q->tag_set_list, &set->tag_list); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0bf056de5cc3..147185394a25 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -225,7 +225,7 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, From patchwork Mon Dec 2 15:39:06 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269269 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DCAA8109A for ; Mon, 2 Dec 2019 15:39:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id C4D4620881 for ; Mon, 2 Dec 2019 15:39:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727555AbfLBPj2 (ORCPT ); Mon, 2 Dec 2019 10:39:28 -0500 Received: from mx2.suse.de ([195.135.220.15]:44716 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727549AbfLBPj1 (ORCPT ); Mon, 2 Dec 2019 10:39:27 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 6E5A7C1A7; Mon, 2 Dec 2019 15:39:25 +0000 (UTC) From: 
Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 03/11] blk-mq: rename blk_mq_update_tag_set_depth() Date: Mon, 2 Dec 2019 16:39:06 +0100 Message-Id: <20191202153914.84722-4-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org The function does not set the depth, but rather transitions from shared to non-shared queues and vice versa. So rename it to blk_mq_update_tag_set_shared() to better reflect its purpose. Signed-off-by: Hannes Reinecke --- block/blk-mq-tag.c | 18 ++++++++++-------- block/blk-mq.c | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d7aa23c82dbf..f5009587e1b5 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -440,24 +440,22 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, node); } -static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, - int node, int alloc_policy) +static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, + int node, int alloc_policy) { unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) - goto free_tags; + return -ENOMEM; if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, node)) goto free_bitmap_tags; - return tags; + return 0; free_bitmap_tags: sbitmap_queue_free(&tags->bitmap_tags); -free_tags: - kfree(tags); - return NULL; + return -ENOMEM; } struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, @@ -478,7 +476,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = 
total_tags; tags->nr_reserved_tags = reserved_tags; - return blk_mq_init_bitmap_tags(tags, node, alloc_policy); + if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { + kfree(tags); + tags = NULL; + } + return tags; } void blk_mq_free_tags(struct blk_mq_tags *tags) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6b39cf0efdcd..91950d3e436a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2581,8 +2581,8 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, - bool shared) +static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; @@ -2605,7 +2605,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) /* just transitioned to unshared */ set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, false); + blk_mq_update_tag_set_shared(set, false); } mutex_unlock(&set->tag_list_lock); INIT_LIST_HEAD(&q->tag_set_list); @@ -2623,7 +2623,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ - blk_mq_update_tag_set_depth(set, true); + blk_mq_update_tag_set_shared(set, true); } if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); From patchwork Mon Dec 2 15:39:07 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269303 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id F190E159A for ; Mon, 2 Dec 2019 15:39:35 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id D06272146E for ; Mon, 2 Dec 2019 15:39:35 +0000 (UTC) Received: 
(majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727595AbfLBPjc (ORCPT ); Mon, 2 Dec 2019 10:39:32 -0500 Received: from mx2.suse.de ([195.135.220.15]:44744 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727553AbfLBPja (ORCPT ); Mon, 2 Dec 2019 10:39:30 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 6E1F8C1A5; Mon, 2 Dec 2019 15:39:25 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 04/11] blk-mq: Facilitate a shared sbitmap per tagset Date: Mon, 2 Dec 2019 16:39:07 +0100 Message-Id: <20191202153914.84722-5-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: John Garry Some SCSI HBAs (such as HPSA, megaraid, mpt3sas, hisi_sas_v3 ..) support multiple reply queues with single hostwide tags. In addition, these drivers want to use interrupt assignment in pci_alloc_irq_vectors(PCI_IRQ_AFFINITY). However, as discussed in [0], CPU hotplug may cause in-flight IO completion to not be serviced when an interrupt is shut down. To solve that problem, Ming's patchset to drain hctx's should ensure no IOs are missed in-flight [1]. However, to take advantage of that patchset, we need to map the HBA HW queues to blk mq hctx's; to do that, we need to expose the HBA HW queues. In making that transition, the per-SCSI command request tags are no longer unique per Scsi host - they are just unique per hctx. As such, the HBA LLDD would have to generate this tag internally, which has a certain performance overhead. 
However, another problem is that blk mq assumes the host may accept (Scsi_host.can_queue * #hw queue) commands. In [2], we removed the Scsi host busy counter, which would stop the LLDD being sent more than .can_queue commands; however, we should still ensure that the block layer does not issue more than .can_queue commands to the Scsi host. To solve this problem, introduce a shared sbitmap per blk_mq_tag_set, which may be requested at init time. The new flag BLK_MQ_F_TAG_HCTX_SHARED should be set when requesting the tagset to indicate whether the shared sbitmap should be used. Even when BLK_MQ_F_TAG_HCTX_SHARED is set, we still allocate a full set of tags and requests per hctx; the reason for this is that if we only allocate tags and requests for a single hctx - like hctx0 - we may break block drivers which expect a request to be associated with a specific hctx, i.e. not hctx0. This is based on work originally from Ming Lei in [3] and from Bart's suggestion in [4]. [0] https://lore.kernel.org/linux-block/alpine.DEB.2.21.1904051331270.1802@nanos.tec.linutronix.de/ [1] https://lore.kernel.org/linux-block/20191014015043.25029-1-ming.lei@redhat.com/ [2] https://lore.kernel.org/linux-scsi/20191025065855.6309-1-ming.lei@redhat.com/ [3] https://lore.kernel.org/linux-block/20190531022801.10003-1-ming.lei@redhat.com/ [4] https://lore.kernel.org/linux-block/ff77beff-5fd9-9f05-12b6-826922bace1f@huawei.com/T/#m3db0a602f095cbcbff27e9c884d6b4ae826144be Signed-off-by: John Garry Signed-off-by: Hannes Reinecke --- block/bfq-iosched.c | 4 +- block/blk-mq-debugfs.c | 10 ++--- block/blk-mq-sched.c | 14 ++++++ block/blk-mq-tag.c | 114 +++++++++++++++++++++++++++++++++++++++---------- block/blk-mq-tag.h | 17 ++++++-- block/blk-mq.c | 67 ++++++++++++++++++++++++++--- block/blk-mq.h | 5 +++ block/kyber-iosched.c | 4 +- include/linux/blk-mq.h | 9 ++++ 9 files changed, 204 insertions(+), 40 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0319d6339822..ca89d0c34994 100644 
--- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6327,8 +6327,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) struct blk_mq_tags *tags = hctx->sched_tags; unsigned int min_shallow; - min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); - sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); + min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags); + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow); } static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 33a40ae1d60f..46f57dbed890 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -449,11 +449,11 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, atomic_read(&tags->active_queues)); seq_puts(m, "\nbitmap_tags:\n"); - sbitmap_queue_show(&tags->bitmap_tags, m); + sbitmap_queue_show(tags->bitmap_tags, m); if (tags->nr_reserved_tags) { seq_puts(m, "\nbreserved_tags:\n"); - sbitmap_queue_show(&tags->breserved_tags, m); + sbitmap_queue_show(tags->breserved_tags, m); } } @@ -483,8 +483,8 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m) res = mutex_lock_interruptible(&q->sysfs_lock); if (res) goto out; - if (hctx->tags) - sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); + if (hctx->tags) /* We should just iterate the relevant bits for this hctx FIXME */ + sbitmap_bitmap_show(&hctx->tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: @@ -518,7 +518,7 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) if (res) goto out; if (hctx->sched_tags) - sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); + sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags->sb, m); mutex_unlock(&q->sysfs_lock); out: diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..1855f8f5edd4 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -492,6 +492,7 @@ static void 
blk_mq_sched_tags_teardown(struct request_queue *q) int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) { + struct blk_mq_tag_set *tag_set = q->tag_set; struct blk_mq_hw_ctx *hctx; struct elevator_queue *eq; unsigned int i; @@ -537,6 +538,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) blk_mq_debugfs_register_sched_hctx(q, hctx); } + if (blk_mq_is_sbitmap_shared(tag_set)) { + if (!blk_mq_init_sched_shared_sbitmap(tag_set, q->nr_requests)) { + ret = -ENOMEM; + goto err; + } + queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_tags *tags = hctx->sched_tags; + + tags->bitmap_tags = &tag_set->__sched_bitmap_tags; + tags->breserved_tags = &tag_set->__sched_breserved_tags; + } + } + return 0; err: diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index f5009587e1b5..2e714123e846 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -20,7 +20,7 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags) if (!tags) return true; - return sbitmap_any_bit_clear(&tags->bitmap_tags.sb); + return sbitmap_any_bit_clear(&tags->bitmap_tags->sb); } /* @@ -43,9 +43,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) */ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) { - sbitmap_queue_wake_all(&tags->bitmap_tags); + sbitmap_queue_wake_all(tags->bitmap_tags); if (include_reserve) - sbitmap_queue_wake_all(&tags->breserved_tags); + sbitmap_queue_wake_all(tags->breserved_tags); } /* @@ -121,10 +121,10 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) WARN_ON_ONCE(1); return BLK_MQ_TAG_FAIL; } - bt = &tags->breserved_tags; + bt = tags->breserved_tags; tag_offset = 0; } else { - bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; tag_offset = tags->nr_reserved_tags; } @@ -170,9 +170,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) data->ctx); tags = blk_mq_tags_from_data(data); if (data->flags & BLK_MQ_REQ_RESERVED) - bt = &tags->breserved_tags; + bt = tags->breserved_tags; else 
- bt = &tags->bitmap_tags; + bt = tags->bitmap_tags; /* * If destination hw queue is changed, fake wake up on @@ -198,10 +198,10 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); - sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); + sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu); } else { BUG_ON(tag >= tags->nr_reserved_tags); - sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); + sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu); } } @@ -228,7 +228,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * We can hit rq == NULL here, because the tagging functions * test and set the bit before assigning ->rqs[]. */ - if (rq && rq->q == hctx->queue) + if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx) return iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } @@ -329,8 +329,8 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { if (tags->nr_reserved_tags) - bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true); - bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false); + bt_tags_for_each(tags, tags->breserved_tags, fn, priv, true); + bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, false); } /** @@ -427,8 +427,8 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, continue; if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); + bt_for_each(hctx, tags->breserved_tags, fn, priv, true); + bt_for_each(hctx, tags->bitmap_tags, fn, priv, false); } blk_queue_exit(q); } @@ -446,19 +446,85 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; - if (bt_alloc(&tags->bitmap_tags, depth, 
round_robin, node)) + if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node)) return -ENOMEM; - if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, - node)) + if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags, + round_robin, node)) goto free_bitmap_tags; + tags->bitmap_tags = &tags->__bitmap_tags; + tags->breserved_tags = &tags->__breserved_tags; + return 0; free_bitmap_tags: - sbitmap_queue_free(&tags->bitmap_tags); + sbitmap_queue_free(&tags->__bitmap_tags); return -ENOMEM; } -struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, +bool blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *tag_set) +{ + unsigned int depth = tag_set->queue_depth -tag_set->reserved_tags; + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(tag_set->flags); + bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; + int node = tag_set->numa_node; + + if (tag_set->flags & BLK_MQ_F_TAG_BITMAP_ALLOCATED) + return false; + if (bt_alloc(&tag_set->__bitmap_tags, depth, round_robin, node)) + return false; + if (bt_alloc(&tag_set->__breserved_tags, tag_set->reserved_tags, round_robin, + node)) + goto free_bitmap_tags; + tag_set->flags |= BLK_MQ_F_TAG_BITMAP_ALLOCATED; + return true; +free_bitmap_tags: + sbitmap_queue_free(&tag_set->__bitmap_tags); + return false; +} + +void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set) +{ + if (tag_set->flags & BLK_MQ_F_TAG_BITMAP_ALLOCATED) { + sbitmap_queue_free(&tag_set->__bitmap_tags); + sbitmap_queue_free(&tag_set->__breserved_tags); + tag_set->flags &= ~BLK_MQ_F_TAG_BITMAP_ALLOCATED; + } +} + +bool blk_mq_init_sched_shared_sbitmap(struct blk_mq_tag_set *tag_set, + unsigned long nr_requests) +{ + unsigned int depth = nr_requests -tag_set->reserved_tags; + int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(tag_set->flags); + bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; + int node = tag_set->numa_node; + + if (tag_set->flags & BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED) + return false; + if 
(bt_alloc(&tag_set->__sched_bitmap_tags, depth, round_robin, node)) + return false; + if (bt_alloc(&tag_set->__sched_breserved_tags, tag_set->reserved_tags, + round_robin, node)) + goto free_bitmap_tags; + + tag_set->flags |= BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED; + return true; +free_bitmap_tags: + sbitmap_queue_free(&tag_set->__sched_bitmap_tags); + return false; +} + +void blk_mq_exit_shared_sched_sbitmap(struct blk_mq_tag_set *tag_set) +{ + if (tag_set->flags & BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED) { + sbitmap_queue_free(&tag_set->__sched_bitmap_tags); + sbitmap_queue_free(&tag_set->__sched_breserved_tags); + tag_set->flags &= ~BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED; + } +} + +struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set, + unsigned int total_tags, unsigned int reserved_tags, int node, int alloc_policy) { @@ -476,6 +542,8 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; + if (blk_mq_is_sbitmap_shared(set)) + return tags; if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) { kfree(tags); tags = NULL; @@ -485,8 +553,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, void blk_mq_free_tags(struct blk_mq_tags *tags) { - sbitmap_queue_free(&tags->bitmap_tags); - sbitmap_queue_free(&tags->breserved_tags); + if (tags->bitmap_tags == &tags->__bitmap_tags) + sbitmap_queue_free(&tags->__bitmap_tags); + if (tags->breserved_tags == &tags->__breserved_tags) + sbitmap_queue_free(&tags->__breserved_tags); kfree(tags); } @@ -536,7 +606,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, * Don't need (or can't) update reserved tags here, they * remain static and should never need resizing. 
*/ - sbitmap_queue_resize(&tags->bitmap_tags, + sbitmap_queue_resize(tags->bitmap_tags, tdepth - tags->nr_reserved_tags); } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 6c0f7c9ce9f6..9463b878462f 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -13,8 +13,11 @@ struct blk_mq_tags { atomic_t active_queues; - struct sbitmap_queue bitmap_tags; - struct sbitmap_queue breserved_tags; + struct sbitmap_queue *bitmap_tags; + struct sbitmap_queue *breserved_tags; + + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct request **rqs; struct request **static_rqs; @@ -22,7 +25,15 @@ struct blk_mq_tags { }; -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy); +extern bool blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *tag_set); +extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set); +extern bool blk_mq_init_sched_shared_sbitmap(struct blk_mq_tag_set *tag_set, + unsigned long nr_requests); +extern void blk_mq_exit_shared_sched_sbitmap(struct blk_mq_tag_set *tag_set); +extern struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *tag_set, + unsigned int nr_tags, + unsigned int reserved_tags, + int node, int alloc_policy); extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); diff --git a/block/blk-mq.c b/block/blk-mq.c index 91950d3e436a..016f8401cfb9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1095,7 +1095,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, struct sbitmap_queue *sbq; list_del_init(&wait->entry); - sbq = &hctx->tags->bitmap_tags; + sbq = hctx->tags->bitmap_tags; atomic_dec(&sbq->ws_active); } spin_unlock(&hctx->dispatch_wait_lock); @@ -1113,7 +1113,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { - 
struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; + struct sbitmap_queue *sbq = hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; @@ -2081,7 +2081,6 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags) tags->rqs = NULL; kfree(tags->static_rqs); tags->static_rqs = NULL; - blk_mq_free_tags(tags); } @@ -2097,7 +2096,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, if (node == NUMA_NO_NODE) node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, node, + tags = blk_mq_init_tags(set, nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; @@ -2954,8 +2953,10 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; out_unwind: - while (--i >= 0) + while (--i >= 0) { blk_mq_free_rq_map(set->tags[i]); + set->tags[i] = NULL; + } return -ENOMEM; } @@ -3100,6 +3101,20 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (ret) goto out_free_mq_map; + if (blk_mq_is_sbitmap_shared(set)) { + if (!blk_mq_init_shared_sbitmap(set)) { + ret = -ENOMEM; + goto out_free_mq_map; + } + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + tags->bitmap_tags = &set->__bitmap_tags; + tags->breserved_tags = &set->__breserved_tags; + } + } + mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); @@ -3123,6 +3138,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < nr_hw_queues(set); i++) blk_mq_free_map_and_requests(set, i); + blk_mq_exit_shared_sched_sbitmap(set); + blk_mq_exit_shared_sbitmap(set); + for (j = 0; j < set->nr_maps; j++) { kfree(set->map[j].mq_map); set->map[j].mq_map = NULL; @@ -3137,6 +3155,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; + bool sched_tags = false; int i, ret; if (!set) @@ -3160,6 +3179,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, 
unsigned int nr) ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); } else { + sched_tags = true; ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); } @@ -3169,8 +3189,43 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) q->elevator->type->ops.depth_updated(hctx); } - if (!ret) + /* + * if ret is 0, all queues should have been updated to the same depth + * if not, then maybe some have been updated - yuk, need to handle this for shared sbitmap... + * if some are updated, we should probably roll back the change altogether. FIXME + */ + if (!ret) { + if (blk_mq_is_sbitmap_shared(set)) { + if (sched_tags) { + blk_mq_exit_shared_sched_sbitmap(set); + if (!blk_mq_init_sched_shared_sbitmap(set, nr)) + return -ENOMEM; /* fixup error handling */ + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_tags->bitmap_tags = + &set->__sched_bitmap_tags; + hctx->sched_tags->breserved_tags = + &set->__sched_breserved_tags; + } + } else { + blk_mq_exit_shared_sbitmap(set); + if (!blk_mq_init_shared_sbitmap(set)) + return -ENOMEM; /* fixup error handling */ + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->tags->bitmap_tags = + &set->__bitmap_tags; + hctx->tags->breserved_tags = + &set->__breserved_tags; + } + } + } q->nr_requests = nr; + } + /* + * if ret != 0, q->nr_requests would not be updated, yet the depth + * for some hctx may have changed - is that right? 
+ */ blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); diff --git a/block/blk-mq.h b/block/blk-mq.h index 78d38b5f2793..4c1ea206d3f4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -166,6 +166,11 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; +static inline bool blk_mq_is_sbitmap_shared(struct blk_mq_tag_set *tag_set) +{ + return !!(tag_set->flags & BLK_MQ_F_TAG_HCTX_SHARED); +} + static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->flags & BLK_MQ_REQ_INTERNAL) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 34dcea0ef637..a7a537501d70 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -359,7 +359,7 @@ static unsigned int kyber_sched_tags_shift(struct request_queue *q) * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. */ - return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) @@ -502,7 +502,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) khd->batching = 0; hctx->sched_data = khd; - sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags, + sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags, kqd->async_depth); return 0; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 147185394a25..10c9ed3dbe80 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -109,6 +109,12 @@ struct blk_mq_tag_set { unsigned int flags; /* BLK_MQ_F_* */ void *driver_data; + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; + + struct sbitmap_queue __sched_bitmap_tags; + struct sbitmap_queue __sched_breserved_tags; + struct blk_mq_tags **tags; struct mutex tag_list_lock; @@ -226,6 +232,9 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, + 
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 2, + BLK_MQ_F_TAG_BITMAP_ALLOCATED = 1 << 3, + BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, From patchwork Mon Dec 2 15:39:08 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269307 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 63183930 for ; Mon, 2 Dec 2019 15:39:37 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 4C37320881 for ; Mon, 2 Dec 2019 15:39:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727431AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 Received: from mx2.suse.de ([195.135.220.15]:44838 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727572AbfLBPja (ORCPT ); Mon, 2 Dec 2019 10:39:30 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 2B0AFC1AC; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. 
Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 05/11] blk-mq: add WARN_ON in blk_mq_free_rqs() Date: Mon, 2 Dec 2019 16:39:08 +0100 Message-Id: <20191202153914.84722-6-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Before freeing up the requests we should ensure that none of those requests are still present in the ->rqs array; a request left there could lead to a use-after-free error. Signed-off-by: Hannes Reinecke --- block/blk-mq.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 016f8401cfb9..054c0597c052 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2049,10 +2049,14 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) { struct page *page; + int i; - if (tags->rqs && set->ops->exit_request) { - int i; - + if (tags->rqs) { + for (i = 0; i < tags->nr_tags; i++) + if (WARN_ON(tags->rqs[i])) + tags->rqs[i] = NULL; + } + if (tags->static_rqs && set->ops->exit_request) { for (i = 0; i < tags->nr_tags; i++) { struct request *rq = tags->static_rqs[i]; From patchwork Mon Dec 2 15:39:09 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269289 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2B658109A for ; Mon, 2 Dec 2019 15:39:33 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 13EE82146E for ; Mon, 2 Dec 2019 15:39:33 +0000 (UTC) Received: 
(majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727594AbfLBPjc (ORCPT ); Mon, 2 Dec 2019 10:39:32 -0500 Received: from mx2.suse.de ([195.135.220.15]:44832 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727549AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 0D84DC1A8; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 06/11] blk-mq: move shared sbitmap into elevator queue Date: Mon, 2 Dec 2019 16:39:09 +0100 Message-Id: <20191202153914.84722-7-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org When an elevator is present the sbitmap is actually per request queue, so modifying the tagset queue depth in the shared sbitmap case does the wrong thing, as it changes the queue depth for all request queues. So this patch moves the shared scheduler sbitmap into struct elevator_queue, thereby confining any modifications to this request queue only. And with that we can increase the number of requests for the shared sbitmap case to the queue depth times the number of queues, as now all tags are allocated from the same bitmap. This also solves the problem of sbitmap resizing in the shared sbitmap case; we can simply require an elevator to be present if the queue depth needs to be modified, as then the queue depth will be modified for this particular request queue only. 
Signed-off-by: Hannes Reinecke --- block/blk-mq-sched.c | 14 +++++++++++--- block/blk-mq-tag.c | 30 ++++++++++++++++-------------- block/blk-mq-tag.h | 8 +++++--- block/blk-mq.c | 41 +++++++++++++++-------------------------- block/blk-sysfs.c | 7 +++++++ include/linux/blk-mq.h | 4 ---- include/linux/elevator.h | 3 +++ 7 files changed, 57 insertions(+), 50 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1855f8f5edd4..f2184199a1b7 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -511,6 +511,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) */ q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, BLKDEV_MAX_RQ); + /* + * For the shared sbitmap case it's per request queue, so multiply + * with the number of hw queues + */ + if (blk_mq_is_sbitmap_shared(tag_set)) + q->nr_requests *= q->nr_hw_queues; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_sched_alloc_tags(q, hctx, i); @@ -539,15 +545,16 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) } if (blk_mq_is_sbitmap_shared(tag_set)) { - if (!blk_mq_init_sched_shared_sbitmap(tag_set, q->nr_requests)) { + eq = q->elevator; + if (!blk_mq_init_elevator_sbitmap(q, eq, q->nr_requests)) { ret = -ENOMEM; goto err; } queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->sched_tags; - tags->bitmap_tags = &tag_set->__sched_bitmap_tags; - tags->breserved_tags = &tag_set->__sched_breserved_tags; + tags->bitmap_tags = &eq->__bitmap_tags; + tags->breserved_tags = &eq->__breserved_tags; } } @@ -591,5 +598,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q); + blk_mq_exit_elevator_sbitmap(q, e); q->elevator = NULL; } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 2e714123e846..39c7beffdd04 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -491,35 +491,37 @@ void 
blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set) } } -bool blk_mq_init_sched_shared_sbitmap(struct blk_mq_tag_set *tag_set, - unsigned long nr_requests) +bool blk_mq_init_elevator_sbitmap(struct request_queue *q, + struct elevator_queue *eq, + unsigned int nr_requests) { - unsigned int depth = nr_requests -tag_set->reserved_tags; + struct blk_mq_tag_set *tag_set = q->tag_set; + unsigned int depth = nr_requests - tag_set->reserved_tags; int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(tag_set->flags); bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; int node = tag_set->numa_node; - if (tag_set->flags & BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED) - return false; - if (bt_alloc(&tag_set->__sched_bitmap_tags, depth, round_robin, node)) + if (!blk_mq_is_sbitmap_shared(q->tag_set)) + return true; + + if (bt_alloc(&eq->__bitmap_tags, depth, round_robin, node)) return false; - if (bt_alloc(&tag_set->__sched_breserved_tags, tag_set->reserved_tags, + if (bt_alloc(&eq->__breserved_tags, tag_set->reserved_tags, round_robin, node)) goto free_bitmap_tags; - tag_set->flags |= BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED; return true; free_bitmap_tags: - sbitmap_queue_free(&tag_set->__sched_bitmap_tags); + sbitmap_queue_free(&eq->__bitmap_tags); return false; } -void blk_mq_exit_shared_sched_sbitmap(struct blk_mq_tag_set *tag_set) +void blk_mq_exit_elevator_sbitmap(struct request_queue *q, + struct elevator_queue *eq) { - if (tag_set->flags & BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED) { - sbitmap_queue_free(&tag_set->__sched_bitmap_tags); - sbitmap_queue_free(&tag_set->__sched_breserved_tags); - tag_set->flags &= ~BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED; + if (blk_mq_is_sbitmap_shared(q->tag_set)) { + sbitmap_queue_free(&eq->__bitmap_tags); + sbitmap_queue_free(&eq->__breserved_tags); } } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 9463b878462f..31a42d50a8f1 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -27,9 +27,11 @@ struct blk_mq_tags { extern bool 
blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *tag_set); extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *tag_set); -extern bool blk_mq_init_sched_shared_sbitmap(struct blk_mq_tag_set *tag_set, - unsigned long nr_requests); -extern void blk_mq_exit_shared_sched_sbitmap(struct blk_mq_tag_set *tag_set); +extern bool blk_mq_init_elevator_sbitmap(struct request_queue *q, + struct elevator_queue *eq, + unsigned int nr_tags); +extern void blk_mq_exit_elevator_sbitmap(struct request_queue *q, + struct elevator_queue *eq); extern struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *tag_set, unsigned int nr_tags, unsigned int reserved_tags, diff --git a/block/blk-mq.c b/block/blk-mq.c index 054c0597c052..c5cff1de56b3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3142,7 +3142,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < nr_hw_queues(set); i++) blk_mq_free_map_and_requests(set, i); - blk_mq_exit_shared_sched_sbitmap(set); blk_mq_exit_shared_sbitmap(set); for (j = 0; j < set->nr_maps; j++) { @@ -3159,12 +3158,14 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; - bool sched_tags = false; int i, ret; if (!set) return -EINVAL; + if (blk_mq_is_sbitmap_shared(set) && !q->elevator) + return -EINVAL; + if (q->nr_requests == nr) return 0; @@ -3183,7 +3184,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); } else { - sched_tags = true; ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); } @@ -3199,29 +3199,18 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) * if some are updated, we should probably roll back the change altogether. 
FIXME */ if (!ret) { - if (blk_mq_is_sbitmap_shared(set)) { - if (sched_tags) { - blk_mq_exit_shared_sched_sbitmap(set); - if (!blk_mq_init_sched_shared_sbitmap(set, nr)) - return -ENOMEM; /* fixup error handling */ - - queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags->bitmap_tags = - &set->__sched_bitmap_tags; - hctx->sched_tags->breserved_tags = - &set->__sched_breserved_tags; - } - } else { - blk_mq_exit_shared_sbitmap(set); - if (!blk_mq_init_shared_sbitmap(set)) - return -ENOMEM; /* fixup error handling */ - - queue_for_each_hw_ctx(q, hctx, i) { - hctx->tags->bitmap_tags = - &set->__bitmap_tags; - hctx->tags->breserved_tags = - &set->__breserved_tags; - } + if (blk_mq_is_sbitmap_shared(set) && q->elevator) { + struct elevator_queue *eq = q->elevator; + + blk_mq_exit_elevator_sbitmap(q, eq); + if (!blk_mq_init_elevator_sbitmap(q, eq, nr)) + return -ENOMEM; /* fixup error handling */ + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_tags->bitmap_tags = + &eq->__bitmap_tags; + hctx->sched_tags->breserved_tags = + &eq->__breserved_tags; } } q->nr_requests = nr; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 46f5198be017..01e644b1cba5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -78,6 +78,13 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; + /* + * We can only modify the queue depth for shared sbitmaps + * if an I/O scheduler is set. 
+ */ + if (blk_mq_is_sbitmap_shared(q->tag_set) && !q->elevator) + return -EINVAL; + err = blk_mq_update_nr_requests(q, nr); if (err) return err; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 10c9ed3dbe80..b4515bb862d4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -112,9 +112,6 @@ struct blk_mq_tag_set { struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; - struct sbitmap_queue __sched_bitmap_tags; - struct sbitmap_queue __sched_breserved_tags; - struct blk_mq_tags **tags; struct mutex tag_list_lock; @@ -234,7 +231,6 @@ enum { BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 2, BLK_MQ_F_TAG_BITMAP_ALLOCATED = 1 << 3, - BLK_MQ_F_TAG_SCHED_BITMAP_ALLOCATED = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 901bda352dcb..11d492f56089 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_BLOCK @@ -104,6 +105,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; unsigned int registered:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; From patchwork Mon Dec 2 15:39:10 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269311 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 51AAC159A for ; Mon, 2 Dec 2019 15:39:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 392962084F for ; Mon, 2 Dec 2019 15:39:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id 
S1727604AbfLBPjh (ORCPT ); Mon, 2 Dec 2019 10:39:37 -0500 Received: from mx2.suse.de ([195.135.220.15]:44834 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727568AbfLBPj3 (ORCPT ); Mon, 2 Dec 2019 10:39:29 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 25AB6C1AA; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 07/11] scsi: Add template flag 'host_tagset' Date: Mon, 2 Dec 2019 16:39:10 +0100 Message-Id: <20191202153914.84722-8-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: Hannes Reinecke Add a host template flag 'host_tagset' so that a host-wide tagset can be shared across multiple reply queues once the SCSI device's reply queues are converted to blk-mq hw queues. 
Signed-off-by: Hannes Reinecke Signed-off-by: John Garry --- drivers/scsi/scsi_lib.c | 2 ++ include/scsi/scsi_host.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2563b061f56b..d7ad7b99bc05 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1899,6 +1899,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; shost->tag_set.flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); + if (shost->hostt->host_tagset) + shost->tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED; shost->tag_set.driver_data = shost; return blk_mq_alloc_tag_set(&shost->tag_set); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index f577647bf5f2..4fd0af0883dd 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -429,6 +429,9 @@ struct scsi_host_template { /* True if the low-level driver supports blk-mq only */ unsigned force_blk_mq:1; + /* True if the host uses host-wide tagspace */ + unsigned host_tagset:1; + /* * Countdown for host blocking with no commands outstanding. 
*/ From patchwork Mon Dec 2 15:39:11 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269315 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 17569159A for ; Mon, 2 Dec 2019 15:39:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id F3A7920881 for ; Mon, 2 Dec 2019 15:39:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727446AbfLBPjg (ORCPT ); Mon, 2 Dec 2019 10:39:36 -0500 Received: from mx2.suse.de ([195.135.220.15]:44842 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727581AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 2B659C1AD; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org Subject: [PATCH 08/11] scsi: hisi_sas: Switch v3 hw to MQ Date: Mon, 2 Dec 2019 16:39:11 +0100 Message-Id: <20191202153914.84722-9-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org From: John Garry Now that the block layer provides a shared tag, we can switch the driver to expose all HW queues. 
Signed-off-by: John Garry --- drivers/scsi/hisi_sas/hisi_sas.h | 3 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 36 +++++++------- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 86 ++++++++++++++-------------------- 3 files changed, 56 insertions(+), 69 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 233c73e01246..0405602df2a4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -8,6 +8,8 @@ #define _HISI_SAS_H_ #include +#include +#include #include #include #include @@ -431,7 +433,6 @@ struct hisi_hba { u32 intr_coal_count; /* Interrupt count to coalesce */ int cq_nvecs; - unsigned int *reply_map; /* bist */ enum sas_linkrate debugfs_bist_linkrate; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 03588ec3c394..e185935c3399 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -421,6 +421,7 @@ static int hisi_sas_task_prep(struct sas_task *task, struct device *dev = hisi_hba->dev; int dlvry_queue_slot, dlvry_queue, rc, slot_idx; int n_elem = 0, n_elem_dif = 0, n_elem_req = 0; + struct scsi_cmnd *scmd = NULL; struct hisi_sas_dq *dq; unsigned long flags; int wr_q_index; @@ -436,10 +437,23 @@ static int hisi_sas_task_prep(struct sas_task *task, return -ECOMM; } - if (hisi_hba->reply_map) { - int cpu = raw_smp_processor_id(); - unsigned int dq_index = hisi_hba->reply_map[cpu]; + if (task->uldd_task) { + struct ata_queued_cmd *qc; + if (dev_is_sata(device)) { + qc = task->uldd_task; + scmd = qc->scsicmd; + } else { + scmd = task->uldd_task; + } + } + + if (scmd) { + unsigned int dq_index; + u32 blk_tag; + + blk_tag = blk_mq_unique_tag(scmd->request); + dq_index = blk_mq_unique_tag_to_hwq(blk_tag); *dq_pointer = dq = &hisi_hba->dq[dq_index]; } else { *dq_pointer = dq = sas_dev->dq; @@ -468,21 +482,9 @@ static int hisi_sas_task_prep(struct sas_task *task, if (hisi_hba->hw->slot_index_alloc) rc = 
hisi_hba->hw->slot_index_alloc(hisi_hba, device); - else { - struct scsi_cmnd *scsi_cmnd = NULL; - - if (task->uldd_task) { - struct ata_queued_cmd *qc; + else + rc = hisi_sas_slot_index_alloc(hisi_hba, scmd); - if (dev_is_sata(device)) { - qc = task->uldd_task; - scsi_cmnd = qc->scsicmd; - } else { - scsi_cmnd = task->uldd_task; - } - } - rc = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd); - } if (rc < 0) goto err_out_dif_dma_unmap; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index bf5d5f138437..e7b015d88968 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2353,66 +2353,35 @@ static irqreturn_t cq_interrupt_v3_hw(int irq_no, void *p) return IRQ_HANDLED; } -static void setup_reply_map_v3_hw(struct hisi_hba *hisi_hba, int nvecs) +static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba) { - const struct cpumask *mask; - int queue, cpu; - - for (queue = 0; queue < nvecs; queue++) { - struct hisi_sas_cq *cq = &hisi_hba->cq[queue]; + int vectors; + int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = BASE_VECTORS_V3_HW, + }; + + min_msi = MIN_AFFINE_VECTORS_V3_HW; + vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, + min_msi, max_msi, + PCI_IRQ_MSI | + PCI_IRQ_AFFINITY, + &desc); + if (vectors < 0) + return -ENOENT; - mask = pci_irq_get_affinity(hisi_hba->pci_dev, queue + - BASE_VECTORS_V3_HW); - if (!mask) - goto fallback; - cq->pci_irq_mask = mask; - for_each_cpu(cpu, mask) - hisi_hba->reply_map[cpu] = queue; - } - return; + hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + shost->nr_hw_queues = hisi_hba->cq_nvecs; -fallback: - for_each_possible_cpu(cpu) - hisi_hba->reply_map[cpu] = cpu % hisi_hba->queue_count; - /* Don't clean all CQ masks */ + return 0; } static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; struct 
pci_dev *pdev = hisi_hba->pci_dev; - int vectors, rc, i; - int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi; - - if (auto_affine_msi_experimental) { - struct irq_affinity desc = { - .pre_vectors = BASE_VECTORS_V3_HW, - }; - - min_msi = MIN_AFFINE_VECTORS_V3_HW; - - hisi_hba->reply_map = devm_kcalloc(dev, nr_cpu_ids, - sizeof(unsigned int), - GFP_KERNEL); - if (!hisi_hba->reply_map) - return -ENOMEM; - vectors = pci_alloc_irq_vectors_affinity(hisi_hba->pci_dev, - min_msi, max_msi, - PCI_IRQ_MSI | - PCI_IRQ_AFFINITY, - &desc); - if (vectors < 0) - return -ENOENT; - setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW); - } else { - min_msi = max_msi; - vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi, - max_msi, PCI_IRQ_MSI); - if (vectors < 0) - return vectors; - } - - hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW; + int rc, i; rc = devm_request_irq(dev, pci_irq_vector(pdev, 1), int_phy_up_down_bcast_v3_hw, 0, @@ -3057,6 +3026,15 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable) return 0; } +static int hisi_sas_map_queues(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + + return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev, + BASE_VECTORS_V3_HW); +} + static struct scsi_host_template sht_v3_hw = { .name = DRV_NAME, .module = THIS_MODULE, @@ -3065,6 +3043,7 @@ static struct scsi_host_template sht_v3_hw = { .slave_configure = hisi_sas_slave_configure, .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, + .map_queues = hisi_sas_map_queues, .change_queue_depth = sas_change_queue_depth, .bios_param = sas_bios_param, .this_id = -1, @@ -3078,6 +3057,7 @@ static struct scsi_host_template sht_v3_hw = { .shost_attrs = host_attrs_v3_hw, .tag_alloc_policy = BLK_TAG_ALLOC_RR, .host_reset = hisi_sas_host_reset, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v3_hw = { @@ -3249,6 +3229,10 @@ 
hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (hisi_sas_debugfs_enable) hisi_sas_debugfs_init(hisi_hba); + rc = interrupt_preinit_v3_hw(hisi_hba); + if (rc) + goto err_out_ha; + dev_err(dev, "%d hw qeues\n", shost->nr_hw_queues); rc = scsi_add_host(shost, dev); if (rc) goto err_out_ha; From patchwork Mon Dec 2 15:39:12 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269291 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B0957930 for ; Mon, 2 Dec 2019 15:39:33 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8EC8B20881 for ; Mon, 2 Dec 2019 15:39:33 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727599AbfLBPjc (ORCPT ); Mon, 2 Dec 2019 10:39:32 -0500 Received: from mx2.suse.de ([195.135.220.15]:44836 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727566AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 0D881C1A9; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. 
Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke , Hannes Reinecke Subject: [PATCH 09/11] megaraid_sas: switch fusion adapters to MQ Date: Mon, 2 Dec 2019 16:39:12 +0100 Message-Id: <20191202153914.84722-10-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Fusion adapters can steer completions to individual queues, and we now have support for shared host-wide tags. So we can enable multiqueue support for fusion adapters and drop the hand-crafted interrupt affinity settings. Signed-off-by: Hannes Reinecke --- drivers/scsi/megaraid/megaraid_sas.h | 1 - drivers/scsi/megaraid/megaraid_sas_base.c | 65 +++++++++-------------------- drivers/scsi/megaraid/megaraid_sas_fusion.c | 14 ++++--- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index bd8184072bed..844ea2d6dbb8 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2261,7 +2261,6 @@ enum MR_PERF_MODE { struct megasas_instance { - unsigned int *reply_map; __le32 *producer; dma_addr_t producer_h; __le32 *consumer; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index a4bc81479284..9d0d74e3d491 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -3106,6 +3107,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance 
*)shost->hostdata; + + if (!instance->smp_affinity_enable) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3414,9 +3428,11 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, .no_write_same = 1, + .host_tagset = 1, }; /** @@ -5695,34 +5711,6 @@ megasas_setup_jbod_map(struct megasas_instance *instance) instance->use_seqnum_jbod_fp = false; } -static void megasas_setup_reply_map(struct megasas_instance *instance) -{ - const struct cpumask *mask; - unsigned int queue, cpu, low_latency_index_start; - - low_latency_index_start = instance->low_latency_index_start; - - for (queue = low_latency_index_start; queue < instance->msix_vectors; queue++) { - mask = pci_irq_get_affinity(instance->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - instance->reply_map[cpu] = queue; - } - return; - -fallback: - queue = low_latency_index_start; - for_each_possible_cpu(cpu) { - instance->reply_map[cpu] = queue; - if (queue == (instance->msix_vectors - 1)) - queue = low_latency_index_start; - else - queue++; - } -} - /** * megasas_get_device_list - Get the PD and LD device list from FW. * @instance: Adapter soft state @@ -6021,12 +6009,6 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 
1 : 0; - if (instance->adapter_type >= INVADER_SERIES && - !instance->msix_combined) { - instance->msix_load_balance = true; - instance->smp_affinity_enable = false; - } - /* Save 1-15 reply post index address to local memory * Index 0 is already saved from reg offset * MPI2_REPLY_POST_HOST_INDEX_OFFSET @@ -6145,8 +6127,6 @@ static int megasas_init_fw(struct megasas_instance *instance) goto fail_init_adapter; } - megasas_setup_reply_map(instance); - dev_info(&instance->pdev->dev, "current msix/online cpus\t: (%d/%d)\n", instance->msix_vectors, (unsigned int)num_online_cpus()); @@ -6780,6 +6760,9 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_id = MEGASAS_MAX_DEV_PER_CHANNEL; host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + if (instance->adapter_type != MFI_SERIES && instance->msix_vectors > 0) + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; /* * Notify the mid-layer about the new controller @@ -6947,11 +6930,6 @@ static inline int megasas_alloc_mfi_ctrl_mem(struct megasas_instance *instance) */ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) { - instance->reply_map = kcalloc(nr_cpu_ids, sizeof(unsigned int), - GFP_KERNEL); - if (!instance->reply_map) - return -ENOMEM; - switch (instance->adapter_type) { case MFI_SERIES: if (megasas_alloc_mfi_ctrl_mem(instance)) @@ -6968,8 +6946,6 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) return 0; fail: - kfree(instance->reply_map); - instance->reply_map = NULL; return -ENOMEM; } @@ -6982,7 +6958,6 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance) */ static inline void megasas_free_ctrl_mem(struct megasas_instance *instance) { - kfree(instance->reply_map); if (instance->adapter_type == MFI_SERIES) { if (instance->producer) dma_free_coherent(&instance->pdev->dev, sizeof(u32), @@ -7645,8 +7620,6 @@ megasas_resume(struct pci_dev *pdev) if (rval < 0) goto fail_reenable_msix; - 
megasas_setup_reply_map(instance); - if (instance->adapter_type != MFI_SERIES) { megasas_reset_reply_desc(instance); if (megasas_ioc_init_fusion(instance)) { diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e301458bcbae..bae96b82bb10 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2731,6 +2731,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, struct MR_PRIV_DEVICE *mrdev_priv; struct RAID_CONTEXT *rctx; struct RAID_CONTEXT_G35 *rctx_g35; + u32 tag = blk_mq_unique_tag(scp->request); device_id = MEGASAS_DEV_INDEX(scp); @@ -2837,7 +2838,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, instance->msix_vectors)); else cmd->request_desc->SCSIIO.MSIxIndex = - instance->reply_map[raw_smp_processor_id()]; + blk_mq_unique_tag_to_hwq(tag); if (instance->adapter_type >= VENTURA_SERIES) { /* FP for Optimal raid level 1. @@ -3080,6 +3081,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, u16 pd_index = 0; u16 os_timeout_value; u16 timeout_limit; + u32 tag = blk_mq_unique_tag(scmd->request); struct MR_DRV_RAID_MAP_ALL *local_map_ptr; struct RAID_CONTEXT *pRAID_Context; struct MR_PD_CFG_SEQ_NUM_SYNC *pd_sync; @@ -3169,7 +3171,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, instance->msix_vectors)); else cmd->request_desc->SCSIIO.MSIxIndex = - instance->reply_map[raw_smp_processor_id()]; + blk_mq_unique_tag_to_hwq(tag); if (!fp_possible) { /* system pd firmware path */ @@ -3373,7 +3375,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, { struct megasas_cmd_fusion *cmd, *r1_cmd = NULL; union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; - u32 index; + u32 index, blk_tag, unique_tag; if ((megasas_cmd_type(scmd) == READ_WRITE_LDIO) && instance->ldio_threshold && @@ -3389,7 +3391,9 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, return 
SCSI_MLQUEUE_HOST_BUSY; } - cmd = megasas_get_cmd_fusion(instance, scmd->request->tag); + unique_tag = blk_mq_unique_tag(scmd->request); + blk_tag = blk_mq_unique_tag_to_tag(unique_tag); + cmd = megasas_get_cmd_fusion(instance, blk_tag); if (!cmd) { atomic_dec(&instance->fw_outstanding); @@ -3430,7 +3434,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, */ if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) { r1_cmd = megasas_get_cmd_fusion(instance, - (scmd->request->tag + instance->max_fw_cmds)); + (blk_tag + instance->max_fw_cmds)); megasas_prepare_secondRaid1_IO(instance, cmd, r1_cmd); } From patchwork Mon Dec 2 15:39:13 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269287 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E4571159A for ; Mon, 2 Dec 2019 15:39:32 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id CD95C2146E for ; Mon, 2 Dec 2019 15:39:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727589AbfLBPjc (ORCPT ); Mon, 2 Dec 2019 10:39:32 -0500 Received: from mx2.suse.de ([195.135.220.15]:44830 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727557AbfLBPja (ORCPT ); Mon, 2 Dec 2019 10:39:30 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 25B15C1AB; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. 
Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 10/11] smartpqi: enable host tagset Date: Mon, 2 Dec 2019 16:39:13 +0100 Message-Id: <20191202153914.84722-11-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org Enable host tagset for smartpqi; with this we can use the request tag to look up the command in the pool, avoiding the list iteration in the hot path. Signed-off-by: Hannes Reinecke --- drivers/scsi/smartpqi/smartpqi_init.c | 38 ++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 7b7ef3acb504..c17b533c84ad 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -575,17 +575,29 @@ static inline void pqi_reinit_io_request(struct pqi_io_request *io_request) } static struct pqi_io_request *pqi_alloc_io_request( - struct pqi_ctrl_info *ctrl_info) + struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) { struct pqi_io_request *io_request; + unsigned int limit = PQI_RESERVED_IO_SLOTS; u16 i = ctrl_info->next_io_request_slot; /* benignly racy */ - while (1) { + if (scmd) { + u32 blk_tag = blk_mq_unique_tag(scmd->request); + + i = blk_mq_unique_tag_to_tag(blk_tag) + limit; io_request = &ctrl_info->io_request_pool[i]; - if (atomic_inc_return(&io_request->refcount) == 1) - break; - atomic_dec(&io_request->refcount); - i = (i + 1) % ctrl_info->max_io_slots; + if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) { + atomic_dec(&io_request->refcount); + return NULL; + } + } else { + while (1) { + io_request = &ctrl_info->io_request_pool[i]; + if
(atomic_inc_return(&io_request->refcount) == 1) + break; + atomic_dec(&io_request->refcount); + i = (i + 1) % limit; + } } /* benignly racy */ @@ -4075,7 +4087,7 @@ static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info, atomic_inc(&ctrl_info->sync_cmds_outstanding); - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); put_unaligned_le16(io_request->index, &(((struct pqi_raid_path_request *)request)->request_id)); @@ -5032,7 +5044,9 @@ static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, { struct pqi_io_request *io_request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request, device, scmd, queue_group); @@ -5230,7 +5244,10 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, struct pqi_io_request *io_request; struct pqi_aio_path_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, scmd); + if (!io_request) + return SCSI_MLQUEUE_HOST_BUSY; + io_request->io_complete_callback = pqi_aio_io_complete; io_request->scmd = scmd; io_request->raid_bypass = raid_bypass; @@ -5657,7 +5674,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, DECLARE_COMPLETION_ONSTACK(wait); struct pqi_task_management_request *request; - io_request = pqi_alloc_io_request(ctrl_info); + io_request = pqi_alloc_io_request(ctrl_info, NULL); io_request->io_complete_callback = pqi_lun_reset_complete; io_request->context = &wait; @@ -6504,6 +6521,7 @@ static struct scsi_host_template pqi_driver_template = { .map_queues = pqi_map_queues, .sdev_attrs = pqi_sdev_attrs, .shost_attrs = pqi_shost_attrs, + .host_tagset = 1, }; static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info) From patchwork Mon Dec 2 15:39:14 2019 Content-Type: text/plain; charset="utf-8" 
MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hannes Reinecke X-Patchwork-Id: 11269285 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B0429930 for ; Mon, 2 Dec 2019 15:39:32 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 97F522084F for ; Mon, 2 Dec 2019 15:39:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727588AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 Received: from mx2.suse.de ([195.135.220.15]:44840 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727547AbfLBPjb (ORCPT ); Mon, 2 Dec 2019 10:39:31 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 3460FC1AE; Mon, 2 Dec 2019 15:39:26 +0000 (UTC) From: Hannes Reinecke To: "Martin K. Petersen" Cc: Jens Axboe , Christoph Hellwig , James Bottomley , John Garry , Ming Lei , linux-scsi@vger.kernel.org, linux-block@vger.kernel.org, Hannes Reinecke Subject: [PATCH 11/11] hpsa: enable host_tagset and switch to MQ Date: Mon, 2 Dec 2019 16:39:14 +0100 Message-Id: <20191202153914.84722-12-hare@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20191202153914.84722-1-hare@suse.de> References: <20191202153914.84722-1-hare@suse.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-block@vger.kernel.org The smart array HBAs can steer interrupt completion, so this patch switches the implementation to use multiqueue and enables 'host_tagset' as the HBA has a shared host-wide tagset. 
Signed-off-by: Hannes Reinecke --- drivers/scsi/hpsa.c | 44 +++++++------------------------------------- drivers/scsi/hpsa.h | 1 - 2 files changed, 7 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index ac39ed79ccaa..2b811c981b43 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -974,6 +974,7 @@ static struct scsi_host_template hpsa_driver_template = { .shost_attrs = hpsa_shost_attrs, .max_sectors = 2048, .no_write_same = 1, + .host_tagset = 1, }; static inline u32 next_command(struct ctlr_info *h, u8 q) @@ -1138,12 +1139,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, static void __enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c, int reply_queue) { + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); + dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); if (c->device) atomic_inc(&c->device->commands_outstanding); - reply_queue = h->reply_map[raw_smp_processor_id()]; + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -5628,8 +5631,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* Get the ptr to our adapter structure out of cmd->host. */ h = sdev_to_hba(cmd->device); - BUG_ON(cmd->request->tag < 0); - dev = cmd->device->hostdata; if (!dev) { cmd->result = DID_NO_CONNECT << 16; @@ -5805,7 +5806,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) sh->hostdata[0] = (unsigned long) h; sh->irq = pci_irq_vector(h->pdev, 0); sh->unique_id = sh->irq; - + sh->nr_hw_queues = h->msix_vectors > 0 ? 
h->msix_vectors : 1; h->scsi_host = sh; return 0; } @@ -5831,7 +5832,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) */ static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) { - int idx = scmd->request->tag; + u32 blk_tag = blk_mq_unique_tag(scmd->request); + int idx = blk_mq_unique_tag_to_tag(blk_tag); if (idx < 0) return idx; @@ -7431,26 +7433,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < h->msix_vectors; queue++) { - mask = pci_irq_get_affinity(h->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - h->reply_map[cpu] = queue; - } - return; - -fallback: - for_each_possible_cpu(cpu) - h->reply_map[cpu] = 0; -} - /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7847,9 +7829,6 @@ static int hpsa_pci_init(struct ctlr_info *h) if (err) goto clean1; - /* setup mapping between CPU and reply queue */ - hpsa_setup_reply_map(h); - err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8575,7 +8554,6 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, static void hpda_free_ctlr_info(struct ctlr_info *h) { - kfree(h->reply_map); kfree(h); } @@ -8584,14 +8562,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) struct ctlr_info *h; h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); - if (!h->reply_map) { - kfree(h); - return NULL; - } return h; } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index f8c88fc7b80a..ea4a609e3eb7 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -161,7 +161,6 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { - unsigned int *reply_map; 
int ctlr; char devname[8]; char *product_name;