diff mbox series

[V2,09/20] block: simplify elevator rebuild for updating nr_hw_queues

Message ID 20250418163708.442085-10-ming.lei@redhat.com (mailing list archive)
State New
Headers show
Series block: unify elevator changing and fix lockdep warning | expand

Commit Message

Ming Lei April 18, 2025, 4:36 p.m. UTC
In blk_mq_update_nr_hw_queues(), nr_hw_queues changes and elevator data
depends on it, so elevator has to be rebuilt.

Now elevator switch isn't allowed during blk_mq_update_nr_hw_queues(), so
we can simply call elevator_change() to rebuild elevator sched tags after
nr_hw_queues is updated.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c   | 103 ++++++-----------------------------------------
 block/blk.h      |   4 +-
 block/elevator.c |  12 +++---
 3 files changed, 22 insertions(+), 97 deletions(-)

Comments

Nilay Shroff April 19, 2025, 11:25 a.m. UTC | #1
On 4/18/25 10:06 PM, Ming Lei wrote:
> In blk_mq_update_nr_hw_queues(), nr_hw_queues changes and elevator data
> depends on it, so elevator has to be rebuilt.
> 
> Now elevator switch isn't allowed during blk_mq_update_nr_hw_queues(), so
> we can simply call elevator_change() to rebuild elevator sched tags after
> nr_hw_queues is updated.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>

Looks good to me:
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
diff mbox series

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e1662617cc7a..0f4a5e674874 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4929,88 +4929,10 @@  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
-/*
- * request_queue and elevator_type pair.
- * It is just used by __blk_mq_update_nr_hw_queues to cache
- * the elevator_type associated with a request_queue.
- */
-struct blk_mq_qe_pair {
-	struct list_head node;
-	struct request_queue *q;
-	struct elevator_type *type;
-};
-
-/*
- * Cache the elevator_type in qe pair list and switch the
- * io scheduler to 'none'
- */
-static bool blk_mq_elv_switch_none(struct list_head *head,
-		struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-
-	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
-	if (!qe)
-		return false;
-
-	/* Accessing q->elevator needs protection from ->elevator_lock. */
-	mutex_lock(&q->elevator_lock);
-
-	if (!q->elevator) {
-		kfree(qe);
-		goto unlock;
-	}
-
-	INIT_LIST_HEAD(&qe->node);
-	qe->q = q;
-	qe->type = q->elevator->type;
-	/* keep a reference to the elevator module as we'll switch back */
-	__elevator_get(qe->type);
-	list_add(&qe->node, head);
-	elevator_disable(q);
-unlock:
-	mutex_unlock(&q->elevator_lock);
-
-	return true;
-}
-
-static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
-						struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-
-	list_for_each_entry(qe, head, node)
-		if (qe->q == q)
-			return qe;
-
-	return NULL;
-}
-
-static void blk_mq_elv_switch_back(struct list_head *head,
-				  struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-	struct elevator_type *t;
-
-	qe = blk_lookup_qe_pair(head, q);
-	if (!qe)
-		return;
-	t = qe->type;
-	list_del(&qe->node);
-	kfree(qe);
-
-	mutex_lock(&q->elevator_lock);
-	elevator_switch(q, t);
-	/* drop the reference acquired in blk_mq_elv_switch_none */
-	elevator_put(t);
-	mutex_unlock(&q->elevator_lock);
-}
-
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 							int nr_hw_queues)
 {
 	struct request_queue *q;
-	LIST_HEAD(head);
 	int prev_nr_hw_queues = set->nr_hw_queues;
 	unsigned int memflags;
 	int i;
@@ -5028,15 +4950,6 @@  static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue_nomemsave(q);
 
-	/*
-	 * Switch IO scheduler to 'none', cleaning up the data associated
-	 * with the previous scheduler. We will switch back once we are done
-	 * updating the new sw to hw queue mappings.
-	 */
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		if (!blk_mq_elv_switch_none(&head, q))
-			goto switch_back;
-
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_debugfs_unregister_hctxs(q);
 		blk_mq_sysfs_unregister_hctxs(q);
@@ -5070,9 +4983,19 @@  static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_debugfs_register_hctxs(q);
 	}
 
-switch_back:
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(&head, q);
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		const char *name = "none";
+
+		mutex_lock(&q->elevator_lock);
+		if (q->elevator && !blk_queue_dying(q))
+			name = q->elevator->type->elevator_name;
+		/*
+		 * nr_hw_queues is changed and elevator data depends on
+		 * it, so we have to force to rebuild elevator
+		 */
+		__elevator_change(q, name, true);
+		mutex_unlock(&q->elevator_lock);
+	}
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue_nomemrestore(q);
diff --git a/block/blk.h b/block/blk.h
index 006e3be433d2..0c3cc1af2525 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -319,8 +319,8 @@  bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 
 bool blk_insert_flush(struct request *rq);
 
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
-void elevator_disable(struct request_queue *q);
+int __elevator_change(struct request_queue *q, const char *elevator_name,
+		      bool force);
 void elevator_exit(struct request_queue *q);
 int elv_register_queue(struct request_queue *q, bool uevent);
 void elv_unregister_queue(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index c23912652f96..f4c02a6c045d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -621,7 +621,7 @@  void elevator_init_mq(struct request_queue *q)
  * If switching fails, we are most likely running out of memory and not able
  * to restore the old io scheduler, so leaving the io scheduler being none.
  */
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
+static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
 	int ret;
 
@@ -657,7 +657,7 @@  int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	return ret;
 }
 
-void elevator_disable(struct request_queue *q)
+static void elevator_disable(struct request_queue *q)
 {
 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
 	lockdep_assert_held(&q->elevator_lock);
@@ -677,7 +677,8 @@  void elevator_disable(struct request_queue *q)
 /*
  * Switch this queue to the given IO scheduler.
  */
-static int elevator_change(struct request_queue *q, const char *elevator_name)
+int __elevator_change(struct request_queue *q, const char *elevator_name,
+		      bool force)
 {
 	struct elevator_type *e;
 	int ret;
@@ -692,7 +693,8 @@  static int elevator_change(struct request_queue *q, const char *elevator_name)
 		return 0;
 	}
 
-	if (q->elevator && elevator_match(q->elevator->type, elevator_name))
+	if (!force && q->elevator &&
+	    elevator_match(q->elevator->type, elevator_name))
 		return 0;
 
 	e = elevator_find_get(elevator_name);
@@ -743,7 +745,7 @@  ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
 
 	memflags = blk_mq_freeze_queue(q);
 	mutex_lock(&q->elevator_lock);
-	ret = elevator_change(q, name);
+	ret = __elevator_change(q, name, false);
 	if (!ret)
 		ret = count;
 	mutex_unlock(&q->elevator_lock);