
[1/3] block: add blk_mq_enter_no_io() and blk_mq_exit_no_io()

Message ID 20250402043851.946498-2-ming.lei@redhat.com (mailing list archive)
State New
Series block: fix lock dependency between freeze and elevator lock

Commit Message

Ming Lei April 2, 2025, 4:38 a.m. UTC
Add blk_mq_enter_no_io() and blk_mq_exit_no_io() for preventing the queue
from handling any FS or passthrough IO while keeping the queue in a
non-frozen state.

The two added APIs help avoid many potential locking risks related to the
freeze lock.

Also add memsave/memrestore variants of the two APIs.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-core.c       |  6 ++++--
 block/blk-mq.c         | 18 ++++++++++++++++--
 block/blk-mq.h         | 19 +++++++++++++++++++
 block/blk.h            |  5 +++--
 include/linux/blkdev.h |  8 ++++++++
 5 files changed, 50 insertions(+), 6 deletions(-)
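
For context, a caller-side sketch of how the two APIs are meant to pair
(hypothetical caller and helper, not part of this patch; the in-tree
users are expected in the later patches of this series):

	/*
	 * Stop FS and passthrough IO from entering the queue without
	 * actually freezing it, avoiding the freeze-lock dependency.
	 */
	blk_mq_enter_no_io(q);

	/* reconfigure the queue, e.g. switch elevator (hypothetical step) */

	blk_mq_exit_no_io(q);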

Comments

Ming Lei April 2, 2025, 7:55 a.m. UTC | #1
On Wed, Apr 02, 2025 at 12:38:47PM +0800, Ming Lei wrote:
> Add blk_mq_enter_no_io() and blk_mq_exit_no_io() for preventing the
> queue from handling any FS or passthrough IO while keeping the queue
> in a non-frozen state.
> 
> The two added APIs help avoid many potential locking risks related to
> the freeze lock.
> 
> Also add memsave/memrestore variants of the two APIs.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  block/blk-core.c       |  6 ++++--
>  block/blk-mq.c         | 18 ++++++++++++++++--
>  block/blk-mq.h         | 19 +++++++++++++++++++
>  block/blk.h            |  5 +++--
>  include/linux/blkdev.h |  8 ++++++++
>  5 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 4623de79effa..a54a18fada8a 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -319,7 +319,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
>  		smp_rmb();
>  		wait_event(q->mq_freeze_wq,
>  			   (!q->mq_freeze_depth &&
> -			    blk_pm_resume_queue(pm, q)) ||
> +			    (blk_pm_resume_queue(pm, q) ||
> +			     !blk_queue_no_io(q))) ||
>  			   blk_queue_dying(q));
>  		if (blk_queue_dying(q))
>  			return -ENODEV;
> @@ -352,7 +353,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
>  		smp_rmb();
>  		wait_event(q->mq_freeze_wq,
>  			   (!q->mq_freeze_depth &&
> -			    blk_pm_resume_queue(false, q)) ||
> +			    (blk_pm_resume_queue(false, q) ||

Here the above '||' should have been '&&'.
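
That is, with the fix applied the wait condition becomes:

	wait_event(q->mq_freeze_wq,
		   (!q->mq_freeze_depth &&
		    (blk_pm_resume_queue(false, q) &&
		     !blk_queue_no_io(q))) ||
		   test_bit(GD_DEAD, &disk->state));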

> +			     !blk_queue_no_io(q))) ||
>  			   test_bit(GD_DEAD, &disk->state));
>  		if (test_bit(GD_DEAD, &disk->state))
>  			goto dead;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index ae8494d88897..075ee51066b3 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -222,8 +222,7 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
>  	bool unfreeze;
>  
>  	mutex_lock(&q->mq_freeze_lock);
> -	if (force_atomic)
> -		q->q_usage_counter.data->force_atomic = true;
> +	q->q_usage_counter.data->force_atomic = force_atomic;
>  	q->mq_freeze_depth--;
>  	WARN_ON_ONCE(q->mq_freeze_depth < 0);
>  	if (!q->mq_freeze_depth) {
> @@ -278,6 +277,21 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
>  }
>  EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
>  
> +void blk_mq_enter_no_io(struct request_queue *q)
> +{
> +	blk_mq_freeze_queue_nomemsave(q);
> +	q->no_io = true;
> +	if (__blk_mq_unfreeze_queue(q, true))
> +		blk_unfreeze_release_lock(q);
> +}
> +
> +void blk_mq_exit_no_io(struct request_queue *q)
> +{
> +	blk_mq_freeze_queue_nomemsave(q);
> +	q->no_io = false;
> +	blk_mq_unfreeze_queue_nomemrestore(q);
> +}
> +
>  /**
>   * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
>   * @set: tag_set to wait on
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 3011a78cf16a..f49070c8c05f 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -452,4 +452,23 @@ static inline bool blk_mq_can_poll(struct request_queue *q)
>  		q->tag_set->map[HCTX_TYPE_POLL].nr_queues;
>  }
>  
> +void blk_mq_enter_no_io(struct request_queue *q);
> +void blk_mq_exit_no_io(struct request_queue *q);
> +
> +static inline unsigned int __must_check
> +blk_mq_enter_no_io_memsave(struct request_queue *q)
> +{
> +	unsigned int memflags = memalloc_noio_save();
> +
> +	blk_mq_enter_no_io(q);
> +	return memflags;
> +}
> +
> +static inline void
> +blk_mq_exit_no_io_memrestore(struct request_queue *q, unsigned int memflags)
> +{
> +	blk_mq_exit_no_io(q);
> +	memalloc_noio_restore(memflags);
> +}
> +
>  #endif
> diff --git a/block/blk.h b/block/blk.h
> index 006e3be433d2..7d0994c1d3ad 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -56,8 +56,9 @@ static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
>  	 * The code that increments the pm_only counter must ensure that the
>  	 * counter is globally visible before the queue is unfrozen.
>  	 */
> -	if (blk_queue_pm_only(q) &&
> -	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
> +	if ((blk_queue_pm_only(q) &&
> +	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) ||

Same as above.


Thanks, 
Ming
Nilay Shroff April 2, 2025, 1:50 p.m. UTC | #2
On 4/2/25 10:08 AM, Ming Lei wrote:
> Add blk_mq_enter_no_io() and blk_mq_exit_no_io() for preventing the
> queue from handling any FS or passthrough IO while keeping the queue
> in a non-frozen state.
> 
> The two added APIs help avoid many potential locking risks related to
> the freeze lock.
> 
> Also add memsave/memrestore variants of the two APIs.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>

I hope you will spin another patch replacing '||' with '&&' in 
blk_queue_enter and __bio_queue_enter as you mentioned in another
mail. With that change, this looks good to me:

Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index 4623de79effa..a54a18fada8a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -319,7 +319,8 @@  int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 		smp_rmb();
 		wait_event(q->mq_freeze_wq,
 			   (!q->mq_freeze_depth &&
-			    blk_pm_resume_queue(pm, q)) ||
+			    (blk_pm_resume_queue(pm, q) ||
+			     !blk_queue_no_io(q))) ||
 			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
@@ -352,7 +353,8 @@  int __bio_queue_enter(struct request_queue *q, struct bio *bio)
 		smp_rmb();
 		wait_event(q->mq_freeze_wq,
 			   (!q->mq_freeze_depth &&
-			    blk_pm_resume_queue(false, q)) ||
+			    (blk_pm_resume_queue(false, q) ||
+			     !blk_queue_no_io(q))) ||
 			   test_bit(GD_DEAD, &disk->state));
 		if (test_bit(GD_DEAD, &disk->state))
 			goto dead;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ae8494d88897..075ee51066b3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -222,8 +222,7 @@  bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 	bool unfreeze;
 
 	mutex_lock(&q->mq_freeze_lock);
-	if (force_atomic)
-		q->q_usage_counter.data->force_atomic = true;
+	q->q_usage_counter.data->force_atomic = force_atomic;
 	q->mq_freeze_depth--;
 	WARN_ON_ONCE(q->mq_freeze_depth < 0);
 	if (!q->mq_freeze_depth) {
@@ -278,6 +277,21 @@  void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 
+void blk_mq_enter_no_io(struct request_queue *q)
+{
+	blk_mq_freeze_queue_nomemsave(q);
+	q->no_io = true;
+	if (__blk_mq_unfreeze_queue(q, true))
+		blk_unfreeze_release_lock(q);
+}
+
+void blk_mq_exit_no_io(struct request_queue *q)
+{
+	blk_mq_freeze_queue_nomemsave(q);
+	q->no_io = false;
+	blk_mq_unfreeze_queue_nomemrestore(q);
+}
+
 /**
  * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
  * @set: tag_set to wait on
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 3011a78cf16a..f49070c8c05f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -452,4 +452,23 @@  static inline bool blk_mq_can_poll(struct request_queue *q)
 		q->tag_set->map[HCTX_TYPE_POLL].nr_queues;
 }
 
+void blk_mq_enter_no_io(struct request_queue *q);
+void blk_mq_exit_no_io(struct request_queue *q);
+
+static inline unsigned int __must_check
+blk_mq_enter_no_io_memsave(struct request_queue *q)
+{
+	unsigned int memflags = memalloc_noio_save();
+
+	blk_mq_enter_no_io(q);
+	return memflags;
+}
+
+static inline void
+blk_mq_exit_no_io_memrestore(struct request_queue *q, unsigned int memflags)
+{
+	blk_mq_exit_no_io(q);
+	memalloc_noio_restore(memflags);
+}
+
 #endif
diff --git a/block/blk.h b/block/blk.h
index 006e3be433d2..7d0994c1d3ad 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -56,8 +56,9 @@  static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
 	 * The code that increments the pm_only counter must ensure that the
 	 * counter is globally visible before the queue is unfrozen.
 	 */
-	if (blk_queue_pm_only(q) &&
-	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
+	if ((blk_queue_pm_only(q) &&
+	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) ||
+			blk_queue_no_io(q))
 		goto fail_put;
 
 	rcu_read_unlock();
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e39c45bc0a97..1b8fd63eee80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -498,6 +498,13 @@  struct request_queue {
 
 	int			quiesce_depth;
 
+	/*
+	 * Prevent queue from handling IO
+	 *
+	 * keep it in same cache line with q_usage_counter
+	 */
+	bool			no_io;
+
 	struct gendisk		*disk;
 
 	/*
@@ -679,6 +686,7 @@  void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_sq_sched(q)	test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
 #define blk_queue_skip_tagset_quiesce(q) \
 	((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_no_io(q)	(q->no_io)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
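
For reference, a minimal sketch of how the memsave variants pair
(hypothetical caller, not part of this patch): blk_mq_enter_no_io_memsave()
enters the no-io state inside a GFP_NOIO allocation scope, and
blk_mq_exit_no_io_memrestore() undoes both in reverse order.

	unsigned int memflags;

	memflags = blk_mq_enter_no_io_memsave(q);
	/* no FS/passthrough IO can enter; allocations here imply GFP_NOIO */
	blk_mq_exit_no_io_memrestore(q, memflags);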