[4/9] ublk: rely on ->canceling for dealing with ublk_nosrv_dev_should_queue_io

Message ID 20250414112554.3025113-5-ming.lei@redhat.com (mailing list archive)
State New
Series ublk: simplify & improve IO canceling

Commit Message

Ming Lei April 14, 2025, 11:25 a.m. UTC
Currently ublk handles ublk_nosrv_dev_should_queue_io() by keeping the request
queue quiesced. This approach is fragile because the queue quiesce state then
crosses syscalls and process contexts.

Switch to relying on ubq->canceling to handle ublk_nosrv_dev_should_queue_io():
the flag is already used for this purpose while the io_uring context is exiting,
and it can be reused before recovery too.

Meanwhile we have to move the reset of ubq->canceling from ublk_queue_reinit() to
ublk_ctrl_end_recovery(), the point at which IO handling can be recovered completely.

Then blk_mq_quiesce_queue() and blk_mq_unquiesce_queue() are always used
in the same context.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)
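
For reference, the reshaped __ublk_quiesce_dev() after this patch reads as
follows (reconstructed from the first hunk below; nothing here goes beyond
what the diff shows):

static void __ublk_quiesce_dev(struct ublk_device *ub)
{
	int i;

	pr_devel("%s: quiesce ub: dev_id %d state %s\n",
			__func__, ub->dev_info.dev_id,
			ub->dev_info.state == UBLK_S_DEV_LIVE ?
			"LIVE" : "QUIESCED");
	blk_mq_quiesce_queue(ub->ub_disk->queue);
	/* mark every queue as canceling so incoming IO is requeued, not issued */
	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
		ublk_get_queue(ub, i)->canceling = true;
	ublk_wait_tagset_rqs_idle(ub);
	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
	/* quiesce and unquiesce now pair up within this single function */
	blk_mq_unquiesce_queue(ub->ub_disk->queue);
}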

Comments

Uday Shankar April 14, 2025, 8:15 p.m. UTC | #1
On Mon, Apr 14, 2025 at 07:25:45PM +0800, Ming Lei wrote:
> Now ublk deals with ublk_nosrv_dev_should_queue_io() by keeping request
> queue as quiesced. This way is fragile because queue quiesce crosses syscalls
> or process contexts.
> 
> Switch to rely on ubq->canceling for dealing with ublk_nosrv_dev_should_queue_io(),
> because it has been used for this purpose during io_uring context exiting, and it
> can be reused before recovering too.
> 
> Meantime we have to move reset of ubq->canceling from ublk_ctrl_end_recovery() to
> ublk_ctrl_end_recovery(), when IO handling can be recovered completely.

First one here should be ublk_ctrl_start_recovery or ublk_queue_reinit

> 
> Then blk_mq_quiesce_queue() and blk_mq_unquiesce_queue() are always used
> in same context.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/block/ublk_drv.c | 31 +++++++++++++++++--------------
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 7e2c4084c243..e0213222e3cf 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -1734,13 +1734,19 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
>  
>  static void __ublk_quiesce_dev(struct ublk_device *ub)
>  {
> +	int i;
> +
>  	pr_devel("%s: quiesce ub: dev_id %d state %s\n",
>  			__func__, ub->dev_info.dev_id,
>  			ub->dev_info.state == UBLK_S_DEV_LIVE ?
>  			"LIVE" : "QUIESCED");
>  	blk_mq_quiesce_queue(ub->ub_disk->queue);
> +	/* mark every queue as canceling */
> +	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
> +		ublk_get_queue(ub, i)->canceling = true;
>  	ublk_wait_tagset_rqs_idle(ub);
>  	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
> +	blk_mq_unquiesce_queue(ub->ub_disk->queue);

So the queue is not actually quiesced when we are in UBLK_S_DEV_QUIESCED
anymore, and we rely on __ublk_abort_rq to requeue I/O submitted in this
QUIESCED state. This requeued I/O will immediately resubmit, right?
Isn't this a waste of CPU?

>  }
>  
>  static void ublk_force_abort_dev(struct ublk_device *ub)
> @@ -2950,7 +2956,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
>  	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
>  	ubq->ubq_daemon = NULL;
>  	ubq->timeout = false;
> -	ubq->canceling = false;
>  
>  	for (i = 0; i < ubq->q_depth; i++) {
>  		struct ublk_io *io = &ubq->ios[i];
> @@ -3037,20 +3042,18 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
>  	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
>  			__func__, ublksrv_pid, header->dev_id);
>  
> -	if (ublk_nosrv_dev_should_queue_io(ub)) {
> -		ub->dev_info.state = UBLK_S_DEV_LIVE;
> -		blk_mq_unquiesce_queue(ub->ub_disk->queue);
> -		pr_devel("%s: queue unquiesced, dev id %d.\n",
> -				__func__, header->dev_id);
> -		blk_mq_kick_requeue_list(ub->ub_disk->queue);
> -	} else {
> -		blk_mq_quiesce_queue(ub->ub_disk->queue);
> -		ub->dev_info.state = UBLK_S_DEV_LIVE;
> -		for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> -			ublk_get_queue(ub, i)->fail_io = false;
> -		}
> -		blk_mq_unquiesce_queue(ub->ub_disk->queue);
> +	blk_mq_quiesce_queue(ub->ub_disk->queue);
> +	ub->dev_info.state = UBLK_S_DEV_LIVE;
> +	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> +		struct ublk_queue *ubq = ublk_get_queue(ub, i);
> +
> +		ubq->canceling = false;
> +		ubq->fail_io = false;
>  	}
> +	blk_mq_unquiesce_queue(ub->ub_disk->queue);
> +	pr_devel("%s: queue unquiesced, dev id %d.\n",
> +			__func__, header->dev_id);
> +	blk_mq_kick_requeue_list(ub->ub_disk->queue);
>  
>  	ret = 0;
>   out_unlock:
> -- 
> 2.47.0
>
Ming Lei April 15, 2025, 1:48 a.m. UTC | #2
On Mon, Apr 14, 2025 at 02:15:39PM -0600, Uday Shankar wrote:
> On Mon, Apr 14, 2025 at 07:25:45PM +0800, Ming Lei wrote:
> > Now ublk deals with ublk_nosrv_dev_should_queue_io() by keeping request
> > queue as quiesced. This way is fragile because queue quiesce crosses syscalls
> > or process contexts.
> > 
> > Switch to rely on ubq->canceling for dealing with ublk_nosrv_dev_should_queue_io(),
> > because it has been used for this purpose during io_uring context exiting, and it
> > can be reused before recovering too.
> > 
> > Meantime we have to move reset of ubq->canceling from ublk_ctrl_end_recovery() to
> > ublk_ctrl_end_recovery(), when IO handling can be recovered completely.
> 
> First one here should be ublk_ctrl_start_recovery or ublk_queue_reinit

Yeah.

> 
> > 
> > Then blk_mq_quiesce_queue() and blk_mq_unquiesce_queue() are always used
> > in same context.
> > 
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/block/ublk_drv.c | 31 +++++++++++++++++--------------
> >  1 file changed, 17 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index 7e2c4084c243..e0213222e3cf 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -1734,13 +1734,19 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
> >  
> >  static void __ublk_quiesce_dev(struct ublk_device *ub)
> >  {
> > +	int i;
> > +
> >  	pr_devel("%s: quiesce ub: dev_id %d state %s\n",
> >  			__func__, ub->dev_info.dev_id,
> >  			ub->dev_info.state == UBLK_S_DEV_LIVE ?
> >  			"LIVE" : "QUIESCED");
> >  	blk_mq_quiesce_queue(ub->ub_disk->queue);
> > +	/* mark every queue as canceling */
> > +	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
> > +		ublk_get_queue(ub, i)->canceling = true;
> >  	ublk_wait_tagset_rqs_idle(ub);
> >  	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
> > +	blk_mq_unquiesce_queue(ub->ub_disk->queue);
> 
> So the queue is not actually quiesced when we are in UBLK_S_DEV_QUIESCED
> anymore, and we rely on __ublk_abort_rq to requeue I/O submitted in this
> QUIESCED state. This requeued I/O will immediately resubmit, right?
> Isn't this a waste of CPU?

__ublk_abort_rq() just adds the request to the requeue list and doesn't
actually re-dispatch it, so there is no waste of CPU.
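
For reference, the helper is roughly (paraphrased from the in-tree code, not
part of this patch):

static inline void __ublk_abort_rq(struct ublk_queue *ubq,
		struct request *rq)
{
	/* nosrv policy says queue: park the request on the requeue list */
	if (ublk_nosrv_dev_should_queue_io(ubq->dev))
		/* 'false' means don't kick the list, so nothing re-dispatches */
		blk_mq_requeue_request(rq, false);
	else
		blk_mq_end_request(rq, BLK_STS_IOERR);
}

The parked requests only start flowing again once blk_mq_kick_requeue_list()
runs, which this patch does in ublk_ctrl_end_recovery().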

Thanks,
Ming

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 7e2c4084c243..e0213222e3cf 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1734,13 +1734,19 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
 
 static void __ublk_quiesce_dev(struct ublk_device *ub)
 {
+	int i;
+
 	pr_devel("%s: quiesce ub: dev_id %d state %s\n",
 			__func__, ub->dev_info.dev_id,
 			ub->dev_info.state == UBLK_S_DEV_LIVE ?
 			"LIVE" : "QUIESCED");
 	blk_mq_quiesce_queue(ub->ub_disk->queue);
+	/* mark every queue as canceling */
+	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+		ublk_get_queue(ub, i)->canceling = true;
 	ublk_wait_tagset_rqs_idle(ub);
 	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+	blk_mq_unquiesce_queue(ub->ub_disk->queue);
 }
 
 static void ublk_force_abort_dev(struct ublk_device *ub)
@@ -2950,7 +2956,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
 	ubq->ubq_daemon = NULL;
 	ubq->timeout = false;
-	ubq->canceling = false;
 
 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
@@ -3037,20 +3042,18 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
 	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
 			__func__, ublksrv_pid, header->dev_id);
 
-	if (ublk_nosrv_dev_should_queue_io(ub)) {
-		ub->dev_info.state = UBLK_S_DEV_LIVE;
-		blk_mq_unquiesce_queue(ub->ub_disk->queue);
-		pr_devel("%s: queue unquiesced, dev id %d.\n",
-				__func__, header->dev_id);
-		blk_mq_kick_requeue_list(ub->ub_disk->queue);
-	} else {
-		blk_mq_quiesce_queue(ub->ub_disk->queue);
-		ub->dev_info.state = UBLK_S_DEV_LIVE;
-		for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
-			ublk_get_queue(ub, i)->fail_io = false;
-		}
-		blk_mq_unquiesce_queue(ub->ub_disk->queue);
+	blk_mq_quiesce_queue(ub->ub_disk->queue);
+	ub->dev_info.state = UBLK_S_DEV_LIVE;
+	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+		struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+		ubq->canceling = false;
+		ubq->fail_io = false;
 	}
+	blk_mq_unquiesce_queue(ub->ub_disk->queue);
+	pr_devel("%s: queue unquiesced, dev id %d.\n",
+			__func__, header->dev_id);
+	blk_mq_kick_requeue_list(ub->ub_disk->queue);
 
 	ret = 0;
  out_unlock: