Message ID | 20220913041707.197334-5-ZiyangZhang@linux.alibaba.com (mailing list archive) |
---|---
State | New, archived |
Series | ublk_drv: add USER_RECOVERY support
On Tue, Sep 13, 2022 at 12:17:04PM +0800, ZiyangZhang wrote:
> With recovery feature enabled, in ublk_queue_rq or task work
> (in exit_task_work or fallback wq), we requeue rqs instead of
> ending (aborting) them. Besides, no matter whether the recovery
> feature is enabled or disabled, we schedule monitor_work immediately.
>
> Signed-off-by: ZiyangZhang <ZiyangZhang@linux.alibaba.com>
> ---
>  drivers/block/ublk_drv.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 23337bd7c105..b067f33a1913 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -682,6 +682,21 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res)
>
>  #define UBLK_REQUEUE_DELAY_MS	3
>
> +static inline void __ublk_abort_rq_in_task_work(struct ublk_queue *ubq,
> +		struct request *rq)
> +{
> +	pr_devel("%s: %s q_id %d tag %d io_flags %x.\n", __func__,
> +			(ublk_queue_can_use_recovery(ubq)) ? "requeue" : "abort",
> +			ubq->q_id, rq->tag, ubq->ios[rq->tag].flags);
> +	/* We cannot process this rq so just requeue it. */
> +	if (ublk_queue_can_use_recovery(ubq)) {
> +		blk_mq_requeue_request(rq, false);
> +		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);

Here you needn't kick the requeue list, since we know it can't make
progress. And you can do that once before deleting the gendisk, or
when the queue is recovered.

> +	} else {
> +		blk_mq_end_request(rq, BLK_STS_IOERR);
> +	}
> +}
> +
>  static inline void __ublk_rq_task_work(struct request *req)
>  {
>  	struct ublk_queue *ubq = req->mq_hctx->driver_data;
> @@ -704,7 +719,7 @@ static inline void __ublk_rq_task_work(struct request *req)
>  	 * (2) current->flags & PF_EXITING.
>  	 */
>  	if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) {
> -		blk_mq_end_request(req, BLK_STS_IOERR);
> +		__ublk_abort_rq_in_task_work(ubq, req);
>  		mod_delayed_work(system_wq, &ub->monitor_work, 0);
>  		return;
>  	}
> @@ -779,6 +794,21 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
>  	__ublk_rq_task_work(req);
>  }
>
> +static inline blk_status_t __ublk_abort_rq(struct ublk_queue *ubq,
> +		struct request *rq)
> +{
> +	pr_devel("%s: %s q_id %d tag %d io_flags %x.\n", __func__,
> +			(ublk_queue_can_use_recovery(ubq)) ? "requeue" : "abort",
> +			ubq->q_id, rq->tag, ubq->ios[rq->tag].flags);
> +	/* We cannot process this rq so just requeue it. */
> +	if (ublk_queue_can_use_recovery(ubq)) {
> +		blk_mq_requeue_request(rq, false);
> +		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);

Same as above.

Thanks,
Ming
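For context, a minimal sketch of the alternative Ming suggests here:
drop the per-rq kick and issue a single kick at teardown or recovery
time. Both function names below are illustrative, not from the posted
series:

	/* Requeue without kicking: while the daemon is dying the rq
	 * cannot make progress anyway.
	 */
	static inline void ublk_park_rq(struct ublk_queue *ubq,
			struct request *rq)
	{
		if (ublk_queue_can_use_recovery(ubq))
			blk_mq_requeue_request(rq, false);
		else
			blk_mq_end_request(rq, BLK_STS_IOERR);
	}

	/* Hypothetical helper: called once, e.g. right before
	 * del_gendisk() or after the queue is recovered, to
	 * re-dispatch everything parked on the requeue list.
	 */
	static void ublk_kick_requeued_rqs(struct ublk_device *ub)
	{
		blk_mq_kick_requeue_list(ub->ub_disk->queue);
	}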
On 2022/9/19 11:55, Ming Lei wrote:
> On Tue, Sep 13, 2022 at 12:17:04PM +0800, ZiyangZhang wrote:
>
> [...]
>
>> +	/* We cannot process this rq so just requeue it. */
>> +	if (ublk_queue_can_use_recovery(ubq)) {
>> +		blk_mq_requeue_request(rq, false);
>> +		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);
>
> Here you needn't kick the requeue list, since we know it can't make
> progress. And you can do that once before deleting the gendisk, or
> when the queue is recovered.

No, kicking the requeue list here is necessary.

Consider the case where USER_RECOVERY is enabled and everything goes
well: the user sends STOP_DEV, and we have kicked the requeue list in
ublk_stop_dev() and are about to call del_gendisk(). However, a crash
happens right now. Then rqs may still be requeued by ublk_queue_rq(),
because ublk_queue_rq() sees a dying ubq_daemon. So del_gendisk() will
hang, because there are rqs left in the requeue list and no one kicks
them.

BTW, kicking the requeue list after requeuing rqs is really harmless,
since we schedule quiesce_work immediately after finding a dying
ubq_daemon. So few rqs have a chance to be re-dispatched.
On Mon, Sep 19, 2022 at 05:12:21PM +0800, Ziyang Zhang wrote:
> On 2022/9/19 11:55, Ming Lei wrote:
> > On Tue, Sep 13, 2022 at 12:17:04PM +0800, ZiyangZhang wrote:
>
> [...]
>
> >> +	/* We cannot process this rq so just requeue it. */
> >> +	if (ublk_queue_can_use_recovery(ubq)) {
> >> +		blk_mq_requeue_request(rq, false);
> >> +		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);
> >
> > Here you needn't kick the requeue list, since we know it can't make
> > progress. And you can do that once before deleting the gendisk, or
> > when the queue is recovered.
>
> No, kicking the requeue list here is necessary.
>
> Consider the case where USER_RECOVERY is enabled and everything goes
> well: the user sends STOP_DEV, and we have kicked the requeue list in
> ublk_stop_dev() and are about to call del_gendisk(). However, a crash
> happens right now. Then rqs may still be requeued by ublk_queue_rq(),
> because ublk_queue_rq() sees a dying ubq_daemon. So del_gendisk() will
> hang, because there are rqs left in the requeue list and no one kicks
> them.

Why can't you kick the requeue list before calling del_gendisk()?

> BTW, kicking the requeue list after requeuing rqs is really harmless,
> since we schedule quiesce_work immediately after finding a dying
> ubq_daemon. So few rqs have a chance to be re-dispatched.

Do you think it makes sense to kick the requeue list when the queue
can't handle any request?

Thanks,
Ming
On 2022/9/19 20:39, Ming Lei wrote:
> On Mon, Sep 19, 2022 at 05:12:21PM +0800, Ziyang Zhang wrote:
>> On 2022/9/19 11:55, Ming Lei wrote:
>
> [...]
>
>>> Here you needn't kick the requeue list, since we know it can't make
>>> progress. And you can do that once before deleting the gendisk, or
>>> when the queue is recovered.
>>
>> No, kicking the requeue list here is necessary.
>>
>> Consider the case where USER_RECOVERY is enabled and everything goes
>> well: the user sends STOP_DEV, and we have kicked the requeue list in
>> ublk_stop_dev() and are about to call del_gendisk(). However, a crash
>> happens right now. Then rqs may still be requeued by ublk_queue_rq(),
>> because ublk_queue_rq() sees a dying ubq_daemon. So del_gendisk() will
>> hang, because there are rqs left in the requeue list and no one kicks
>> them.
>
> Why can't you kick the requeue list before calling del_gendisk()?

Yes, we can kick the requeue list once before calling del_gendisk().
But a crash may happen just after the kick and before del_gendisk(),
so some rqs may be requeued at that moment. But we have already kicked
the requeue list! Then del_gendisk() will hang, right?

>> BTW, kicking the requeue list after requeuing rqs is really harmless,
>> since we schedule quiesce_work immediately after finding a dying
>> ubq_daemon. So few rqs have a chance to be re-dispatched.
>
> Do you think it makes sense to kick the requeue list when the queue
> can't handle any request?

I know it does not make sense while the ubq_daemon is dying, but it is
good for handling the situation I described before.

Regards,
Zhang.
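The window Zhang describes can be pictured as follows (an illustrative
interleaving, not code from the driver):

	/*
	 * CPU0: ublk_stop_dev()           CPU1: ublk_queue_rq(), daemon dying
	 *
	 * blk_mq_kick_requeue_list(q)
	 *                                 blk_mq_requeue_request(rq, false)
	 *                                 (no further kick in this scheme)
	 * del_gendisk()
	 *   ... hangs: the freshly requeued rq sits on the requeue
	 *   list and nothing ever kicks it again
	 */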
On Tue, Sep 20, 2022 at 09:31:54AM +0800, Ziyang Zhang wrote:
> On 2022/9/19 20:39, Ming Lei wrote:
>
> [...]
>
> > Why can't you kick the requeue list before calling del_gendisk()?
>
> Yes, we can kick the requeue list once before calling del_gendisk().
> But a crash may happen just after the kick and before del_gendisk(),
> so some rqs may be requeued at that moment. But we have already kicked
> the requeue list! Then del_gendisk() will hang, right?

->force_abort is set before kicking in ublk_unquiesce_dev(), so all
new requests are failed immediately instead of being requeued, right?

Thanks,
Ming
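What consuming ->force_abort in the fast path could look like, as a
sketch based on this thread (the exact placement of the check in the
driver may differ):

	static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
	{
		struct ublk_queue *ubq = hctx->driver_data;

		/* Set before the final kick of the requeue list, so a
		 * re-dispatched rq fails fast instead of being requeued
		 * again and parked forever.
		 */
		if (ublk_queue_can_use_recovery(ubq) &&
		    unlikely(ubq->force_abort))
			return BLK_STS_IOERR;

		/* ... normal dispatch path ... */
		return BLK_STS_OK;
	}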
On 2022/9/20 10:39, Ming Lei wrote:
> On Tue, Sep 20, 2022 at 09:31:54AM +0800, Ziyang Zhang wrote:
>
> [...]
>
>> Yes, we can kick the requeue list once before calling del_gendisk().
>> But a crash may happen just after the kick and before del_gendisk(),
>> so some rqs may be requeued at that moment. But we have already kicked
>> the requeue list! Then del_gendisk() will hang, right?
>
> ->force_abort is set before kicking in ublk_unquiesce_dev(), so all
> new requests are failed immediately instead of being requeued, right?

->force_abort is not helpful here, because there may be a fallback wq
running which can requeue rqs after we kick the requeue list.

In ublk_unquiesce_dev(), I simply disable the recovery feature if ub's
state is UBLK_S_DEV_LIVE while stopping the ublk_dev.

Note: we can make sure the fallback wq does not run if we wait until
all rqs with the ACTIVE flag set are requeued. This is done in
quiesce_work(). But it cannot run while ublk_stop_dev() is running...

Regards,
Zhang.
On Tue, Sep 20, 2022 at 11:04:30AM +0800, Ziyang Zhang wrote:
> On 2022/9/20 10:39, Ming Lei wrote:
>
> [...]
>
> > ->force_abort is set before kicking in ublk_unquiesce_dev(), so all
> > new requests are failed immediately instead of being requeued, right?
>
> ->force_abort is not helpful here, because there may be a fallback wq
> running which can requeue rqs after we kick the requeue list.

After ublk_wait_tagset_rqs_idle() returns, there can't be any pending
requests in the fallback wq or task work, can there?

Please look at the 2nd point of the comment log; I did ask you to add
the words about the fallback wq and task work.

> In ublk_unquiesce_dev(), I simply disable the recovery feature if ub's
> state is UBLK_S_DEV_LIVE while stopping the ublk_dev.

The monitor work will provide forward progress in case user recovery
is not applied.

> Note: we can make sure the fallback wq does not run if we wait until
> all rqs with the ACTIVE flag set are requeued. This is done in
> quiesce_work(). But it cannot run while ublk_stop_dev() is running...

Please take a look at the delta patch I just sent, which is supposed
to be simpler for addressing this corner case.

Thanks,
Ming
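For reference, the ublk_wait_tagset_rqs_idle() referred to above can
be sketched as a poll over the tagset until no started rq remains; the
version in the posted series may differ in details:

	static bool ublk_check_inflight_rq(struct request *rq, void *data)
	{
		bool *idle = data;

		if (blk_mq_request_started(rq)) {
			*idle = false;
			return false;	/* stop iterating early */
		}
		return true;
	}

	static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
	{
		bool idle;

		/* The queue is already quiesced, so no new rq can enter
		 * ->queue_rq() while we poll.
		 */
		while (true) {
			idle = true;
			blk_mq_tagset_busy_iter(&ub->tag_set,
					ublk_check_inflight_rq, &idle);
			if (idle)
				break;
			msleep(UBLK_REQUEUE_DELAY_MS);
		}
	}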
On 2022/9/20 11:18, Ming Lei wrote:
> On Tue, Sep 20, 2022 at 11:04:30AM +0800, Ziyang Zhang wrote:
>
> [...]
>
>> ->force_abort is not helpful here, because there may be a fallback wq
>> running which can requeue rqs after we kick the requeue list.
>
> After ublk_wait_tagset_rqs_idle() returns, there can't be any pending
> requests in the fallback wq or task work, can there?

Please consider this case: a crash happens while ublk_stop_dev() is
running. In such a case I cannot schedule quiesce_work or call
ublk_wait_tagset_rqs_idle(), because quiesce_work has to acquire
ub_mutex to quiesce the request queue.

> Please look at the 2nd point of the comment log; I did ask you to add
> the words about the fallback wq and task work.
>
>> In ublk_unquiesce_dev(), I simply disable the recovery feature if ub's
>> state is UBLK_S_DEV_LIVE while stopping the ublk_dev.
>
> The monitor work will provide forward progress in case user recovery
> is not applied.

Yes, that's why I disable the recovery feature in ublk_stop_dev() if
quiesce_work has not run. In that case monitor_work can abort the rqs.

>> Note: we can make sure the fallback wq does not run if we wait until
>> all rqs with the ACTIVE flag set are requeued. This is done in
>> quiesce_work(). But it cannot run while ublk_stop_dev() is running...
>
> Please take a look at the delta patch I just sent, which is supposed
> to be simpler for addressing this corner case.

I saw your patch; it handles rqs with ACTIVE unset, but my concern is
rqs with ACTIVE set.

Regards,
Zhang.
On Tue, Sep 20, 2022 at 11:34:32AM +0800, Ziyang Zhang wrote:
> On 2022/9/20 11:18, Ming Lei wrote:
>
> [...]
>
> > After ublk_wait_tagset_rqs_idle() returns, there can't be any pending
> > requests in the fallback wq or task work, can there?
>
> Please consider this case: a crash happens while ublk_stop_dev() is
> running. In such a case I cannot schedule quiesce_work or call
> ublk_wait_tagset_rqs_idle(), because quiesce_work has to acquire
> ub_mutex to quiesce the request queue.

The issue can be addressed in the following way more reliably, cleanly
and consistently; then you needn't switch between the two modes:

	ublk_stop_dev()
		if (ublk_can_use_recovery(ub)) {
			if (ub->dev_info.state == UBLK_S_DEV_LIVE)
				__ublk_quiesce_dev(ub);	//lockless version
			ublk_unquiesce_dev();
		}

Meantime it is not necessary to disable the recovery feature in
ublk_unquiesce_dev() any more.

Thanks,
Ming
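Fleshing Ming's outline out into C, the flow could look roughly like
the sketch below, assuming the lockless __ublk_quiesce_dev() he
describes, with state bookkeeping reduced to the essentials:

	static void ublk_stop_dev(struct ublk_device *ub)
	{
		mutex_lock(&ub->mutex);
		if (ub->dev_info.state == UBLK_S_DEV_DEAD)
			goto unlock;
		if (ublk_can_use_recovery(ub)) {
			/* Daemon crashed before recovery ran: quiesce
			 * first so every inflight rq is requeued.
			 */
			if (ub->dev_info.state == UBLK_S_DEV_LIVE)
				__ublk_quiesce_dev(ub);	/* lockless version */
			/* Set ->force_abort and kick the requeue list
			 * once, so nothing stays parked when the
			 * gendisk goes away.
			 */
			ublk_unquiesce_dev(ub);
		}
		del_gendisk(ub->ub_disk);
		ub->dev_info.state = UBLK_S_DEV_DEAD;
	 unlock:
		mutex_unlock(&ub->mutex);
	}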
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 23337bd7c105..b067f33a1913 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -682,6 +682,21 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res)
 
 #define UBLK_REQUEUE_DELAY_MS	3
 
+static inline void __ublk_abort_rq_in_task_work(struct ublk_queue *ubq,
+		struct request *rq)
+{
+	pr_devel("%s: %s q_id %d tag %d io_flags %x.\n", __func__,
+			(ublk_queue_can_use_recovery(ubq)) ? "requeue" : "abort",
+			ubq->q_id, rq->tag, ubq->ios[rq->tag].flags);
+	/* We cannot process this rq so just requeue it. */
+	if (ublk_queue_can_use_recovery(ubq)) {
+		blk_mq_requeue_request(rq, false);
+		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);
+	} else {
+		blk_mq_end_request(rq, BLK_STS_IOERR);
+	}
+}
+
 static inline void __ublk_rq_task_work(struct request *req)
 {
 	struct ublk_queue *ubq = req->mq_hctx->driver_data;
@@ -704,7 +719,7 @@ static inline void __ublk_rq_task_work(struct request *req)
 	 * (2) current->flags & PF_EXITING.
 	 */
 	if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) {
-		blk_mq_end_request(req, BLK_STS_IOERR);
+		__ublk_abort_rq_in_task_work(ubq, req);
 		mod_delayed_work(system_wq, &ub->monitor_work, 0);
 		return;
 	}
@@ -779,6 +794,21 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
 	__ublk_rq_task_work(req);
 }
 
+static inline blk_status_t __ublk_abort_rq(struct ublk_queue *ubq,
+		struct request *rq)
+{
+	pr_devel("%s: %s q_id %d tag %d io_flags %x.\n", __func__,
+			(ublk_queue_can_use_recovery(ubq)) ? "requeue" : "abort",
+			ubq->q_id, rq->tag, ubq->ios[rq->tag].flags);
+	/* We cannot process this rq so just requeue it. */
+	if (ublk_queue_can_use_recovery(ubq)) {
+		blk_mq_requeue_request(rq, false);
+		blk_mq_delay_kick_requeue_list(rq->q, UBLK_REQUEUE_DELAY_MS);
+		return BLK_STS_OK;
+	}
+	return BLK_STS_IOERR;
+}
+
 static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -796,7 +826,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (unlikely(ubq_daemon_is_dying(ubq))) {
  fail:
 		mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
-		return BLK_STS_IOERR;
+		return __ublk_abort_rq(ubq, rq);
 	}
 
 	if (ublk_can_use_task_work(ubq)) {
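The return-value change in ublk_queue_rq() above leans on a blk-mq
contract worth spelling out (a summary, not driver code):

	/*
	 * Once ->queue_rq() has handed the rq to blk_mq_requeue_request(),
	 * it must return BLK_STS_OK so the block layer leaves the rq
	 * alone. Returning BLK_STS_IOERR instead would let the core
	 * complete the rq with an error while it also sits on the
	 * requeue list, defeating recovery.
	 */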
With recovery feature enabled, in ublk_queue_rq or task work
(in exit_task_work or fallback wq), we requeue rqs instead of
ending (aborting) them. Besides, no matter whether the recovery
feature is enabled or disabled, we schedule monitor_work immediately.

Signed-off-by: ZiyangZhang <ZiyangZhang@linux.alibaba.com>
---
 drivers/block/ublk_drv.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)