Message ID | 20210415231530.95464-4-snitzer@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nvme: improve error handling and ana_state to work well with dm-multipath | expand |
On 4/16/21 1:15 AM, Mike Snitzer wrote: > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is > set by multipathing software (e.g. dm-multipath) before it issues IO. > > Update NVMe to allow failover of requests marked with either > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests > to be given a disposition of either FAILOVER or FAILUP respectively. > FAILUP handling ensures a retryable error is returned up from NVMe. > > Introduce nvme_failup_req() for use in nvme_complete_rq() if > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures > the request is completed with a retryable IO error when appropriate. > __nvme_end_req() was factored out for use by both nvme_end_req() and > nvme_failup_req(). > > Signed-off-by: Mike Snitzer <snitzer@redhat.com> > --- > drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- > 1 file changed, 26 insertions(+), 5 deletions(-) > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index 4134cf3c7e48..10375197dd53 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -299,6 +299,7 @@ enum nvme_disposition { > COMPLETE, > RETRY, > FAILOVER, > + FAILUP, > }; > > static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > nvme_req(req)->retries >= nvme_max_retries) > return COMPLETE; > > - if (req->cmd_flags & REQ_NVME_MPATH) { > + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) { > if (nvme_is_path_error(nvme_req(req)->status) || > blk_queue_dying(req->q)) > - return FAILOVER; > + return (req->cmd_flags & REQ_NVME_MPATH) ? > + FAILOVER : FAILUP; > } else { > if (blk_queue_dying(req->q)) > return COMPLETE; > @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > return RETRY; > } > > -static inline void nvme_end_req(struct request *req) > +static inline void __nvme_end_req(struct request *req, blk_status_t status) > { > - blk_status_t status = nvme_error_status(nvme_req(req)->status); > - > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > req_op(req) == REQ_OP_ZONE_APPEND) > req->__sector = nvme_lba_to_sect(req->q->queuedata, > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req) > blk_mq_end_request(req, status); > } > > +static inline void nvme_end_req(struct request *req) > +{ > + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); > +} > + > +static void nvme_failup_req(struct request *req) > +{ > + blk_status_t status = nvme_error_status(nvme_req(req)->status); > + > + if (WARN_ON_ONCE(!blk_path_error(status))) { > + pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n", > + blk_status_to_errno(status)); > + status = BLK_STS_IOERR; > + } > + > + __nvme_end_req(req, status); > +} > + > void nvme_complete_rq(struct request *req) > { > trace_nvme_complete_rq(req); > @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req) > case FAILOVER: > nvme_failover_req(req); > return; > + case FAILUP: > + nvme_failup_req(req); > + return; > } > } > EXPORT_SYMBOL_GPL(nvme_complete_rq); > Hmm. Quite convoluted, methinks. Shouldn't this achieve the same thing? diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e89ec2522ca6..8c36a2196b66 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -303,8 +303,10 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) if (likely(nvme_req(req)->status == 0)) return COMPLETE; - if (blk_noretry_request(req) || - (nvme_req(req)->status & NVME_SC_DNR) || + if (blk_noretry_request(req)) + nvme_req(req)->status |= NVME_SC_DNR; + + if ((nvme_req(req)->status & NVME_SC_DNR) || nvme_req(req)->retries >= nvme_max_retries) return COMPLETE; Cheers, Hannes
On Fri, Apr 16 2021 at 10:07am -0400, Hannes Reinecke <hare@suse.de> wrote: > On 4/16/21 1:15 AM, Mike Snitzer wrote: > > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry > > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is > > set by multipathing software (e.g. dm-multipath) before it issues IO. > > > > Update NVMe to allow failover of requests marked with either > > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests > > to be given a disposition of either FAILOVER or FAILUP respectively. > > FAILUP handling ensures a retryable error is returned up from NVMe. > > > > Introduce nvme_failup_req() for use in nvme_complete_rq() if > > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures > > the request is completed with a retryable IO error when appropriate. > > __nvme_end_req() was factored out for use by both nvme_end_req() and > > nvme_failup_req(). > > > > Signed-off-by: Mike Snitzer <snitzer@redhat.com> > > --- > > drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > > index 4134cf3c7e48..10375197dd53 100644 > > --- a/drivers/nvme/host/core.c > > +++ b/drivers/nvme/host/core.c > > @@ -299,6 +299,7 @@ enum nvme_disposition { > > COMPLETE, > > RETRY, > > FAILOVER, > > + FAILUP, > > }; > > > > static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > > @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > > nvme_req(req)->retries >= nvme_max_retries) > > return COMPLETE; > > > > - if (req->cmd_flags & REQ_NVME_MPATH) { > > + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) { > > if (nvme_is_path_error(nvme_req(req)->status) || > > blk_queue_dying(req->q)) > > - return FAILOVER; > > + return (req->cmd_flags & REQ_NVME_MPATH) ? > > + FAILOVER : FAILUP; > > } else { > > if (blk_queue_dying(req->q)) > > return COMPLETE; > > @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) > > return RETRY; > > } > > > > -static inline void nvme_end_req(struct request *req) > > +static inline void __nvme_end_req(struct request *req, blk_status_t status) > > { > > - blk_status_t status = nvme_error_status(nvme_req(req)->status); > > - > > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > > req_op(req) == REQ_OP_ZONE_APPEND) > > req->__sector = nvme_lba_to_sect(req->q->queuedata, > > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req) > > blk_mq_end_request(req, status); > > } > > > > +static inline void nvme_end_req(struct request *req) > > +{ > > + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); > > +} > > + > > +static void nvme_failup_req(struct request *req) > > +{ > > + blk_status_t status = nvme_error_status(nvme_req(req)->status); > > + > > + if (WARN_ON_ONCE(!blk_path_error(status))) { > > + pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n", > > + blk_status_to_errno(status)); > > + status = BLK_STS_IOERR; > > + } > > + > > + __nvme_end_req(req, status); > > +} > > + > > void nvme_complete_rq(struct request *req) > > { > > trace_nvme_complete_rq(req); > > @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req) > > case FAILOVER: > > nvme_failover_req(req); > > return; > > + case FAILUP: > > + nvme_failup_req(req); > > + return; > > } > > } > > EXPORT_SYMBOL_GPL(nvme_complete_rq); > > > > Hmm. Quite convoluted, methinks. Maybe you didn't read the header or patch? I'm cool with critical review when it is clear the reviewer fully understands the patch but... ;) > Shouldn't this achieve the same thing? > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index e89ec2522ca6..8c36a2196b66 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -303,8 +303,10 @@ static inline enum nvme_disposition > nvme_decide_disposition(struct request *req) > if (likely(nvme_req(req)->status == 0)) > return COMPLETE; > > - if (blk_noretry_request(req) || > - (nvme_req(req)->status & NVME_SC_DNR) || > + if (blk_noretry_request(req)) > + nvme_req(req)->status |= NVME_SC_DNR; > + > + if ((nvme_req(req)->status & NVME_SC_DNR) || > nvme_req(req)->retries >= nvme_max_retries) > return COMPLETE; Definitely won't achieve the same. And especially not with patch 1/4 ("nvme: return BLK_STS_DO_NOT_RETRY if the DNR bit is set") that you gave your Reviewed-by to earlier. Instead of "FAILUP", I thought about using "FAILUP_AND_OVER" to convey that this is a variant of failover. Meaning it takes the same patch as nvme "FAILOVER" until the very end; where it does REQ_FAILFAST_TRANSPORT specific work detailed in nvme_failup_req(). And then patch 4/4 makes further use of nvme_failup_req() by adding a call to the factored out nvme_update_ana(). Mike
On 4/16/21 5:03 PM, Mike Snitzer wrote: > On Fri, Apr 16 2021 at 10:07am -0400, > Hannes Reinecke <hare@suse.de> wrote: > >> On 4/16/21 1:15 AM, Mike Snitzer wrote: >>> If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry >>> IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is >>> set by multipathing software (e.g. dm-multipath) before it issues IO. >>> >>> Update NVMe to allow failover of requests marked with either >>> REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests >>> to be given a disposition of either FAILOVER or FAILUP respectively. >>> FAILUP handling ensures a retryable error is returned up from NVMe. >>> >>> Introduce nvme_failup_req() for use in nvme_complete_rq() if >>> nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures >>> the request is completed with a retryable IO error when appropriate. >>> __nvme_end_req() was factored out for use by both nvme_end_req() and >>> nvme_failup_req(). >>> >>> Signed-off-by: Mike Snitzer <snitzer@redhat.com> >>> --- >>> drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- >>> 1 file changed, 26 insertions(+), 5 deletions(-) >>> >>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c >>> index 4134cf3c7e48..10375197dd53 100644 >>> --- a/drivers/nvme/host/core.c >>> +++ b/drivers/nvme/host/core.c >>> @@ -299,6 +299,7 @@ enum nvme_disposition { >>> COMPLETE, >>> RETRY, >>> FAILOVER, >>> + FAILUP, >>> }; >>> >>> static inline enum nvme_disposition nvme_decide_disposition(struct request *req) >>> @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) >>> nvme_req(req)->retries >= nvme_max_retries) >>> return COMPLETE; >>> >>> - if (req->cmd_flags & REQ_NVME_MPATH) { >>> + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) { >>> if (nvme_is_path_error(nvme_req(req)->status) || >>> blk_queue_dying(req->q)) >>> - return FAILOVER; >>> + return (req->cmd_flags & REQ_NVME_MPATH) ? >>> + FAILOVER : FAILUP; >>> } else { >>> if (blk_queue_dying(req->q)) >>> return COMPLETE; >>> @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) >>> return RETRY; >>> } >>> >>> -static inline void nvme_end_req(struct request *req) >>> +static inline void __nvme_end_req(struct request *req, blk_status_t status) >>> { >>> - blk_status_t status = nvme_error_status(nvme_req(req)->status); >>> - >>> if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && >>> req_op(req) == REQ_OP_ZONE_APPEND) >>> req->__sector = nvme_lba_to_sect(req->q->queuedata, >>> @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req) >>> blk_mq_end_request(req, status); >>> } >>> >>> +static inline void nvme_end_req(struct request *req) >>> +{ >>> + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); >>> +} >>> + >>> +static void nvme_failup_req(struct request *req) >>> +{ >>> + blk_status_t status = nvme_error_status(nvme_req(req)->status); >>> + >>> + if (WARN_ON_ONCE(!blk_path_error(status))) { >>> + pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n", >>> + blk_status_to_errno(status)); >>> + status = BLK_STS_IOERR; >>> + } >>> + >>> + __nvme_end_req(req, status); >>> +} >>> + >>> void nvme_complete_rq(struct request *req) >>> { >>> trace_nvme_complete_rq(req); >>> @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req) >>> case FAILOVER: >>> nvme_failover_req(req); >>> return; >>> + case FAILUP: >>> + nvme_failup_req(req); >>> + return; >>> } >>> } >>> EXPORT_SYMBOL_GPL(nvme_complete_rq); >>> >> >> Hmm. Quite convoluted, methinks. > > Maybe you didn't read the header or patch? > > I'm cool with critical review when it is clear the reviewer fully > understands the patch but... ;) > >> Shouldn't this achieve the same thing? >> >> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c >> index e89ec2522ca6..8c36a2196b66 100644 >> --- a/drivers/nvme/host/core.c >> +++ b/drivers/nvme/host/core.c >> @@ -303,8 +303,10 @@ static inline enum nvme_disposition >> nvme_decide_disposition(struct request *req) >> if (likely(nvme_req(req)->status == 0)) >> return COMPLETE; >> >> - if (blk_noretry_request(req) || >> - (nvme_req(req)->status & NVME_SC_DNR) || >> + if (blk_noretry_request(req)) >> + nvme_req(req)->status |= NVME_SC_DNR; >> + >> + if ((nvme_req(req)->status & NVME_SC_DNR) || >> nvme_req(req)->retries >= nvme_max_retries) >> return COMPLETE; > > Definitely won't achieve the same. And especially not with patch 1/4 > ("nvme: return BLK_STS_DO_NOT_RETRY if the DNR bit is set") that you > gave your Reviewed-by to earlier. > Ah. Right. Sorry. > Instead of "FAILUP", I thought about using "FAILUP_AND_OVER" to convey > that this is a variant of failover. Meaning it takes the same patch as > nvme "FAILOVER" until the very end; where it does REQ_FAILFAST_TRANSPORT > specific work detailed in nvme_failup_req(). > All very intricate; will need to check the patches in their combined version. Not deliberately stalling, mind you, just wanting to figure out what the net result will be. Cheers, Hannes
On Fri, 2021-04-16 at 16:07 +0200, Hannes Reinecke wrote: > > Hmm. Quite convoluted, methinks. > Shouldn't this achieve the same thing? > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index e89ec2522ca6..8c36a2196b66 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -303,8 +303,10 @@ static inline enum nvme_disposition > nvme_decide_disposition(struct request *req) > if (likely(nvme_req(req)->status == 0)) > return COMPLETE; > > - if (blk_noretry_request(req) || > - (nvme_req(req)->status & NVME_SC_DNR) || > + if (blk_noretry_request(req)) > + nvme_req(req)->status |= NVME_SC_DNR; > + > + if ((nvme_req(req)->status & NVME_SC_DNR) || > nvme_req(req)->retries >= nvme_max_retries) > return COMPLETE; > I am not in favor of altering ->status to set DNR jus to simplify the following conditional. -Ewan
On Thu, 2021-04-15 at 19:15 -0400, Mike Snitzer wrote: > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is > set by multipathing software (e.g. dm-multipath) before it issues IO. > > Update NVMe to allow failover of requests marked with either > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests > to be given a disposition of either FAILOVER or FAILUP respectively. > FAILUP handling ensures a retryable error is returned up from NVMe. > > Introduce nvme_failup_req() for use in nvme_complete_rq() if > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures > the request is completed with a retryable IO error when appropriate. > __nvme_end_req() was factored out for use by both nvme_end_req() and > nvme_failup_req(). > > Signed-off-by: Mike Snitzer <snitzer@redhat.com> > --- > drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- > 1 file changed, 26 insertions(+), 5 deletions(-) > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index 4134cf3c7e48..10375197dd53 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -299,6 +299,7 @@ enum nvme_disposition { > COMPLETE, > RETRY, > FAILOVER, > + FAILUP, > }; > > static inline enum nvme_disposition nvme_decide_disposition(struct > request *req) > @@ -318,10 +319,11 @@ static inline enum nvme_disposition > nvme_decide_disposition(struct request *req) > nvme_req(req)->retries >= nvme_max_retries) > return COMPLETE; > > - if (req->cmd_flags & REQ_NVME_MPATH) { > + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) > { > if (nvme_is_path_error(nvme_req(req)->status) || > blk_queue_dying(req->q)) > - return FAILOVER; > + return (req->cmd_flags & REQ_NVME_MPATH) ? > + FAILOVER : FAILUP; > } else { > if (blk_queue_dying(req->q)) > return COMPLETE; > @@ -330,10 +332,8 @@ static inline enum nvme_disposition > nvme_decide_disposition(struct request *req) > return RETRY; > } > > -static inline void nvme_end_req(struct request *req) > +static inline void __nvme_end_req(struct request *req, blk_status_t > status) > { > - blk_status_t status = nvme_error_status(nvme_req(req)->status); > - > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > req_op(req) == REQ_OP_ZONE_APPEND) > req->__sector = nvme_lba_to_sect(req->q->queuedata, > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request > *req) > blk_mq_end_request(req, status); > } > > +static inline void nvme_end_req(struct request *req) > +{ > + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); > +} > + > +static void nvme_failup_req(struct request *req) > +{ > + blk_status_t status = nvme_error_status(nvme_req(req)->status); > + > + if (WARN_ON_ONCE(!blk_path_error(status))) { > + pr_debug("Request meant for failover but blk_status_t > (errno=%d) was not retryable.\n", > + blk_status_to_errno(status)); > + status = BLK_STS_IOERR; > + } > + > + __nvme_end_req(req, status); It seems like adding __nvme_end_req() was unnecessary, since nvme_end_req() just calls it and does nothing else? -Ewan > +} > + > void nvme_complete_rq(struct request *req) > { > trace_nvme_complete_rq(req); > @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req) > case FAILOVER: > nvme_failover_req(req); > return; > + case FAILUP: > + nvme_failup_req(req); > + return; > } > } > EXPORT_SYMBOL_GPL(nvme_complete_rq);
On Fri, Apr 16 2021 at 4:52pm -0400, Ewan D. Milne <emilne@redhat.com> wrote: > On Thu, 2021-04-15 at 19:15 -0400, Mike Snitzer wrote: > > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry > > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is > > set by multipathing software (e.g. dm-multipath) before it issues IO. > > > > Update NVMe to allow failover of requests marked with either > > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests > > to be given a disposition of either FAILOVER or FAILUP respectively. > > FAILUP handling ensures a retryable error is returned up from NVMe. > > > > Introduce nvme_failup_req() for use in nvme_complete_rq() if > > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures > > the request is completed with a retryable IO error when appropriate. > > __nvme_end_req() was factored out for use by both nvme_end_req() and > > nvme_failup_req(). > > > > Signed-off-by: Mike Snitzer <snitzer@redhat.com> > > --- > > drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- > > 1 file changed, 26 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > > index 4134cf3c7e48..10375197dd53 100644 > > --- a/drivers/nvme/host/core.c > > +++ b/drivers/nvme/host/core.c > > @@ -299,6 +299,7 @@ enum nvme_disposition { > > COMPLETE, > > RETRY, > > FAILOVER, > > + FAILUP, > > }; > > > > static inline enum nvme_disposition nvme_decide_disposition(struct > > request *req) > > @@ -318,10 +319,11 @@ static inline enum nvme_disposition > > nvme_decide_disposition(struct request *req) > > nvme_req(req)->retries >= nvme_max_retries) > > return COMPLETE; > > > > - if (req->cmd_flags & REQ_NVME_MPATH) { > > + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) > > { > > if (nvme_is_path_error(nvme_req(req)->status) || > > blk_queue_dying(req->q)) > > - return FAILOVER; > > + return (req->cmd_flags & REQ_NVME_MPATH) ? > > + FAILOVER : FAILUP; > > } else { > > if (blk_queue_dying(req->q)) > > return COMPLETE; > > @@ -330,10 +332,8 @@ static inline enum nvme_disposition > > nvme_decide_disposition(struct request *req) > > return RETRY; > > } > > > > -static inline void nvme_end_req(struct request *req) > > +static inline void __nvme_end_req(struct request *req, blk_status_t > > status) > > { > > - blk_status_t status = nvme_error_status(nvme_req(req)->status); > > - > > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > > req_op(req) == REQ_OP_ZONE_APPEND) > > req->__sector = nvme_lba_to_sect(req->q->queuedata, > > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request > > *req) > > blk_mq_end_request(req, status); > > } > > > > +static inline void nvme_end_req(struct request *req) > > +{ > > + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); > > +} > > + > > +static void nvme_failup_req(struct request *req) > > +{ > > + blk_status_t status = nvme_error_status(nvme_req(req)->status); > > + > > + if (WARN_ON_ONCE(!blk_path_error(status))) { > > + pr_debug("Request meant for failover but blk_status_t > > (errno=%d) was not retryable.\n", > > + blk_status_to_errno(status)); > > + status = BLK_STS_IOERR; > > + } > > + > > + __nvme_end_req(req, status); > > It seems like adding __nvme_end_req() was unnecessary, since > nvme_end_req() just calls it and does nothing else? The __nvme_end_req helper allowed the status to be passed in, making it easy to override status for unlikley edge-case where the BLK_STS is !blk_path_error() but should be given nvme_is_path_error() returned true. Anyway, I can avoid the need to factor out __nvme_end_req() and just: nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; nvme_end_req(req); Thanks, Mike
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4134cf3c7e48..10375197dd53 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -299,6 +299,7 @@ enum nvme_disposition { COMPLETE, RETRY, FAILOVER, + FAILUP, }; static inline enum nvme_disposition nvme_decide_disposition(struct request *req) @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) nvme_req(req)->retries >= nvme_max_retries) return COMPLETE; - if (req->cmd_flags & REQ_NVME_MPATH) { + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) { if (nvme_is_path_error(nvme_req(req)->status) || blk_queue_dying(req->q)) - return FAILOVER; + return (req->cmd_flags & REQ_NVME_MPATH) ? + FAILOVER : FAILUP; } else { if (blk_queue_dying(req->q)) return COMPLETE; @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return RETRY; } -static inline void nvme_end_req(struct request *req) +static inline void __nvme_end_req(struct request *req, blk_status_t status) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req) blk_mq_end_request(req, status); } +static inline void nvme_end_req(struct request *req) +{ + __nvme_end_req(req, nvme_error_status(nvme_req(req)->status)); +} + +static void nvme_failup_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + + if (WARN_ON_ONCE(!blk_path_error(status))) { + pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n", + blk_status_to_errno(status)); + status = BLK_STS_IOERR; + } + + __nvme_end_req(req, status); +} + void nvme_complete_rq(struct request *req) { trace_nvme_complete_rq(req); @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req) case FAILOVER: nvme_failover_req(req); return; + case FAILUP: + nvme_failup_req(req); + return; } } EXPORT_SYMBOL_GPL(nvme_complete_rq);
If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is set by multipathing software (e.g. dm-multipath) before it issues IO. Update NVMe to allow failover of requests marked with either REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests to be given a disposition of either FAILOVER or FAILUP respectively. FAILUP handling ensures a retryable error is returned up from NVMe. Introduce nvme_failup_req() for use in nvme_complete_rq() if nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures the request is completed with a retryable IO error when appropriate. __nvme_end_req() was factored out for use by both nvme_end_req() and nvme_failup_req(). Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-)