Message ID | ZTlt0HPbVZf0gYcI@redhat.com (mailing list archive)
---|---
State | Superseded, archived
Delegated to: | Mike Snitzer
Series | [v2] dm: respect REQ_NOWAIT flag in bios issued to DM
On Wed, Oct 25 2023 at 3:34P -0400,
Mike Snitzer <snitzer@kernel.org> wrote:

> From: Mikulas Patocka <mpatocka@redhat.com>
>
> Update DM core's IO submission to allocate required memory using
> GFP_NOWAIT if REQ_NOWAIT is set. Lone exception is in the error path
> where DM's requeue support needs to allocate a clone bio in
> dm_io_rewind() to allow the IO to be resubmitted: GFP_NOWAIT is used
> first but if it fails GFP_NOIO is used as a last resort.
>
> Tested with simple test provided in commit a9ce385344f916 ("dm: don't
> attempt to queue IO under RCU protection") that was enhanced to check
> error codes. Also tested using fio's pvsync2 with nowait=1.
>
> But testing with induced GFP_NOWAIT allocation failures wasn't
> performed.
>
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Co-developed-by: Mike Snitzer <snitzer@kernel.org>
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  drivers/md/dm-io-rewind.c | 13 ++++++--
>  drivers/md/dm.c           | 66 ++++++++++++++++++++++++++++++++-------
>  2 files changed, 65 insertions(+), 14 deletions(-)

Backfilling some comments on this v2:

1) I'm not completely clear if REQ_NOWAIT not ever sleeping (waiting on
IO) is a hint vs a hard requirement -- so the dm_io_rewind() change to
try GFP_NOWAIT and failback to GFP_NOIO might be bogus...

2) What is a good way to reliably simulate low memory allocations such
that mempool backed GFP_NOWAIT allocations fail?

Thanks for any review/feedback.

Mike

>
> diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
> index 6155b0117c9d..bde5a53e2d88 100644
> --- a/drivers/md/dm-io-rewind.c
> +++ b/drivers/md/dm-io-rewind.c
> @@ -143,8 +143,17 @@ static void dm_bio_rewind(struct bio *bio, unsigned int bytes)
>  void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
>  {
>  	struct bio *orig = io->orig_bio;
> -	struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
> -					       GFP_NOIO, bs);
> +	struct bio *new_orig;
> +
> +	new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOWAIT, bs);
> +	if (unlikely(!new_orig)) {
> +		/*
> +		 * Returning early and failing rewind isn't an option, even
> +		 * if orig has REQ_NOWAIT set, so retry alloc with GFP_NOIO.
> +		 */
> +		new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOIO, bs);
> +	}
> +
>  	/*
>  	 * dm_bio_rewind can restore to previous position since the
>  	 * end sector is fixed for original bio, but we still need
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 1113a8da3c47..b0e1425af475 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -87,6 +87,8 @@ struct clone_info {
>  	unsigned int sector_count;
>  	bool is_abnormal_io:1;
>  	bool submit_as_polled:1;
> +	bool is_nowait:1;
> +	bool nowait_failed:1;
>  };
>
>  static inline struct dm_target_io *clone_to_tio(struct bio *clone)
> @@ -576,7 +578,13 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
>  	struct dm_target_io *tio;
>  	struct bio *clone;
>
> -	clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
> +	if (unlikely(bio->bi_opf & REQ_NOWAIT)) {
> +		clone = bio_alloc_clone(NULL, bio, GFP_NOWAIT, &md->mempools->io_bs);
> +		if (!clone)
> +			return NULL;
> +	} else {
> +		clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
> +	}
>  	tio = clone_to_tio(clone);
>  	tio->flags = 0;
>  	dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
> @@ -1500,9 +1508,17 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
>  		mutex_unlock(&ci->io->md->table_devices_lock);
>  		if (bio_nr == num_bios)
>  			return;
> -
> +		/*
> +		 * Failed to allocate all bios, all or nothing, so rewind partial
> +		 * allocations and short-circuit retry with GFP_NOIO if 'is_nowait'
> +		 */
>  		while ((bio = bio_list_pop(blist)))
>  			free_tio(bio);
> +
> +		if (ci->is_nowait) {
> +			ci->nowait_failed = true;
> +			return;
> +		}
>  	}
>  }
>
> @@ -1519,7 +1535,15 @@ static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_targe
>  	case 1:
>  		if (len)
>  			setup_split_accounting(ci, *len);
> -		clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
> +		if (unlikely(ci->is_nowait)) {
> +			clone = alloc_tio(ci, ti, 0, len, GFP_NOWAIT);
> +			if (!clone) {
> +				ci->nowait_failed = true;
> +				return 0;
> +			}
> +		} else {
> +			clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
> +		}
>  		__map_bio(clone);
>  		ret = 1;
>  		break;
> @@ -1539,7 +1563,7 @@ static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_targe
>  	return ret;
>  }
>
> -static void __send_empty_flush(struct clone_info *ci)
> +static blk_status_t __send_empty_flush(struct clone_info *ci)
>  {
>  	struct dm_table *t = ci->map;
>  	struct bio flush_bio;
> @@ -1572,6 +1596,8 @@ static void __send_empty_flush(struct clone_info *ci)
>  	atomic_sub(1, &ci->io->io_count);
>
>  	bio_uninit(ci->bio);
> +
> +	return likely(!ci->nowait_failed) ? BLK_STS_OK : BLK_STS_AGAIN;
>  }
>
>  static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
> @@ -1656,7 +1682,7 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
>
>  	__send_changing_extent_only(ci, ti, num_bios,
>  				    max_granularity, max_sectors);
> -	return BLK_STS_OK;
> +	return likely(!ci->nowait_failed) ? BLK_STS_OK : BLK_STS_AGAIN;
>  }
>
>  /*
> @@ -1714,7 +1740,7 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
>  	if (unlikely(!ti))
>  		return BLK_STS_IOERR;
>
> -	if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
> +	if (unlikely(ci->is_nowait) &&
>  	    unlikely(!dm_target_supports_nowait(ti->type)))
>  		return BLK_STS_NOTSUPP;
>
> @@ -1729,7 +1755,15 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
>
>  	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
>  	setup_split_accounting(ci, len);
> -	clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
> +	if (unlikely(ci->is_nowait)) {
> +		clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT);
> +		if (unlikely(!clone)) {
> +			ci->nowait_failed = true; /* unchecked, set for consistency */
> +			return BLK_STS_AGAIN;
> +		}
> +	} else {
> +		clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
> +	}
>  	__map_bio(clone);
>
>  	ci->sector += len;
> @@ -1738,11 +1772,13 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
>  	return BLK_STS_OK;
>  }
>
> -static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
> +static void init_clone_info(struct clone_info *ci, struct dm_io *io,
>  			    struct dm_table *map, struct bio *bio, bool is_abnormal)
>  {
> +	ci->is_nowait = !!(bio->bi_opf & REQ_NOWAIT);
> +	ci->nowait_failed = false;
>  	ci->map = map;
> -	ci->io = alloc_io(md, bio);
> +	ci->io = io;
>  	ci->bio = bio;
>  	ci->is_abnormal_io = is_abnormal;
>  	ci->submit_as_polled = false;
> @@ -1777,11 +1813,17 @@ static void dm_split_and_process_bio(struct mapped_device *md,
>  		return;
>  	}
>
> -	init_clone_info(&ci, md, map, bio, is_abnormal);
> -	io = ci.io;
> +	io = alloc_io(md, bio);
> +	if (!io) {
> +		/* Unable to do anything without dm_io. */
> +		bio_wouldblock_error(bio);
> +		return;
> +	}
> +
> +	init_clone_info(&ci, io, map, bio, is_abnormal);
>
>  	if (bio->bi_opf & REQ_PREFLUSH) {
> -		__send_empty_flush(&ci);
> +		error = __send_empty_flush(&ci);
>  		/* dm_io_complete submits any data associated with flush */
>  		goto out;
>  	}
> --
> 2.40.0
>
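As context for the testing note in the commit message: below is a minimal userspace sketch (not the test from commit a9ce385344f916; the device path is a placeholder) of how the nowait path is typically exercised. An O_DIRECT read issued with RWF_NOWAIT becomes an IOCB_NOWAIT kiocb and a REQ_NOWAIT bio, so with this patch an allocation failure or an unsupported target should surface to the caller as EAGAIN (or EOPNOTSUPP) instead of blocking.

```c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Placeholder device; any DM device whose targets support nowait works. */
	const char *dev = argc > 1 ? argv[1] : "/dev/mapper/test-linear";
	struct iovec iov;
	void *buf;
	ssize_t ret;

	int fd = open(dev, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (posix_memalign(&buf, 4096, 4096))
		return 1;

	iov.iov_base = buf;
	iov.iov_len = 4096;

	/* RWF_NOWAIT -> IOCB_NOWAIT -> REQ_NOWAIT on the bios DM receives. */
	ret = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (ret < 0 && errno == EAGAIN)
		printf("would block: got EAGAIN instead of sleeping\n");
	else if (ret < 0)
		printf("read failed: %s\n", strerror(errno));
	else
		printf("read completed without blocking: %zd bytes\n", ret);

	free(buf);
	close(fd);
	return 0;
}
```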
On Wed, Oct 25, 2023 at 03:34:40PM -0400, Mike Snitzer wrote:
> From: Mikulas Patocka <mpatocka@redhat.com>
>
> Update DM core's IO submission to allocate required memory using
> GFP_NOWAIT if REQ_NOWAIT is set. Lone exception is in the error path
> where DM's requeue support needs to allocate a clone bio in
> dm_io_rewind() to allow the IO to be resubmitted: GFP_NOWAIT is used
> first but if it fails GFP_NOIO is used as a last resort.
>
> Tested with simple test provided in commit a9ce385344f916 ("dm: don't
> attempt to queue IO under RCU protection") that was enhanced to check
> error codes. Also tested using fio's pvsync2 with nowait=1.
>
> But testing with induced GFP_NOWAIT allocation failures wasn't
> performed.
>
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Co-developed-by: Mike Snitzer <snitzer@kernel.org>
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  drivers/md/dm-io-rewind.c | 13 ++++++--
>  drivers/md/dm.c           | 66 ++++++++++++++++++++++++++++++++-------
>  2 files changed, 65 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
> index 6155b0117c9d..bde5a53e2d88 100644
> --- a/drivers/md/dm-io-rewind.c
> +++ b/drivers/md/dm-io-rewind.c
> @@ -143,8 +143,17 @@ static void dm_bio_rewind(struct bio *bio, unsigned int bytes)
>  void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
>  {
>  	struct bio *orig = io->orig_bio;
> -	struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
> -					       GFP_NOIO, bs);
> +	struct bio *new_orig;
> +
> +	new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOWAIT, bs);
> +	if (unlikely(!new_orig)) {
> +		/*
> +		 * Returning early and failing rewind isn't an option, even
> +		 * if orig has REQ_NOWAIT set, so retry alloc with GFP_NOIO.
> +		 */
> +		new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOIO, bs);
> +	}

dm_bio_rewind() is only called in the requeue work fn, so there shouldn't
be an issue with NOWAIT, because the upper IO submission code path won't
be blocked by requeue.

Thanks,
Ming
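To make Ming's observation concrete: a sketch (a fragment only, with the rest of dm_io_rewind() left as in mainline) of the simplification his comment suggests, keeping the single GFP_NOIO clone from before the patch, since this function only ever runs from the requeue work function in process context, after the REQ_NOWAIT submitter has already returned.

```c
/* Fragment of drivers/md/dm-io-rewind.c as Ming's comment would have it. */
void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
{
	struct bio *orig = io->orig_bio;
	/*
	 * May sleep, but only the requeue worker waits here; the task that
	 * issued the REQ_NOWAIT bio has already seen its submission return.
	 */
	struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
					       GFP_NOIO, bs);

	/* ... remainder of dm_io_rewind() unchanged ... */
}
```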