Message ID | 1460832928-15514-2-git-send-email-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sun, Apr 17, 2016 at 2:55 AM, Christoph Hellwig <hch@lst.de> wrote: > It can be replaced with a combination of bio_chain and submit_bio_wait. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > Signed-off-by: Ming Lin <ming.l@ssi.samsung.com> > Signed-off-by: Sagi Grimberg <sagig@grimberg.me> > --- > block/blk-lib.c | 118 +++++++++++++------------------------------------------- > 1 file changed, 27 insertions(+), 91 deletions(-) > > diff --git a/block/blk-lib.c b/block/blk-lib.c > index 9ebf653..700d248 100644 > --- a/block/blk-lib.c > +++ b/block/blk-lib.c > @@ -9,21 +9,17 @@ > > #include "blk.h" > > -struct bio_batch { > - atomic_t done; > - int error; > - struct completion *wait; > -}; > - > -static void bio_batch_end_io(struct bio *bio) > +static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, > + gfp_t gfp) > { > - struct bio_batch *bb = bio->bi_private; > + struct bio *new = bio_alloc(gfp, nr_pages); > + > + if (bio) { > + bio_chain(bio, new); > + submit_bio(rw, bio); > + } > > - if (bio->bi_error && bio->bi_error != -EOPNOTSUPP) > - bb->error = bio->bi_error; > - if (atomic_dec_and_test(&bb->done)) > - complete(bb->wait); > - bio_put(bio); > + return new; > } > > /** > @@ -40,13 +36,11 @@ static void bio_batch_end_io(struct bio *bio) > int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) > { > - DECLARE_COMPLETION_ONSTACK(wait); > struct request_queue *q = bdev_get_queue(bdev); > int type = REQ_WRITE | REQ_DISCARD; > unsigned int granularity; > int alignment; > - struct bio_batch bb; > - struct bio *bio; > + struct bio *bio = NULL; > int ret = 0; > struct blk_plug plug; > > @@ -66,25 +60,15 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > type |= REQ_SECURE; > } > > - atomic_set(&bb.done, 1); > - bb.error = 0; > - bb.wait = &wait; > - > blk_start_plug(&plug); > while (nr_sects) { > unsigned int req_sects; > sector_t end_sect, tmp; > > - bio = bio_alloc(gfp_mask, 1); > - if (!bio) { > - ret = -ENOMEM; > - break; > - } > - > /* Make sure bi_size doesn't overflow */ > req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); > > - /* > + /** > * If splitting a request, and the next starting sector would be > * misaligned, stop the discard at the previous aligned sector. > */ > @@ -98,18 +82,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > req_sects = end_sect - sector; > } > > + bio = next_bio(bio, type, 1, gfp_mask); > bio->bi_iter.bi_sector = sector; > - bio->bi_end_io = bio_batch_end_io; > bio->bi_bdev = bdev; > - bio->bi_private = &bb; > > bio->bi_iter.bi_size = req_sects << 9; > nr_sects -= req_sects; > sector = end_sect; > > - atomic_inc(&bb.done); > - submit_bio(type, bio); > - > /* > * We can loop for a long time in here, if someone does > * full device discards (like mkfs). Be nice and allow > @@ -118,15 +98,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > */ > cond_resched(); > } > + if (bio) > + ret = submit_bio_wait(type, bio); > blk_finish_plug(&plug); The patch itself is correct, and the idea is good. But it can be simpler or more readable by always chaining bios into the same parent bio, which is submitted as the last one. Thanks, > > - /* Wait for bios in-flight */ > - if (!atomic_dec_and_test(&bb.done)) > - wait_for_completion_io(&wait); > - > - if (bb.error) > - return bb.error; > - return ret; > + return ret != -EOPNOTSUPP ? ret : 0; > } > EXPORT_SYMBOL(blkdev_issue_discard); > > @@ -145,11 +121,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, > sector_t nr_sects, gfp_t gfp_mask, > struct page *page) > { > - DECLARE_COMPLETION_ONSTACK(wait); > struct request_queue *q = bdev_get_queue(bdev); > unsigned int max_write_same_sectors; > - struct bio_batch bb; > - struct bio *bio; > + struct bio *bio = NULL; > int ret = 0; > > if (!q) > @@ -158,21 +132,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, > /* Ensure that max_write_same_sectors doesn't overflow bi_size */ > max_write_same_sectors = UINT_MAX >> 9; > > - atomic_set(&bb.done, 1); > - bb.error = 0; > - bb.wait = &wait; > - > while (nr_sects) { > - bio = bio_alloc(gfp_mask, 1); > - if (!bio) { > - ret = -ENOMEM; > - break; > - } > - > + bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); > bio->bi_iter.bi_sector = sector; > - bio->bi_end_io = bio_batch_end_io; > bio->bi_bdev = bdev; > - bio->bi_private = &bb; > bio->bi_vcnt = 1; > bio->bi_io_vec->bv_page = page; > bio->bi_io_vec->bv_offset = 0; > @@ -186,18 +149,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, > bio->bi_iter.bi_size = nr_sects << 9; > nr_sects = 0; > } > - > - atomic_inc(&bb.done); > - submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); > } > > - /* Wait for bios in-flight */ > - if (!atomic_dec_and_test(&bb.done)) > - wait_for_completion_io(&wait); > - > - if (bb.error) > - return bb.error; > - return ret; > + if (bio) > + ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); > + return ret != -EOPNOTSUPP ? ret : 0; > } > EXPORT_SYMBOL(blkdev_issue_write_same); > > @@ -216,28 +172,15 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, > sector_t nr_sects, gfp_t gfp_mask) > { > int ret; > - struct bio *bio; > - struct bio_batch bb; > + struct bio *bio = NULL; > unsigned int sz; > - DECLARE_COMPLETION_ONSTACK(wait); > > - atomic_set(&bb.done, 1); > - bb.error = 0; > - bb.wait = &wait; > - > - ret = 0; > while (nr_sects != 0) { > - bio = bio_alloc(gfp_mask, > - min(nr_sects, (sector_t)BIO_MAX_PAGES)); > - if (!bio) { > - ret = -ENOMEM; > - break; > - } > - > + bio = next_bio(bio, WRITE, > + min(nr_sects, (sector_t)BIO_MAX_PAGES), > + gfp_mask); > bio->bi_iter.bi_sector = sector; > bio->bi_bdev = bdev; > - bio->bi_end_io = bio_batch_end_io; > - bio->bi_private = &bb; > > while (nr_sects != 0) { > sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); > @@ -247,18 +190,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, > if (ret < (sz << 9)) > break; > } > - ret = 0; > - atomic_inc(&bb.done); > - submit_bio(WRITE, bio); > } > > - /* Wait for bios in-flight */ > - if (!atomic_dec_and_test(&bb.done)) > - wait_for_completion_io(&wait); > - > - if (bb.error) > - return bb.error; > - return ret; > + if (bio) > + return submit_bio_wait(WRITE, bio); > + return 0; > } > > /** > -- > 2.1.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-block" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Apr 17, 2016 at 10:25:09PM +0800, Ming Lei wrote: > The patch itself is correct, and the idea is good. > > But it can be simpler or more readable by always chaining bios into > the same parent bio, which is submitted as the last one. Which means we now messed up our I/O order to not be sequential for no good reason. -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 18, 2016 at 2:07 AM, Christoph Hellwig <hch@lst.de> wrote: > On Sun, Apr 17, 2016 at 10:25:09PM +0800, Ming Lei wrote: >> The patch itself is correct, and the idea is good. >> >> But it can be simpler or more readable by always chaining bios into >> the same parent bio, which is submitted as the last one. > > Which means we now messed up our I/O order to not be sequential > for no good reason. From I/O order view, I don't see any difference among the two approachs, and all are like the following, and both are similar with the way in linus tree too. - submit(sector: s[0], size:sz[0]) - submit(sector: s[1], size:sz[1]) ... - submit(sector:s[n-1], size:sz[n-1]) - submit_wait(sector:s[n], size:sz[n]) And there is always the euquation: s[i+1] = s[i] + sz[i] for i = 0, ..., n - 1, and s[n] + sz[n] = sector + nr_sects. Thanks, Ming Lei -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I'm confused on what you're proposing. Can you just show what you mean with an incremental patch? -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 18, 2016 at 5:29 AM, Christoph Hellwig <hch@lst.de> wrote: > I'm confused on what you're proposing. Can you just show what you mean with > an incremental patch? I mean all async bios can chain to one same parent, but looks that implementation can't be so clean with this one, so I am fine with it now: Reviewed-by: Ming Lei <tom.leiming@gmail.com>
diff --git a/block/blk-lib.c b/block/blk-lib.c index 9ebf653..700d248 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -9,21 +9,17 @@ #include "blk.h" -struct bio_batch { - atomic_t done; - int error; - struct completion *wait; -}; - -static void bio_batch_end_io(struct bio *bio) +static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, + gfp_t gfp) { - struct bio_batch *bb = bio->bi_private; + struct bio *new = bio_alloc(gfp, nr_pages); + + if (bio) { + bio_chain(bio, new); + submit_bio(rw, bio); + } - if (bio->bi_error && bio->bi_error != -EOPNOTSUPP) - bb->error = bio->bi_error; - if (atomic_dec_and_test(&bb->done)) - complete(bb->wait); - bio_put(bio); + return new; } /** @@ -40,13 +36,11 @@ static void bio_batch_end_io(struct bio *bio) int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) { - DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; unsigned int granularity; int alignment; - struct bio_batch bb; - struct bio *bio; + struct bio *bio = NULL; int ret = 0; struct blk_plug plug; @@ -66,25 +60,15 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, type |= REQ_SECURE; } - atomic_set(&bb.done, 1); - bb.error = 0; - bb.wait = &wait; - blk_start_plug(&plug); while (nr_sects) { unsigned int req_sects; sector_t end_sect, tmp; - bio = bio_alloc(gfp_mask, 1); - if (!bio) { - ret = -ENOMEM; - break; - } - /* Make sure bi_size doesn't overflow */ req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9); - /* + /** * If splitting a request, and the next starting sector would be * misaligned, stop the discard at the previous aligned sector. */ @@ -98,18 +82,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, req_sects = end_sect - sector; } + bio = next_bio(bio, type, 1, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_end_io = bio_batch_end_io; bio->bi_bdev = bdev; - bio->bi_private = &bb; bio->bi_iter.bi_size = req_sects << 9; nr_sects -= req_sects; sector = end_sect; - atomic_inc(&bb.done); - submit_bio(type, bio); - /* * We can loop for a long time in here, if someone does * full device discards (like mkfs). Be nice and allow @@ -118,15 +98,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, */ cond_resched(); } + if (bio) + ret = submit_bio_wait(type, bio); blk_finish_plug(&plug); - /* Wait for bios in-flight */ - if (!atomic_dec_and_test(&bb.done)) - wait_for_completion_io(&wait); - - if (bb.error) - return bb.error; - return ret; + return ret != -EOPNOTSUPP ? ret : 0; } EXPORT_SYMBOL(blkdev_issue_discard); @@ -145,11 +121,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page) { - DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); unsigned int max_write_same_sectors; - struct bio_batch bb; - struct bio *bio; + struct bio *bio = NULL; int ret = 0; if (!q) @@ -158,21 +132,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, /* Ensure that max_write_same_sectors doesn't overflow bi_size */ max_write_same_sectors = UINT_MAX >> 9; - atomic_set(&bb.done, 1); - bb.error = 0; - bb.wait = &wait; - while (nr_sects) { - bio = bio_alloc(gfp_mask, 1); - if (!bio) { - ret = -ENOMEM; - break; - } - + bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_end_io = bio_batch_end_io; bio->bi_bdev = bdev; - bio->bi_private = &bb; bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; @@ -186,18 +149,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, bio->bi_iter.bi_size = nr_sects << 9; nr_sects = 0; } - - atomic_inc(&bb.done); - submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); } - /* Wait for bios in-flight */ - if (!atomic_dec_and_test(&bb.done)) - wait_for_completion_io(&wait); - - if (bb.error) - return bb.error; - return ret; + if (bio) + ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); + return ret != -EOPNOTSUPP ? ret : 0; } EXPORT_SYMBOL(blkdev_issue_write_same); @@ -216,28 +172,15 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { int ret; - struct bio *bio; - struct bio_batch bb; + struct bio *bio = NULL; unsigned int sz; - DECLARE_COMPLETION_ONSTACK(wait); - atomic_set(&bb.done, 1); - bb.error = 0; - bb.wait = &wait; - - ret = 0; while (nr_sects != 0) { - bio = bio_alloc(gfp_mask, - min(nr_sects, (sector_t)BIO_MAX_PAGES)); - if (!bio) { - ret = -ENOMEM; - break; - } - + bio = next_bio(bio, WRITE, + min(nr_sects, (sector_t)BIO_MAX_PAGES), + gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; - bio->bi_end_io = bio_batch_end_io; - bio->bi_private = &bb; while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -247,18 +190,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, if (ret < (sz << 9)) break; } - ret = 0; - atomic_inc(&bb.done); - submit_bio(WRITE, bio); } - /* Wait for bios in-flight */ - if (!atomic_dec_and_test(&bb.done)) - wait_for_completion_io(&wait); - - if (bb.error) - return bb.error; - return ret; + if (bio) + return submit_bio_wait(WRITE, bio); + return 0; } /**