| Message ID | 20181121032327.8434-16-ming.lei@redhat.com (mailing list archive) |
|---|---|
| State | Not Applicable, archived |
| Headers | show |
| Series | block: support multi-page bvec \| expand |
On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote: > if (bio->bi_vcnt > 0) { > - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; > + struct bio_vec bv; > + struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; > > - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) { > - bv->bv_len += len; > + bvec_last_segment(seg, &bv); > + > + if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) { I think this we can simplify the try to merge into bio case a bit, and also document it better with something like this: diff --git a/block/bio.c b/block/bio.c index 854676edc438..cc913281a723 100644 --- a/block/bio.c +++ b/block/bio.c @@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page); * @page: page to add * @len: length of the data to add * @off: offset of the data in @page + * @same_page: if %true only merge if the new data is in the same physical + * page as the last segment of the bio. * - * Try to add the data at @page + @off to the last page of @bio. This is a + * Try to add the data at @page + @off to the last bvec of @bio. This is a * a useful optimisation for file systems with a block size smaller than the * page size. * * Return %true on success or %false on failure. 
*/ bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off) + unsigned int len, unsigned int off, bool same_page) { if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return false; if (bio->bi_vcnt > 0) { - struct bio_vec bv; - struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - bvec_last_segment(seg, &bv); - - if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) { - seg->bv_len += len; - bio->bi_iter.bi_size += len; - return true; - } + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; + phys_addr_t vec_addr = page_to_phys(bv->bv_page); + phys_addr_t page_addr = page_to_phys(page); + + if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off) + return false; + if (same_page && + (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE)) + return false; + + bv->bv_len += len; + bio->bi_iter.bi_size += len; + return true; } return false; } EXPORT_SYMBOL_GPL(__bio_try_merge_page); -static bool bio_try_merge_segment(struct bio *bio, struct page *page, - unsigned int len, unsigned int off) -{ - if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) - return false; - - if (bio->bi_vcnt > 0) { - struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len == - page_to_phys(page) + off) { - seg->bv_len += len; - bio->bi_iter.bi_size += len; - return true; - } - } - return false; -} - /** * __bio_add_page - add page to a bio in a new segment * @bio: destination bio @@ -910,7 +896,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page); int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - if (!bio_try_merge_segment(bio, page, len, offset)) { + if (!__bio_try_merge_page(bio, page, len, offset, false)) { if (bio_full(bio)) return 0; __bio_add_page(bio, page, len, offset); diff --git a/fs/iomap.c b/fs/iomap.c index ccc2ba115f4d..d918acb9bfc9 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -313,7 +313,7 @@ iomap_readpage_actor(struct inode 
*inode, loff_t pos, loff_t length, void *data, */ sector = iomap_sector(iomap, pos); if (ctx->bio && bio_end_sector(ctx->bio) == sector) { - if (__bio_try_merge_page(ctx->bio, page, plen, poff)) + if (__bio_try_merge_page(ctx->bio, page, plen, poff, true)) goto done; is_contig = true; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5c2190216614..b9fd44168f61 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -616,7 +616,7 @@ xfs_add_to_ioend( bdev, sector); } - if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) { + if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) { if (iop) atomic_inc(&iop->write_count); if (bio_full(wpc->ioend->io_bio)) diff --git a/include/linux/bio.h b/include/linux/bio.h index e5b975fa0558..f08e6940c1ab 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -442,7 +442,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off); + unsigned int len, unsigned int off, bool same_page); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
On Wed, Nov 21, 2018 at 03:55:02PM +0100, Christoph Hellwig wrote: > On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote: > > if (bio->bi_vcnt > 0) { > > - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; > > + struct bio_vec bv; > > + struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; > > > > - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) { > > - bv->bv_len += len; > > + bvec_last_segment(seg, &bv); > > + > > + if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) { > > I think this we can simplify the try to merge into bio case a bit, > and also document it better with something like this: > > diff --git a/block/bio.c b/block/bio.c > index 854676edc438..cc913281a723 100644 > --- a/block/bio.c > +++ b/block/bio.c > @@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page); > * @page: page to add > * @len: length of the data to add > * @off: offset of the data in @page > + * @same_page: if %true only merge if the new data is in the same physical > + * page as the last segment of the bio. > * > - * Try to add the data at @page + @off to the last page of @bio. This is a > + * Try to add the data at @page + @off to the last bvec of @bio. This is a > * a useful optimisation for file systems with a block size smaller than the > * page size. > * > * Return %true on success or %false on failure. 
> */ > bool __bio_try_merge_page(struct bio *bio, struct page *page, > - unsigned int len, unsigned int off) > + unsigned int len, unsigned int off, bool same_page) > { > if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) > return false; > > if (bio->bi_vcnt > 0) { > - struct bio_vec bv; > - struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; > - > - bvec_last_segment(seg, &bv); > - > - if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) { > - seg->bv_len += len; > - bio->bi_iter.bi_size += len; > - return true; > - } > + struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; > + phys_addr_t vec_addr = page_to_phys(bv->bv_page); > + phys_addr_t page_addr = page_to_phys(page); > + > + if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off) > + return false; > + if (same_page && > + (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE)) > + return false; I guess the correct check should be: end_addr = vec_addr + bv->bv_offset + bv->bv_len; if (same_page && (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK)) return false; And this approach is good, will take it in V12. Thanks, Ming
On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote: > I guess the correct check should be: > > end_addr = vec_addr + bv->bv_offset + bv->bv_len; > if (same_page && > (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK)) > return false; Indeed.
On Wed, Nov 21, 2018 at 05:12:06PM +0100, Christoph Hellwig wrote: > On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote: > > I guess the correct check should be: > > > > end_addr = vec_addr + bv->bv_offset + bv->bv_len; > > if (same_page && > > (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK)) > > return false; > > Indeed. The above is still not totally correct, and it should have been: end_addr = vec_addr + bv->bv_offset + bv->bv_len - 1; if (same_page && (end_addr & PAGE_MASK) != page_addr) return false; Also, bv->bv_len should be guaranteed to be bigger than zero. It also shows that it is quite easy to get the last-page check wrong, :-( Thanks, Ming
diff --git a/block/bio.c b/block/bio.c index 0f1635b9ec50..854676edc438 100644 --- a/block/bio.c +++ b/block/bio.c @@ -823,7 +823,7 @@ EXPORT_SYMBOL(bio_add_pc_page); * @len: length of the data to add * @off: offset of the data in @page * - * Try to add the data at @page + @off to the last bvec of @bio. This is a + * Try to add the data at @page + @off to the last page of @bio. This is a * a useful optimisation for file systems with a block size smaller than the * page size. * @@ -836,10 +836,13 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, return false; if (bio->bi_vcnt > 0) { - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; + struct bio_vec bv; + struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; - if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) { - bv->bv_len += len; + bvec_last_segment(seg, &bv); + + if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) { + seg->bv_len += len; bio->bi_iter.bi_size += len; return true; } @@ -848,6 +851,25 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL_GPL(__bio_try_merge_page); +static bool bio_try_merge_segment(struct bio *bio, struct page *page, + unsigned int len, unsigned int off) +{ + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) + return false; + + if (bio->bi_vcnt > 0) { + struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len == + page_to_phys(page) + off) { + seg->bv_len += len; + bio->bi_iter.bi_size += len; + return true; + } + } + return false; +} + /** * __bio_add_page - add page to a bio in a new segment * @bio: destination bio @@ -888,7 +910,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page); int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - if (!__bio_try_merge_page(bio, page, len, offset)) { + if (!bio_try_merge_segment(bio, page, len, offset)) { if (bio_full(bio)) return 0; __bio_add_page(bio, page, len, offset); diff --git 
a/fs/iomap.c b/fs/iomap.c index f5fb8bf75cc8..ccc2ba115f4d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -344,7 +344,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ctx->bio->bi_end_io = iomap_read_end_io; } - __bio_add_page(ctx->bio, page, plen, poff); + bio_add_page(ctx->bio, page, plen, poff); done: /* * Move the caller beyond our range so that it keeps making progress. diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1f1829e506e8..5c2190216614 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -621,7 +621,7 @@ xfs_add_to_ioend( atomic_inc(&iop->write_count); if (bio_full(wpc->ioend->io_bio)) xfs_chain_bio(wpc->ioend, wbc, bdev, sector); - __bio_add_page(wpc->ioend->io_bio, page, len, poff); + bio_add_page(wpc->ioend->io_bio, page, len, poff); } wpc->ioend->io_size += len;
This patch pulls the trigger for multi-page bvecs. Signed-off-by: Ming Lei <ming.lei@redhat.com> --- block/bio.c | 32 +++++++++++++++++++++++++++----- fs/iomap.c | 2 +- fs/xfs/xfs_aops.c | 2 +- 3 files changed, 29 insertions(+), 7 deletions(-)