Message ID | 95b275ed47f1e4bdaba53040fe6de9eefdf3a5fd.1583789410.git.osandov@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: read repair/direct I/O improvements | expand |
On 3/9/20 5:32 PM, Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > The next commit will get rid of btrfs_dio_private->orig_bio. The only > thing we really need it for is containing all of the checksums, but we > can easily put those in btrfs_dio_private and get rid of the awkward > logic that looks up the checksums for orig_bio when the first split bio > is submitted. (Interestingly, btrfs_dio_private did contain the > checksums before commit 23ea8e5a0767 ("Btrfs: load checksum data once > when submitting a direct read io"), but it didn't look them up up > front.) > > Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Thanks, Josef
On 9.03.20 г. 23:32 ч., Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > The next commit will get rid of btrfs_dio_private->orig_bio. The only > thing we really need it for is containing all of the checksums, but we > can easily put those in btrfs_dio_private and get rid of the awkward > logic that looks up the checksums for orig_bio when the first split bio > is submitted. (Interestingly, btrfs_dio_private did contain the > checksums before commit 23ea8e5a0767 ("Btrfs: load checksum data once > when submitting a direct read io"), but it didn't look them up up > front.) nit: It would be useful to surmise the structural changes this patch does. Essentially this makes each individual cloned bio to index its checksums into the global checksum storage array anchored at btrfs_dio_private::sums. > > Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
On Mon, Mar 09, 2020 at 02:32:37PM -0700, Omar Sandoval wrote: > - file_offset -= dip->logical_offset; > - file_offset >>= inode->i_sb->s_blocksize_bits; > - csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); > - io_bio->csum = orig_io_bio->csum + csum_size * file_offset; > - ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio, > - file_offset); > - if (ret) > - goto err; > + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); > + size_t csum_offset; > + > + csum_offset = ((file_offset - dip->logical_offset) >> > + inode->i_sb->s_blocksize_bits) * csum_size; Please split the expression like it used to be in the original code (above). > + btrfs_io_bio(bio)->csum = dip->sums + csum_offset; > } > map: > ret = btrfs_map_bio(fs_info, bio, 0); > @@ -8047,13 +8018,25 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, > loff_t file_offset) > { > struct btrfs_dio_private *dip = NULL; > + size_t dip_size; > struct bio *bio = NULL; > struct btrfs_io_bio *io_bio; > bool write = (bio_op(dio_bio) == REQ_OP_WRITE); > + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); > > bio = btrfs_bio_clone(dio_bio); > > - dip = kzalloc(sizeof(*dip), GFP_NOFS); > + dip_size = sizeof(*dip); > + if (!write && csum) { > + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); > + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); > + size_t nblocks = (dio_bio->bi_iter.bi_size >> > + inode->i_sb->s_blocksize_bits); nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; This overflows 80 chars/line but is IMHO more readable than the split line.
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7fb0ba8cde4..4a2e44f3e66e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -84,6 +84,9 @@ struct btrfs_dio_private { */ blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *, blk_status_t); + + /* Checksums. */ + u8 sums[]; }; struct btrfs_dio_data { @@ -7753,7 +7756,6 @@ static void btrfs_endio_direct_read(struct bio *bio) dio_bio->bi_status = err; dio_end_io(dio_bio); - btrfs_io_bio_free_csum(io_bio); bio_put(bio); } @@ -7865,39 +7867,6 @@ static void btrfs_end_dio_bio(struct bio *bio) bio_put(bio); } -static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, - struct btrfs_dio_private *dip, - struct bio *bio, - u64 file_offset) -{ - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); - u16 csum_size; - blk_status_t ret; - - /* - * We load all the csum data we need when we submit - * the first bio to reduce the csum tree search and - * contention. - */ - if (dip->logical_offset == file_offset) { - ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset, - NULL); - if (ret) - return ret; - } - - if (bio == dip->orig_bio) - return 0; - - file_offset -= dip->logical_offset; - file_offset >>= inode->i_sb->s_blocksize_bits; - csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); - io_bio->csum = orig_io_bio->csum + csum_size * file_offset; - - return 0; -} - static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, int async_submit) { @@ -7933,10 +7902,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, if (ret) goto err; } else { - ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio, - file_offset); - if (ret) - goto err; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + size_t csum_offset; + + csum_offset = ((file_offset - dip->logical_offset) >> + inode->i_sb->s_blocksize_bits) * csum_size; + btrfs_io_bio(bio)->csum = dip->sums + csum_offset; } map: ret = btrfs_map_bio(fs_info, bio, 0); @@ -8047,13 +8018,25 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, loff_t file_offset) { struct btrfs_dio_private *dip = NULL; + size_t dip_size; struct bio *bio = NULL; struct btrfs_io_bio *io_bio; bool write = (bio_op(dio_bio) == REQ_OP_WRITE); + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); bio = btrfs_bio_clone(dio_bio); - dip = kzalloc(sizeof(*dip), GFP_NOFS); + dip_size = sizeof(*dip); + if (!write && csum) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + size_t nblocks = (dio_bio->bi_iter.bi_size >> + inode->i_sb->s_blocksize_bits); + + dip_size += csum_size * nblocks; + } + + dip = kzalloc(dip_size, GFP_NOFS); if (!dip) { if (!write) { unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, @@ -8093,11 +8076,27 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, dip->bytes; dio_data->unsubmitted_oe_range_start = dio_data->unsubmitted_oe_range_end; - bio->bi_end_io = btrfs_endio_direct_write; } else { bio->bi_end_io = btrfs_endio_direct_read; dip->subio_endio = btrfs_subio_endio_read; + + if (csum) { + blk_status_t status; + + /* + * Load the csums up front to reduce csum tree searches + * and contention when submitting bios. + */ + status = btrfs_lookup_bio_sums(inode, dio_bio, + file_offset, dip->sums); + if (status != BLK_STS_OK) { + dip->errors = 1; + if (refcount_dec_and_test(&dip->refs)) + bio_io_error(dip->orig_bio); + return; + } + } } btrfs_submit_direct_hook(dip);