Message ID | ae795cbe844de282bb9efa6681d1dec2e043f7a0.1587072977.git.osandov@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: read repair/direct I/O improvements | expand |
On 17.04.20 г. 0:46 ч., Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > Direct I/O read repair was originally implemented in commit 8b110e393c5a > ("Btrfs: implement repair function when direct read fails"). This > implementation is unnecessarily complicated. There is major code > duplication between __btrfs_subio_endio_read() (checks checksums and > handles I/O errors for files with checksums), > __btrfs_correct_data_nocsum() (handles I/O errors for files without > checksums), btrfs_retry_endio() (checks checksums and handles I/O errors > for retries of files with checksums), and btrfs_retry_endio_nocsum() > (handles I/O errors for retries of files without checksum). If it sounds > like these should be one function, that's because they should. > Additionally, these functions are very hard to follow due to their > excessive use of goto. > > This commit replaces the original implementation. After the previous > commit getting rid of orig_bio, we can reuse the same endio callback for > repair I/O and the original I/O, we just need to track the file offset > and original iterator in the repair bio. We can also unify the handling > of files with and without checksums and simplify the control flow. We > also no longer have to wait for each repair I/O to complete one by one. > > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > Signed-off-by: Omar Sandoval <osandov@fb.com> > --- > fs/btrfs/extent_io.c | 2 + > fs/btrfs/inode.c | 268 +++++++------------------------------------ > 2 files changed, 44 insertions(+), 226 deletions(-) > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index 85e98ba349a8..6e1d97bb7652 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -2631,6 +2631,8 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, > } > > bio_add_page(bio, page, failrec->len, pg_offset); > + btrfs_io_bio(bio)->logical = failrec->start; > + btrfs_io_bio(bio)->iter = bio->bi_iter; > > return bio; > } > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 79b884d2f3ed..2580f2d251d4 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -7435,19 +7435,17 @@ static int btrfs_check_dio_repairable(struct inode *inode, > > static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, > struct page *page, unsigned int pgoff, > - u64 start, u64 end, int failed_mirror, > - bio_end_io_t *repair_endio, void *repair_arg) > + u64 start, u64 end, int failed_mirror) > { > + struct btrfs_dio_private *dip = failed_bio->bi_private; > struct io_failure_record *failrec; > struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; > struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; > struct bio *bio; > int isector; > unsigned int read_mode = 0; > - int segs; > int ret; > blk_status_t status; > - struct bio_vec bvec; > > BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); > > @@ -7462,261 +7460,79 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, > return BLK_STS_IOERR; > } > > - segs = bio_segments(failed_bio); > - bio_get_first_bvec(failed_bio, &bvec); > - if (segs > 1 || > - (bvec.bv_len > btrfs_inode_sectorsize(inode))) > + if (btrfs_io_bio(failed_bio)->iter.bi_size > inode->i_sb->s_blocksize) > read_mode |= REQ_FAILFAST_DEV; > > isector = start - btrfs_io_bio(failed_bio)->logical; > isector >>= inode->i_sb->s_blocksize_bits; > - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, > - pgoff, isector, repair_endio, repair_arg); > + bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, > + isector, failed_bio->bi_end_io, dip); > bio->bi_opf = REQ_OP_READ | read_mode; > > btrfs_debug(BTRFS_I(inode)->root->fs_info, > "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", > read_mode, failrec->this_mirror, failrec->in_validation); > > + refcount_inc(&dip->refs); > status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); > if (status) { > free_io_failure(failure_tree, io_tree, failrec); > bio_put(bio); > + refcount_dec(&dip->refs); > } > > return status; > } > > -struct btrfs_retry_complete { > - struct completion done; > - struct inode *inode; > - u64 start; > - int uptodate; > -}; > - > -static void btrfs_retry_endio_nocsum(struct bio *bio) > -{ > - struct btrfs_retry_complete *done = bio->bi_private; > - struct inode *inode = done->inode; > - struct bio_vec *bvec; > - struct extent_io_tree *io_tree, *failure_tree; > - struct bvec_iter_all iter_all; > - > - if (bio->bi_status) > - goto end; > - > - ASSERT(bio->bi_vcnt == 1); > - io_tree = &BTRFS_I(inode)->io_tree; > - failure_tree = &BTRFS_I(inode)->io_failure_tree; > - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); > - > - done->uptodate = 1; > - ASSERT(!bio_flagged(bio, BIO_CLONED)); > - bio_for_each_segment_all(bvec, bio, iter_all) > - clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, > - io_tree, done->start, bvec->bv_page, > - btrfs_ino(BTRFS_I(inode)), 0); > -end: > - complete(&done->done); > - bio_put(bio); > -} > - > -static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, > - struct btrfs_io_bio *io_bio) > +static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, > + struct btrfs_io_bio *io_bio, > + const bool uptodate) > { > - struct btrfs_fs_info *fs_info; > + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; > + u32 sectorsize = fs_info->sectorsize; > + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; > + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; > + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); > struct bio_vec bvec; > struct bvec_iter iter; > - struct btrfs_retry_complete done; > - u64 start; > - unsigned int pgoff; > - u32 sectorsize; > - int nr_sectors; > - blk_status_t ret; > + u64 start = io_bio->logical; > + int icsum = 0; > blk_status_t err = BLK_STS_OK; > > - fs_info = BTRFS_I(inode)->root->fs_info; > - sectorsize = fs_info->sectorsize; > - > - start = io_bio->logical; > - done.inode = inode; > - io_bio->bio.bi_iter = io_bio->iter; > + __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { > + unsigned int i, nr_sectors, pgoff; > nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); > pgoff = bvec.bv_offset; <snip> > + for (i = 0; i < nr_sectors; i++) { > ASSERT(pgoff < PAGE_SIZE); <snip> > + if (uptodate && > + (!csum || !check_data_csum(inode, io_bio, icsum, > + bvec.bv_page, pgoff, > + start, sectorsize))) { > + clean_io_failure(fs_info, failure_tree, io_tree, > + start, bvec.bv_page, > + btrfs_ino(BTRFS_I(inode)), > + pgoff); Why do you have to call clean_io_failure in case there isn't a failure since that function: a) Will return because the tree is empty altogether (count_range_bits returns 0). b) Or if there was a corruption a different segment - get_state_failrec would return -ENOENT. Can't this be reworked to: if (!uptodate || (csum && check_data_csum)) { dio_read_error } > + } else { > + blk_status_t status; > + > + status = dio_read_error(inode, &io_bio->bio, > + bvec.bv_page, pgoff, > + start, > + start + sectorsize - 1, > + io_bio->mirror_num); > + if (status) > + err = status; > + } > + start += sectorsize; > + icsum++; > pgoff += sectorsize; > - ASSERT(pgoff < PAGE_SIZE); > - goto next_block; > } > } > - > return err; > } <snip>
On 21.04.20 г. 16:53 ч., Nikolay Borisov wrote: > > > On 17.04.20 г. 0:46 ч., Omar Sandoval wrote: >> From: Omar Sandoval <osandov@fb.com> >> >> Direct I/O read repair was originally implemented in commit 8b110e393c5a >> ("Btrfs: implement repair function when direct read fails"). This >> implementation is unnecessarily complicated. There is major code >> duplication between __btrfs_subio_endio_read() (checks checksums and >> handles I/O errors for files with checksums), >> __btrfs_correct_data_nocsum() (handles I/O errors for files without >> checksums), btrfs_retry_endio() (checks checksums and handles I/O errors >> for retries of files with checksums), and btrfs_retry_endio_nocsum() >> (handles I/O errors for retries of files without checksum). If it sounds >> like these should be one function, that's because they should. >> Additionally, these functions are very hard to follow due to their >> excessive use of goto. >> >> This commit replaces the original implementation. After the previous >> commit getting rid of orig_bio, we can reuse the same endio callback for >> repair I/O and the original I/O, we just need to track the file offset >> and original iterator in the repair bio. We can also unify the handling >> of files with and without checksums and simplify the control flow. We >> also no longer have to wait for each repair I/O to complete one by one. >> >> Reviewed-by: Josef Bacik <josef@toxicpanda.com> >> Signed-off-by: Omar Sandoval <osandov@fb.com> >> --- >> fs/btrfs/extent_io.c | 2 + >> fs/btrfs/inode.c | 268 +++++++------------------------------------ >> 2 files changed, 44 insertions(+), 226 deletions(-) >> >> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c >> index 85e98ba349a8..6e1d97bb7652 100644 >> --- a/fs/btrfs/extent_io.c >> +++ b/fs/btrfs/extent_io.c >> @@ -2631,6 +2631,8 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, >> } >> >> bio_add_page(bio, page, failrec->len, pg_offset); >> + btrfs_io_bio(bio)->logical = failrec->start; >> + btrfs_io_bio(bio)->iter = bio->bi_iter; >> >> return bio; >> } >> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c >> index 79b884d2f3ed..2580f2d251d4 100644 >> --- a/fs/btrfs/inode.c >> +++ b/fs/btrfs/inode.c >> @@ -7435,19 +7435,17 @@ static int btrfs_check_dio_repairable(struct inode *inode, >> >> static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, >> struct page *page, unsigned int pgoff, >> - u64 start, u64 end, int failed_mirror, >> - bio_end_io_t *repair_endio, void *repair_arg) >> + u64 start, u64 end, int failed_mirror) >> { >> + struct btrfs_dio_private *dip = failed_bio->bi_private; >> struct io_failure_record *failrec; >> struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; >> struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; >> struct bio *bio; >> int isector; >> unsigned int read_mode = 0; >> - int segs; >> int ret; >> blk_status_t status; >> - struct bio_vec bvec; >> >> BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); >> >> @@ -7462,261 +7460,79 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, >> return BLK_STS_IOERR; >> } >> >> - segs = bio_segments(failed_bio); >> - bio_get_first_bvec(failed_bio, &bvec); >> - if (segs > 1 || >> - (bvec.bv_len > btrfs_inode_sectorsize(inode))) >> + if (btrfs_io_bio(failed_bio)->iter.bi_size > inode->i_sb->s_blocksize) >> read_mode |= REQ_FAILFAST_DEV; >> >> isector = start - btrfs_io_bio(failed_bio)->logical; >> isector >>= inode->i_sb->s_blocksize_bits; >> - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, >> - pgoff, isector, repair_endio, repair_arg); >> + bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, >> + isector, failed_bio->bi_end_io, dip); >> bio->bi_opf = REQ_OP_READ | read_mode; >> >> btrfs_debug(BTRFS_I(inode)->root->fs_info, >> "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", >> read_mode, failrec->this_mirror, failrec->in_validation); >> >> + refcount_inc(&dip->refs); >> status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); >> if (status) { >> free_io_failure(failure_tree, io_tree, failrec); >> bio_put(bio); >> + refcount_dec(&dip->refs); >> } >> >> return status; >> } >> >> -struct btrfs_retry_complete { >> - struct completion done; >> - struct inode *inode; >> - u64 start; >> - int uptodate; >> -}; >> - >> -static void btrfs_retry_endio_nocsum(struct bio *bio) >> -{ >> - struct btrfs_retry_complete *done = bio->bi_private; >> - struct inode *inode = done->inode; >> - struct bio_vec *bvec; >> - struct extent_io_tree *io_tree, *failure_tree; >> - struct bvec_iter_all iter_all; >> - >> - if (bio->bi_status) >> - goto end; >> - >> - ASSERT(bio->bi_vcnt == 1); >> - io_tree = &BTRFS_I(inode)->io_tree; >> - failure_tree = &BTRFS_I(inode)->io_failure_tree; >> - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); >> - >> - done->uptodate = 1; >> - ASSERT(!bio_flagged(bio, BIO_CLONED)); >> - bio_for_each_segment_all(bvec, bio, iter_all) >> - clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, >> - io_tree, done->start, bvec->bv_page, >> - btrfs_ino(BTRFS_I(inode)), 0); >> -end: >> - complete(&done->done); >> - bio_put(bio); >> -} >> - >> -static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, >> - struct btrfs_io_bio *io_bio) >> +static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, >> + struct btrfs_io_bio *io_bio, >> + const bool uptodate) >> { >> - struct btrfs_fs_info *fs_info; >> + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; >> + u32 sectorsize = fs_info->sectorsize; >> + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; >> + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; >> + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); >> struct bio_vec bvec; >> struct bvec_iter iter; >> - struct btrfs_retry_complete done; >> - u64 start; >> - unsigned int pgoff; >> - u32 sectorsize; >> - int nr_sectors; >> - blk_status_t ret; >> + u64 start = io_bio->logical; >> + int icsum = 0; >> blk_status_t err = BLK_STS_OK; >> >> - fs_info = BTRFS_I(inode)->root->fs_info; >> - sectorsize = fs_info->sectorsize; >> - >> - start = io_bio->logical; >> - done.inode = inode; >> - io_bio->bio.bi_iter = io_bio->iter; >> + __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { >> + unsigned int i, nr_sectors, pgoff; >> nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); >> pgoff = bvec.bv_offset; > <snip> >> + for (i = 0; i < nr_sectors; i++) { >> ASSERT(pgoff < PAGE_SIZE); > <snip> >> + if (uptodate && >> + (!csum || !check_data_csum(inode, io_bio, icsum, >> + bvec.bv_page, pgoff, >> + start, sectorsize))) { >> + clean_io_failure(fs_info, failure_tree, io_tree, >> + start, bvec.bv_page, >> + btrfs_ino(BTRFS_I(inode)), >> + pgoff); > > Why do you have to call clean_io_failure in case there isn't a failure > since that function: > > a) Will return because the tree is empty altogether (count_range_bits > returns 0). > > b) Or if there was a corruption a different segment - get_state_failrec > would return -ENOENT. > > Can't this be reworked to: > > if (!uptodate || (csum && check_data_csum)) { > dio_read_error > } Thinking about it a bit more this seems to handle the case when we have failed one read and subsequently queued a repair bio. When this repair bio finishes we'll execute the clean_ioo_failure to remove the failure rec created initially. In this case this patch LGTM, so: Reviewed-by: Nikolay Borisov <nborisov@suse.com> > >> + } else { >> + blk_status_t status; >> + >> + status = dio_read_error(inode, &io_bio->bio, >> + bvec.bv_page, pgoff, >> + start, >> + start + sectorsize - 1, >> + io_bio->mirror_num); >> + if (status) >> + err = status; >> + } >> + start += sectorsize; >> + icsum++; >> pgoff += sectorsize; >> - ASSERT(pgoff < PAGE_SIZE); >> - goto next_block; >> } >> } >> - >> return err; >> } > > <snip> >
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85e98ba349a8..6e1d97bb7652 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2631,6 +2631,8 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, } bio_add_page(bio, page, failrec->len, pg_offset); + btrfs_io_bio(bio)->logical = failrec->start; + btrfs_io_bio(bio)->iter = bio->bi_iter; return bio; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 79b884d2f3ed..2580f2d251d4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7435,19 +7435,17 @@ static int btrfs_check_dio_repairable(struct inode *inode, static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, struct page *page, unsigned int pgoff, - u64 start, u64 end, int failed_mirror, - bio_end_io_t *repair_endio, void *repair_arg) + u64 start, u64 end, int failed_mirror) { + struct btrfs_dio_private *dip = failed_bio->bi_private; struct io_failure_record *failrec; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct bio *bio; int isector; unsigned int read_mode = 0; - int segs; int ret; blk_status_t status; - struct bio_vec bvec; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -7462,261 +7460,79 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, return BLK_STS_IOERR; } - segs = bio_segments(failed_bio); - bio_get_first_bvec(failed_bio, &bvec); - if (segs > 1 || - (bvec.bv_len > btrfs_inode_sectorsize(inode))) + if (btrfs_io_bio(failed_bio)->iter.bi_size > inode->i_sb->s_blocksize) read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; isector >>= inode->i_sb->s_blocksize_bits; - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, - pgoff, isector, repair_endio, repair_arg); + bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, + isector, failed_bio->bi_end_io, dip); bio->bi_opf = REQ_OP_READ | read_mode; btrfs_debug(BTRFS_I(inode)->root->fs_info, "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", read_mode, failrec->this_mirror, failrec->in_validation); + refcount_inc(&dip->refs); status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); if (status) { free_io_failure(failure_tree, io_tree, failrec); bio_put(bio); + refcount_dec(&dip->refs); } return status; } -struct btrfs_retry_complete { - struct completion done; - struct inode *inode; - u64 start; - int uptodate; -}; - -static void btrfs_retry_endio_nocsum(struct bio *bio) -{ - struct btrfs_retry_complete *done = bio->bi_private; - struct inode *inode = done->inode; - struct bio_vec *bvec; - struct extent_io_tree *io_tree, *failure_tree; - struct bvec_iter_all iter_all; - - if (bio->bi_status) - goto end; - - ASSERT(bio->bi_vcnt == 1); - io_tree = &BTRFS_I(inode)->io_tree; - failure_tree = &BTRFS_I(inode)->io_failure_tree; - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); - - done->uptodate = 1; - ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, bio, iter_all) - clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, - io_tree, done->start, bvec->bv_page, - btrfs_ino(BTRFS_I(inode)), 0); -end: - complete(&done->done); - bio_put(bio); -} - -static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, - struct btrfs_io_bio *io_bio) +static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, + struct btrfs_io_bio *io_bio, + const bool uptodate) { - struct btrfs_fs_info *fs_info; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + u32 sectorsize = fs_info->sectorsize; + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); struct bio_vec bvec; struct bvec_iter iter; - struct btrfs_retry_complete done; - u64 start; - unsigned int pgoff; - u32 sectorsize; - int nr_sectors; - blk_status_t ret; + u64 start = io_bio->logical; + int icsum = 0; blk_status_t err = BLK_STS_OK; - fs_info = BTRFS_I(inode)->root->fs_info; - sectorsize = fs_info->sectorsize; - - start = io_bio->logical; - done.inode = inode; - io_bio->bio.bi_iter = io_bio->iter; + __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { + unsigned int i, nr_sectors, pgoff; - bio_for_each_segment(bvec, &io_bio->bio, iter) { nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); pgoff = bvec.bv_offset; - -next_block_or_try_again: - done.uptodate = 0; - done.start = start; - init_completion(&done.done); - - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, - pgoff, start, start + sectorsize - 1, - io_bio->mirror_num, - btrfs_retry_endio_nocsum, &done); - if (ret) { - err = ret; - goto next; - } - - wait_for_completion_io(&done.done); - - if (!done.uptodate) { - /* We might have another mirror, so try again */ - goto next_block_or_try_again; - } - -next: - start += sectorsize; - - nr_sectors--; - if (nr_sectors) { - pgoff += sectorsize; + for (i = 0; i < nr_sectors; i++) { ASSERT(pgoff < PAGE_SIZE); - goto next_block_or_try_again; - } - } - - return err; -} - -static void btrfs_retry_endio(struct bio *bio) -{ - struct btrfs_retry_complete *done = bio->bi_private; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - struct extent_io_tree *io_tree, *failure_tree; - struct inode *inode = done->inode; - struct bio_vec *bvec; - int uptodate; - int ret; - int i = 0; - struct bvec_iter_all iter_all; - - if (bio->bi_status) - goto end; - - uptodate = 1; - - ASSERT(bio->bi_vcnt == 1); - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode)); - - io_tree = &BTRFS_I(inode)->io_tree; - failure_tree = &BTRFS_I(inode)->io_failure_tree; - - ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, bio, iter_all) { - ret = check_data_csum(inode, io_bio, i, bvec->bv_page, - bvec->bv_offset, done->start, - bvec->bv_len); - if (!ret) - clean_io_failure(BTRFS_I(inode)->root->fs_info, - failure_tree, io_tree, done->start, - bvec->bv_page, - btrfs_ino(BTRFS_I(inode)), - bvec->bv_offset); - else - uptodate = 0; - i++; - } - - done->uptodate = uptodate; -end: - complete(&done->done); - bio_put(bio); -} - -static blk_status_t __btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio, blk_status_t err) -{ - struct btrfs_fs_info *fs_info; - struct bio_vec bvec; - struct bvec_iter iter; - struct btrfs_retry_complete done; - u64 start; - u64 offset = 0; - u32 sectorsize; - int nr_sectors; - unsigned int pgoff; - int csum_pos; - bool uptodate = (err == 0); - int ret; - blk_status_t status; - - fs_info = BTRFS_I(inode)->root->fs_info; - sectorsize = fs_info->sectorsize; - - err = BLK_STS_OK; - start = io_bio->logical; - done.inode = inode; - io_bio->bio.bi_iter = io_bio->iter; - - bio_for_each_segment(bvec, &io_bio->bio, iter) { - nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); - - pgoff = bvec.bv_offset; -next_block: - if (uptodate) { - csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); - ret = check_data_csum(inode, io_bio, csum_pos, - bvec.bv_page, pgoff, start, - sectorsize); - if (likely(!ret)) - goto next; - } -try_again: - done.uptodate = 0; - done.start = start; - init_completion(&done.done); - - status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, - pgoff, start, start + sectorsize - 1, - io_bio->mirror_num, btrfs_retry_endio, - &done); - if (status) { - err = status; - goto next; - } - - wait_for_completion_io(&done.done); - - if (!done.uptodate) { - /* We might have another mirror, so try again */ - goto try_again; - } -next: - offset += sectorsize; - start += sectorsize; - - ASSERT(nr_sectors); - - nr_sectors--; - if (nr_sectors) { + if (uptodate && + (!csum || !check_data_csum(inode, io_bio, icsum, + bvec.bv_page, pgoff, + start, sectorsize))) { + clean_io_failure(fs_info, failure_tree, io_tree, + start, bvec.bv_page, + btrfs_ino(BTRFS_I(inode)), + pgoff); + } else { + blk_status_t status; + + status = dio_read_error(inode, &io_bio->bio, + bvec.bv_page, pgoff, + start, + start + sectorsize - 1, + io_bio->mirror_num); + if (status) + err = status; + } + start += sectorsize; + icsum++; pgoff += sectorsize; - ASSERT(pgoff < PAGE_SIZE); - goto next_block; } } - return err; } -static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, - struct btrfs_io_bio *io_bio, - blk_status_t err) -{ - bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - - if (skip_csum) { - if (unlikely(err)) - return __btrfs_correct_data_nocsum(inode, io_bio); - else - return BLK_STS_OK; - } else { - return __btrfs_subio_endio_read(inode, io_bio, err); - } -} - static void __endio_write_update_ordered(struct inode *inode, const u64 offset, const u64 bytes, const bool uptodate) @@ -7785,7 +7601,7 @@ static void btrfs_end_dio_bio(struct bio *bio) if (bio_op(bio) == REQ_OP_READ) { err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio), - err); + !err); } if (err)