@@ -1099,15 +1099,35 @@ static void scrub_read_endio(struct btrfs_bio *bbio)
{
struct scrub_stripe *stripe = bbio->private;
struct bio_vec *bvec;
- int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+ int sector_nr;
int num_sectors;
u32 bio_size = 0;
int i;
- ASSERT(sector_nr < stripe->nr_sectors);
bio_for_each_bvec_all(bvec, &bbio->bio, i)
bio_size += bvec->bv_len;
- num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
+ /*
+ * For RST scrub we can error out with empty bbio. In that case mark
+ * the range until the end as error.
+ */
+ if (unlikely(bio_size == 0)) {
+ /*
+ * Since the bbio didn't really get submitted, the logical
+ * (bi_sector) should be untouched.
+ */
+ u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+
+ ASSERT(logical >= stripe->logical &&
+ logical < stripe->logical + BTRFS_STRIPE_LEN);
+ ASSERT(bbio->bio.bi_status);
+ sector_nr = (logical - stripe->logical) >>
+ bbio->fs_info->sectorsize_bits;
+ num_sectors = stripe->nr_sectors - sector_nr;
+ } else {
+ sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+ ASSERT(sector_nr < stripe->nr_sectors);
+ num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
+ }
if (bbio->bio.bi_status) {
bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
[BUG] When running btrfs/060 with forced RST feature, it would crash the following ASSERT() inside scrub_read_endio(): ASSERT(sector_nr < stripe->nr_sectors); Before that, we would have tree dump from btrfs_get_raid_extent_offset(), as we failed to find the RST entry for the range. [CAUSE] Inside scrub_submit_extent_sector_read() every time we allocated a new bbio we immediate call btrfs_map_block() to make sure there is some RST range covering the scrub target. But if btrfs_map_block() failed, we immediately call endio for the bbio, meanwhile since the bbio is newly allocated, it's completely empty. Then inside scrub_read_endio(), we go through the bvecs to find the sector number (as bi_sector is no longer reliable if the bio is submitted to lower layers). And since the bio is empty, such bvecs iteration would not find any sector matching the sector, and return sector_nr == stripe->nr_sectors, triggering the ASSERT(). [FIX] Instead of going calc_sector_number() directly which expects a non-empty bio, do an early bio length check first. If the bio is empty, just grab the sector_nr using bi_sector, as the bio never really got submitted bi_sector is untouched and reliable. Then mark all the sectors until the stripe end as error. Otherwise go the regular routine. By this we have an extra safenet for possible RST related errors. Signed-off-by: Qu Wenruo <wqu@suse.com> --- REASON FOR RFC: This is only a fix for the direct cause, that we do not have proper error handling for rst related errors. But to be honest, I'm not that confident with the manual btrfs_map_block() call inside scrub. We may want to do the call before we allocate a new bbio, so that we can avoid such empty bbio. Furthermore we're still not sure why the RST lookup failed. During the scrub we should have prevented a new transaction to be committed, thus the RST lookup should match with extent_sector_bitmap, but that's not the case. Anyway for now just add a safenet until we pin down the root cause of the RST error, then determine if we need to btrfs_map_block() at the current location. --- fs/btrfs/scrub.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-)