Message ID | 20230914-raid-stripe-tree-v9-6-15d423829637@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: introduce RAID stripe tree | expand |
On 2023/9/15 01:37, Johannes Thumshirn wrote: > A filesystem that uses the RAID stripe tree for logical to physical > address translation can't use the regular scrub path, that reads all > stripes and then checks if a sector is unused afterwards. > > When using the RAID stripe tree, this will result in lookup errors, as the > stripe tree doesn't know the requested logical addresses. > > Instead, look up stripes that are backed by the extent bitmap. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/bio.c | 2 ++ > fs/btrfs/raid-stripe-tree.c | 8 ++++++- > fs/btrfs/scrub.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/volumes.h | 1 + > 4 files changed, 63 insertions(+), 1 deletion(-) > > diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c > index ddbe6f8d4ea2..bdb6e3effdbb 100644 > --- a/fs/btrfs/bio.c > +++ b/fs/btrfs/bio.c > @@ -663,6 +663,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) > blk_status_t ret; > int error; > > + smap.is_scrub = !bbio->inode; > + > btrfs_bio_counter_inc_blocked(fs_info); > error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, > &bioc, &smap, &mirror_num, 1); > diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c > index 697a6e1fd255..63bf62c33436 100644 > --- a/fs/btrfs/raid-stripe-tree.c > +++ b/fs/btrfs/raid-stripe-tree.c > @@ -334,6 +334,11 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, > if (!path) > return -ENOMEM; > > + if (stripe->is_scrub) { > + path->skip_locking = 1; > + path->search_commit_root = 1; > + } > + > ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); > if (ret < 0) > goto free_path; > @@ -420,7 +425,8 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, > out: > if (ret > 0) > ret = -ENOENT; > - if (ret && ret != -EIO) { > + if (ret && ret != -EIO && !stripe->is_scrub) { > + One extra newline. And why scrub path doesn't need the warning? 
IIRC if our rst doesn't match extent tree, it can be a problem and we need some error messages. Thanks, Qu > if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) > btrfs_print_tree(leaf, 1); > btrfs_err(fs_info, > diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c > index f16220ce5fba..42948b66d4be 100644 > --- a/fs/btrfs/scrub.c > +++ b/fs/btrfs/scrub.c > @@ -23,6 +23,7 @@ > #include "accessors.h" > #include "file-item.h" > #include "scrub.h" > +#include "raid-stripe-tree.h" > > /* > * This is only the first step towards a full-features scrub. It reads all > @@ -1634,6 +1635,53 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) > } > } > > +static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, > + struct scrub_stripe *stripe) > +{ > + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; > + struct btrfs_bio *bbio = NULL; > + int mirror = stripe->mirror_num; > + int i; > + > + atomic_inc(&stripe->pending_io); > + > + for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) { > + struct page *page = scrub_stripe_get_page(stripe, i); > + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); > + > + /* The current sector cannot be merged, submit the bio. 
*/ > + if (bbio && > + ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) || > + bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) { > + ASSERT(bbio->bio.bi_iter.bi_size); > + atomic_inc(&stripe->pending_io); > + btrfs_submit_bio(bbio, mirror); > + bbio = NULL; > + } > + > + if (!bbio) { > + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, > + fs_info, scrub_read_endio, stripe); > + bbio->bio.bi_iter.bi_sector = (stripe->logical + > + (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; > + } > + > + __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); > + } > + > + if (bbio) { > + ASSERT(bbio->bio.bi_iter.bi_size); > + atomic_inc(&stripe->pending_io); > + btrfs_submit_bio(bbio, mirror); > + } > + > + if (atomic_dec_and_test(&stripe->pending_io)) { > + wake_up(&stripe->io_wait); > + INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); > + queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); > + } > +} > + > static void scrub_submit_initial_read(struct scrub_ctx *sctx, > struct scrub_stripe *stripe) > { > @@ -1645,6 +1693,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, > ASSERT(stripe->mirror_num > 0); > ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); > > + if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) { > + scrub_submit_extent_sector_read(sctx, stripe); > + return; > + } > + > bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, > scrub_read_endio, stripe); > > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 2043aff6e966..067859de8f4c 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -393,6 +393,7 @@ struct btrfs_io_stripe { > /* Block mapping */ > u64 physical; > u64 length; > + bool is_scrub; > /* For the endio handler */ > struct btrfs_io_context *bioc; > }; >
On Fri, Sep 15, 2023 at 10:28:50AM +0930, Qu Wenruo wrote: > > > On 2023/9/15 01:37, Johannes Thumshirn wrote: > > A filesystem that uses the RAID stripe tree for logical to physical > > address translation can't use the regular scrub path, that reads all > > stripes and then checks if a sector is unused afterwards. > > > > When using the RAID stripe tree, this will result in lookup errors, as the > > stripe tree doesn't know the requested logical addresses. > > > > Instead, look up stripes that are backed by the extent bitmap. > > > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > > --- > > fs/btrfs/bio.c | 2 ++ > > fs/btrfs/raid-stripe-tree.c | 8 ++++++- > > fs/btrfs/scrub.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ > > fs/btrfs/volumes.h | 1 + > > 4 files changed, 63 insertions(+), 1 deletion(-) > > > > diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c > > index ddbe6f8d4ea2..bdb6e3effdbb 100644 > > --- a/fs/btrfs/bio.c > > +++ b/fs/btrfs/bio.c > > @@ -663,6 +663,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) > > blk_status_t ret; > > int error; > > > > + smap.is_scrub = !bbio->inode; > > + > > btrfs_bio_counter_inc_blocked(fs_info); > > error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, > > &bioc, &smap, &mirror_num, 1); > > diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c > > index 697a6e1fd255..63bf62c33436 100644 > > --- a/fs/btrfs/raid-stripe-tree.c > > +++ b/fs/btrfs/raid-stripe-tree.c > > @@ -334,6 +334,11 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, > > if (!path) > > return -ENOMEM; > > > > + if (stripe->is_scrub) { > > + path->skip_locking = 1; > > + path->search_commit_root = 1; > > + } > > + > > ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); > > if (ret < 0) > > goto free_path; > > @@ -420,7 +425,8 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, > > out: > > if (ret > 0) > > ret = -ENOENT; > > - if 
(ret && ret != -EIO) { > > + if (ret && ret != -EIO && !stripe->is_scrub) { > > + > > One extra newline. There were many more stray newlines; you don't have to point them out in reviews. I'll fix them once we have a version that won't change too much.
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index ddbe6f8d4ea2..bdb6e3effdbb 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -663,6 +663,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) blk_status_t ret; int error; + smap.is_scrub = !bbio->inode; + btrfs_bio_counter_inc_blocked(fs_info); error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, &bioc, &smap, &mirror_num, 1); diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 697a6e1fd255..63bf62c33436 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -334,6 +334,11 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, if (!path) return -ENOMEM; + if (stripe->is_scrub) { + path->skip_locking = 1; + path->search_commit_root = 1; + } + ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); if (ret < 0) goto free_path; @@ -420,7 +425,8 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, out: if (ret > 0) ret = -ENOENT; - if (ret && ret != -EIO) { + if (ret && ret != -EIO && !stripe->is_scrub) { + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) btrfs_print_tree(leaf, 1); btrfs_err(fs_info, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f16220ce5fba..42948b66d4be 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -23,6 +23,7 @@ #include "accessors.h" #include "file-item.h" #include "scrub.h" +#include "raid-stripe-tree.h" /* * This is only the first step towards a full-features scrub. 
It reads all @@ -1634,6 +1635,53 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, + struct scrub_stripe *stripe) +{ + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct btrfs_bio *bbio = NULL; + int mirror = stripe->mirror_num; + int i; + + atomic_inc(&stripe->pending_io); + + for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) { + struct page *page = scrub_stripe_get_page(stripe, i); + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); + + /* The current sector cannot be merged, submit the bio. */ + if (bbio && + ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) || + bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + bbio = NULL; + } + + if (!bbio) { + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, + fs_info, scrub_read_endio, stripe); + bbio->bio.bi_iter.bi_sector = (stripe->logical + + (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; + } + + __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); + } + + if (bbio) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + } + + if (atomic_dec_and_test(&stripe->pending_io)) { + wake_up(&stripe->io_wait); + INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); + queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); + } +} + static void scrub_submit_initial_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { @@ -1645,6 +1693,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, ASSERT(stripe->mirror_num > 0); ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); + if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) { + scrub_submit_extent_sector_read(sctx, stripe); + return; + } + bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, 
scrub_read_endio, stripe); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2043aff6e966..067859de8f4c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -393,6 +393,7 @@ struct btrfs_io_stripe { /* Block mapping */ u64 physical; u64 length; + bool is_scrub; /* For the endio handler */ struct btrfs_io_context *bioc; };
A filesystem that uses the RAID stripe tree for logical-to-physical address translation can't use the regular scrub path, which reads all stripes and only afterwards checks whether a sector is unused. When using the RAID stripe tree, this results in lookup errors, as the stripe tree doesn't know the requested logical addresses. Instead, only read the sectors that are backed by an extent, as recorded in the stripe's extent sector bitmap. Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> --- fs/btrfs/bio.c | 2 ++ fs/btrfs/raid-stripe-tree.c | 8 ++++++- fs/btrfs/scrub.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 4 files changed, 63 insertions(+), 1 deletion(-)