diff mbox series

[v8,06/11] btrfs: implement RST version of scrub

Message ID 20230911-raid-stripe-tree-v8-6-647676fa852c@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: introduce RAID stripe tree | expand

Commit Message

Johannes Thumshirn Sept. 11, 2023, 12:52 p.m. UTC
A filesystem that uses the RAID stripe tree for logical to physical
address translation can't use the regular scrub path, that reads all
stripes and then checks if a sector is unused afterwards.

When using the RAID stripe tree, this will result in lookup errors, as the
stripe tree doesn't know the requested logical addresses.

Instead, look up stripes that are backed by the extent bitmap.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/scrub.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

Comments

Qu Wenruo Sept. 13, 2023, 9:51 a.m. UTC | #1
On 2023/9/11 22:22, Johannes Thumshirn wrote:
> A filesystem that uses the RAID stripe tree for logical to physical
> address translation can't use the regular scrub path, that reads all
> stripes and then checks if a sector is unused afterwards.
>
> When using the RAID stripe tree, this will result in lookup errors, as the
> stripe tree doesn't know the requested logical addresses.
>
> Instead, look up stripes that are backed by the extent bitmap.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/scrub.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 56 insertions(+)
>
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index f16220ce5fba..5101e0a3f83e 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -23,6 +23,7 @@
>   #include "accessors.h"
>   #include "file-item.h"
>   #include "scrub.h"
> +#include "raid-stripe-tree.h"
>
>   /*
>    * This is only the first step towards a full-features scrub. It reads all
> @@ -1634,6 +1635,56 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
>   	}
>   }
>
> +static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
> +					    struct scrub_stripe *stripe)
> +{
> +	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
> +	struct btrfs_bio *bbio = NULL;
> +	int mirror = stripe->mirror_num;
> +	int i;
> +
> +	atomic_inc(&stripe->pending_io);
> +
> +	for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
> +		struct page *page;
> +		int pgoff;
> +
> +		page = scrub_stripe_get_page(stripe, i);
> +		pgoff = scrub_stripe_get_page_offset(stripe, i);
> +
> +		/* The current sector cannot be merged, submit the bio. */
> +		if (bbio &&
> +		    ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
> +		     bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) {
> +			ASSERT(bbio->bio.bi_iter.bi_size);
> +			atomic_inc(&stripe->pending_io);
> +			btrfs_submit_bio(bbio, mirror);
> +			bbio = NULL;
> +		}
> +
> +		if (!bbio) {
> +			bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
> +				fs_info, scrub_read_endio, stripe);
> +			bbio->bio.bi_iter.bi_sector = (stripe->logical +
> +				(i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
> +		}
> +
> +		__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
> +	}
> +
> +	if (bbio) {
> +		ASSERT(bbio->bio.bi_iter.bi_size);
> +		atomic_inc(&stripe->pending_io);
> +		btrfs_submit_bio(bbio, mirror);

Since RST is looked up during btrfs_submit_bio() (to be more accurate,
set_io_stripe()), and I just checked there is no special requirement to
make btrfs to lookup using commit root.

This means we can have a problem that extent items and RST are out-of-sync.

For scrub, all the extent items are searched using commit root, but
btrfs_get_raid_extent_offset() is only using current root.
Thus you would got some problems during fsstress and scrub.


We need some way to distinguish scrub bbio from regular ones (which is a
completely new requirement).
For now only scrub doesn't initialize bbio->inode, thus it can be used
to do the distinguish (at least for now).

Thanks,
Qu
> +	}
> +
> +	if (atomic_dec_and_test(&stripe->pending_io)) {
> +		wake_up(&stripe->io_wait);
> +		INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
> +		queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
> +	}
> +}
> +
>   static void scrub_submit_initial_read(struct scrub_ctx *sctx,
>   				      struct scrub_stripe *stripe)
>   {
> @@ -1645,6 +1696,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
>   	ASSERT(stripe->mirror_num > 0);
>   	ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
>
> +	if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) {
> +		scrub_submit_extent_sector_read(sctx, stripe);
> +		return;
> +	}
> +
>   	bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
>   			       scrub_read_endio, stripe);
>
>
David Sterba Sept. 13, 2023, 4:59 p.m. UTC | #2
On Mon, Sep 11, 2023 at 05:52:07AM -0700, Johannes Thumshirn wrote:
> A filesystem that uses the RAID stripe tree for logical to physical
> address translation can't use the regular scrub path, that reads all
> stripes and then checks if a sector is unused afterwards.
> 
> When using the RAID stripe tree, this will result in lookup errors, as the
> stripe tree doesn't know the requested logical addresses.
> 
> Instead, look up stripes that are backed by the extent bitmap.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/scrub.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 56 insertions(+)
> 
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index f16220ce5fba..5101e0a3f83e 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -23,6 +23,7 @@
>  #include "accessors.h"
>  #include "file-item.h"
>  #include "scrub.h"
> +#include "raid-stripe-tree.h"
>  
>  /*
>   * This is only the first step towards a full-features scrub. It reads all
> @@ -1634,6 +1635,56 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
>  	}
>  }
>  
> +static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
> +					    struct scrub_stripe *stripe)
> +{
> +	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
> +	struct btrfs_bio *bbio = NULL;
> +	int mirror = stripe->mirror_num;
> +	int i;
> +
> +	atomic_inc(&stripe->pending_io);
> +
> +	for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
> +		struct page *page;
> +		int pgoff;

This should be unsigned int.

> +
> +		page = scrub_stripe_get_page(stripe, i);
> +		pgoff = scrub_stripe_get_page_offset(stripe, i);

You can probably move the initializations right to the declarations, I
think we have that elsewhere too.

> +		/* The current sector cannot be merged, submit the bio. */
> +		if (bbio &&
> +		    ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
> +		     bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) {
> +			ASSERT(bbio->bio.bi_iter.bi_size);
> +			atomic_inc(&stripe->pending_io);
> +			btrfs_submit_bio(bbio, mirror);
> +			bbio = NULL;
> +		}
> +
> +		if (!bbio) {
> +			bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
> +				fs_info, scrub_read_endio, stripe);
> +			bbio->bio.bi_iter.bi_sector = (stripe->logical +
> +				(i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
> +		}
> +
> +		__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
> +	}
> +
> +	if (bbio) {
> +		ASSERT(bbio->bio.bi_iter.bi_size);
> +		atomic_inc(&stripe->pending_io);
> +		btrfs_submit_bio(bbio, mirror);
> +	}
> +
> +	if (atomic_dec_and_test(&stripe->pending_io)) {
> +		wake_up(&stripe->io_wait);
> +		INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
> +		queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
> +	}
> +}
diff mbox series

Patch

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f16220ce5fba..5101e0a3f83e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -23,6 +23,7 @@ 
 #include "accessors.h"
 #include "file-item.h"
 #include "scrub.h"
+#include "raid-stripe-tree.h"
 
 /*
  * This is only the first step towards a full-features scrub. It reads all
@@ -1634,6 +1635,56 @@  static void scrub_reset_stripe(struct scrub_stripe *stripe)
 	}
 }
 
+static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
+					    struct scrub_stripe *stripe)
+{
+	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	struct btrfs_bio *bbio = NULL;
+	int mirror = stripe->mirror_num;
+	int i;
+
+	atomic_inc(&stripe->pending_io);
+
+	for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
+		struct page *page;
+		int pgoff;
+
+		page = scrub_stripe_get_page(stripe, i);
+		pgoff = scrub_stripe_get_page_offset(stripe, i);
+
+		/* The current sector cannot be merged, submit the bio. */
+		if (bbio &&
+		    ((i > 0 && !test_bit(i - 1, &stripe->extent_sector_bitmap)) ||
+		     bbio->bio.bi_iter.bi_size >= BTRFS_STRIPE_LEN)) {
+			ASSERT(bbio->bio.bi_iter.bi_size);
+			atomic_inc(&stripe->pending_io);
+			btrfs_submit_bio(bbio, mirror);
+			bbio = NULL;
+		}
+
+		if (!bbio) {
+			bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+				fs_info, scrub_read_endio, stripe);
+			bbio->bio.bi_iter.bi_sector = (stripe->logical +
+				(i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
+		}
+
+		__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
+	}
+
+	if (bbio) {
+		ASSERT(bbio->bio.bi_iter.bi_size);
+		atomic_inc(&stripe->pending_io);
+		btrfs_submit_bio(bbio, mirror);
+	}
+
+	if (atomic_dec_and_test(&stripe->pending_io)) {
+		wake_up(&stripe->io_wait);
+		INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker);
+		queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work);
+	}
+}
+
 static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 				      struct scrub_stripe *stripe)
 {
@@ -1645,6 +1696,11 @@  static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 	ASSERT(stripe->mirror_num > 0);
 	ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
 
+	if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) {
+		scrub_submit_extent_sector_read(sctx, stripe);
+		return;
+	}
+
 	bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
 			       scrub_read_endio, stripe);