@@ -1932,11 +1932,10 @@ static void recover_rbio(struct btrfs_raid_bio *rbio)
/*
* Read everything that hasn't failed. However this time we will
- * not trust any cached sector.
+ * not trust any cached sector, unless it's explicitly required.
+ *
* As we may read out some stale data but higher layer is not reading
* that stale part.
- *
- * So here we always re-read everything in recovery path.
*/
for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
total_sector_nr++) {
@@ -1960,6 +1959,14 @@ static void recover_rbio(struct btrfs_raid_bio *rbio)
}
sector = rbio_stripe_sector(rbio, stripe, sectornr);
+
+ /*
+ * If we're forced to use the cache and the sector is already cached,
+ * we can skip this sector.
+ * Otherwise we should still read it from disk.
+ */
+ if (rbio->cached && sector->uptodate)
+ continue;
ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
sectornr, REQ_OP_READ);
if (ret < 0) {
@@ -2765,22 +2772,32 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
}
/*
- * This is for scrub call sites where we already have correct data contents.
- * This allows us to avoid reading data stripes again.
+ * This is for scrub call sites where we already have correct stripe contents.
+ * This allows us to avoid reading on-disk stripes again.
*
* Unfortunately here we have to do page copy, other than reusing the pages.
* This is due to the fact rbio has its own page management for its cache.
+ * But this is also a good thing for recovery attempts, as it prevents the
+ * recovery path from modifying the stripes until we've verified the
+ * recovered data.
+ *
+ * @rbio: The allocated rbio by raid56_parity_alloc_*_rbio()
+ * @pages: The pages which contain the stripe contents.
+ * @stripe_num: The stripe number, 0 means the first data stripe, and
+ * @rbio->nr_data means the P stripe.
*/
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
- struct page **data_pages, u64 data_logical)
+void raid56_parity_cache_pages(struct btrfs_raid_bio *rbio, struct page **pages,
+ int stripe_num)
{
- const u64 offset_in_full_stripe = data_logical -
- rbio->bioc->full_stripe_logical;
+ const u64 offset_in_full_stripe = btrfs_stripe_nr_to_offset(stripe_num);
const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectors_per_page = PAGE_SIZE / sectorsize;
int ret;
+ if (stripe_num >= rbio->nr_data)
+ ret = alloc_rbio_parity_pages(rbio);
+ else
+ ret = alloc_rbio_data_pages(rbio);
/*
* If we hit ENOMEM temporarily, but later at
* raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@@ -2789,17 +2806,16 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
* If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time,
* the bio would got proper error number set.
*/
- ret = alloc_rbio_data_pages(rbio);
if (ret < 0)
return;
- /* data_logical must be at stripe boundary and inside the full stripe. */
+ /* The stripe must be at stripe boundary and inside the full stripe. */
ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
- ASSERT(offset_in_full_stripe < btrfs_stripe_nr_to_offset(rbio->nr_data));
+ ASSERT(offset_in_full_stripe < btrfs_stripe_nr_to_offset(rbio->real_stripes));
for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
struct page *dst = rbio->stripe_pages[page_nr + page_index];
- struct page *src = data_pages[page_nr];
+ struct page *src = pages[page_nr];
memcpy_page(dst, 0, src, 0, PAGE_SIZE);
for (int sector_nr = sectors_per_page * page_index;
@@ -2807,4 +2823,5 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
sector_nr++)
rbio->stripe_sectors[sector_nr].uptodate = true;
}
+ rbio->cached = true;
}
@@ -82,6 +82,13 @@ struct btrfs_raid_bio {
*/
int bio_list_bytes;
+ /*
+ * Set if this rbio is forced to use cached stripes provided by the caller.
+ *
+ * Used by the scrub path to reduce IO.
+ */
+ bool cached;
+
refcount_t refs;
atomic_t stripes_pending;
@@ -191,9 +198,8 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
-
-void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
- struct page **data_pages, u64 data_logical);
+void raid56_parity_cache_pages(struct btrfs_raid_bio *rbio, struct page **pages,
+ int stripe_num);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
@@ -1925,8 +1925,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
- raid56_parity_cache_data_pages(rbio, stripe->pages,
- full_stripe_start + btrfs_stripe_nr_to_offset(i));
+ raid56_parity_cache_pages(rbio, stripe->pages, i);
}
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
Currently raid56_parity_cache_data_pages() only allows caching data
stripes, as its only call site (scrub) is only going to scrub the P/Q
stripes.

But later we want to use the cached pages to do recovery, thus we also
need to be able to cache the P/Q stripes.

This patch adds that ability with the following changes:

- Rename the function to raid56_parity_cache_pages()
- Use @stripe_num to indicate where the cached stripe is
- Change the ASSERT() to allow P/Q stripes to be cached
- Introduce a new member, btrfs_raid_bio::cached, to allow the recovery
  path to use cached pages

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/raid56.c | 43 ++++++++++++++++++++++++++++++-------------
 fs/btrfs/raid56.h | 12 +++++++++---
 fs/btrfs/scrub.c  |  3 +--
 3 files changed, 40 insertions(+), 18 deletions(-)