diff mbox

Btrfs: fix race between scrub and block group deletion

Message ID 1447930640-13192-1-git-send-email-fdmanana@kernel.org (mailing list archive)
State Accepted
Headers show

Commit Message

Filipe Manana Nov. 19, 2015, 10:57 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Scrub can race with the cleaner kthread deleting block groups that are
unused (and with relocation too) leading to a failure with error -EINVAL
that gets returned to user space.

The following diagram illustrates how it happens:

              CPU 1                                 CPU 2

 cleaner kthread
   btrfs_delete_unused_bgs()

     gets block group X from
     fs_info->unused_bgs

     sets block group to RO

       btrfs_remove_chunk(bg X)

         deletes device extents

                                         scrub_enumerate_chunks()

                                           searches device tree using
                                           its commit root

                                           finds device extent for
                                           block group X

                                           gets block group X from the tree
                                           fs_info->block_group_cache_tree
                                           (via btrfs_lookup_block_group())

                                           sets bg X to RO (again)

          btrfs_remove_block_group(bg X)

            deletes block group from
            fs_info->block_group_cache_tree

            removes extent map from
            fs_info->mapping_tree

                                               scrub_chunk(offset X)

                                                 searches fs_info->mapping_tree
                                                 for extent map starting at
                                                 offset X

                                                    --> doesn't find any such
                                                        extent map
                                                    --> returns -EINVAL and scrub
                                                        errors out to userspace
                                                        with -EINVAL

Fix this by dealing with an extent map lookup failure as an indicator of
block group deletion.
Issue reproduced with fstest btrfs/071.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/scrub.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)
diff mbox

Patch

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2907a77..28fbd83 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3432,7 +3432,9 @@  out:
 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 					  struct btrfs_device *scrub_dev,
 					  u64 chunk_offset, u64 length,
-					  u64 dev_offset, int is_dev_replace)
+					  u64 dev_offset,
+					  struct btrfs_block_group_cache *cache,
+					  int is_dev_replace)
 {
 	struct btrfs_mapping_tree *map_tree =
 		&sctx->dev_root->fs_info->mapping_tree;
@@ -3445,8 +3447,18 @@  static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
 	read_unlock(&map_tree->map_tree.lock);
 
-	if (!em)
-		return -EINVAL;
+	if (!em) {
+		/*
+		 * Might have been an unused block group deleted by the cleaner
+		 * kthread or relocation.
+		 */
+		spin_lock(&cache->lock);
+		if (!cache->removed)
+			ret = -EINVAL;
+		spin_unlock(&cache->lock);
+
+		return ret;
+	}
 
 	map = (struct map_lookup *)em->bdev;
 	if (em->start != chunk_offset)
@@ -3577,7 +3589,7 @@  int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
 		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
-				  found_key.offset, is_dev_replace);
+				  found_key.offset, cache, is_dev_replace);
 
 		/*
 		 * flush, submit all pending read and write bios, afterwards