diff mbox series

btrfs: tree-checker: reject inline extent items with 0 ref count

Message ID 839022e69496e80413ade9dc8287fb9ccaef9557.1733282486.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series btrfs: tree-checker: reject inline extent items with 0 ref count | expand

Commit Message

Qu Wenruo Dec. 4, 2024, 3:22 a.m. UTC
[BUG]
There is a bug report in the mailing list where btrfs_run_delayed_refs()
failed to drop the ref count for logical 25870311358464 num_bytes
2113536.

The involved leaf dump looks like this:

  item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50
    extent refs 1 gen 84178 flags 1
    ref#0: shared data backref parent 32399126528000 count 0 <<<
    ref#1: shared data backref parent 31808973717504 count 1

Notice the count number is 0.

[CAUSE]
There is no concrete evidence yet, but considering 0 -> 1 is also a
single bit flipped, it's possible that hardware memory bitflip is
involved, causing the on-disk extent tree to be corrupted.

[FIX]
To prevent us reading such corrupted extent item, or writing such
damaged extent item back to disk, enhance the handling of
BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both
inlined and key items, to detect such 0 ref count and reject them.

Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/
Reported-by: Frankie Fisher <frankie@terrorise.me.uk>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

Comments

Filipe Manana Dec. 4, 2024, 11:26 a.m. UTC | #1
On Wed, Dec 4, 2024 at 3:56 AM Qu Wenruo <wqu@suse.com> wrote:
>
> [BUG]
> There is a bug report in the mailing list where btrfs_run_delayed_refs()
> failed to drop the ref count for logical 25870311358464 num_bytes
> 2113536.
>
> The involved leaf dump looks like this:
>
>   item 166 key (25870311358464 168 2113536) itemoff 10091 itemsize 50
>     extent refs 1 gen 84178 flags 1
>     ref#0: shared data backref parent 32399126528000 count 0 <<<
>     ref#1: shared data backref parent 31808973717504 count 1
>
> Notice the count number is 0.
>
> [CAUSE]
> There is no concrete evidence yet, but considering 0 -> 1 is also a
> single bit flipped, it's possible that hardware memory bitflip is
> involved, causing the on-disk extent tree to be corrupted.
>
> [FIX]
> To prevent us reading such corrupted extent item, or writing such
> damaged extent item back to disk, enhance the handling of
> BTRFS_EXTENT_DATA_REF_KEY and BTRFS_SHARED_DATA_REF_KEY keys for both
> inlined and key items, to detect such 0 ref count and reject them.
>
> Link: https://lore.kernel.org/linux-btrfs/7c69dd49-c346-4806-86e7-e6f863a66f48@app.fastmail.com/
> Reported-by: Frankie Fisher <frankie@terrorise.me.uk>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/tree-checker.c | 27 ++++++++++++++++++++++++++-
>  1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index 148d8cefa40e..0b9891f416ec 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -1527,6 +1527,11 @@ static int check_extent_item(struct extent_buffer *leaf,
>                                            dref_offset, fs_info->sectorsize);
>                                 return -EUCLEAN;
>                         }
> +                       if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
> +                               extent_err(leaf, slot,
> +                       "invalidate data ref count, should have non-zero value");

invalidate -> invalid

> +                               return -EUCLEAN;
> +                       }
>                         inline_refs += btrfs_extent_data_ref_count(leaf, dref);
>                         break;
>                 /* Contains parent bytenr and ref count */
> @@ -1539,6 +1544,11 @@ static int check_extent_item(struct extent_buffer *leaf,
>                                            inline_offset, fs_info->sectorsize);
>                                 return -EUCLEAN;
>                         }
> +                       if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
> +                               extent_err(leaf, slot,
> +                       "invalidate shared data ref count, should have non-zero value");

invalidate -> invalid

> +                               return -EUCLEAN;
> +                       }
>                         inline_refs += btrfs_shared_data_ref_count(leaf, sref);
>                         break;
>                 case BTRFS_EXTENT_OWNER_REF_KEY:
> @@ -1611,8 +1621,18 @@ static int check_simple_keyed_refs(struct extent_buffer *leaf,
>  {
>         u32 expect_item_size = 0;
>
> -       if (key->type == BTRFS_SHARED_DATA_REF_KEY)
> +       if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
> +               struct btrfs_shared_data_ref *sref;
> +
> +               sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
> +               if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
> +                       extent_err(leaf, slot,
> +               "invalid shared data backref count, should have non-zero value");

Here it's correct.

> +                       return -EUCLEAN;
> +               }
> +
>                 expect_item_size = sizeof(struct btrfs_shared_data_ref);
> +       }
>
>         if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
>                 generic_err(leaf, slot,
> @@ -1689,6 +1709,11 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
>                                    offset, leaf->fs_info->sectorsize);
>                         return -EUCLEAN;
>                 }
> +               if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
> +                       extent_err(leaf, slot,
> +       "invalidate extent data backref count, should have non-zero value");

invalidate -> invalid

With those small fixes:

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Thanks.

> +                       return -EUCLEAN;
> +               }
>         }
>         return 0;
>  }
> --
> 2.47.1
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 148d8cefa40e..0b9891f416ec 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1527,6 +1527,11 @@  static int check_extent_item(struct extent_buffer *leaf,
 					   dref_offset, fs_info->sectorsize);
 				return -EUCLEAN;
 			}
+			if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
+				extent_err(leaf, slot,
+			"invalidate data ref count, should have non-zero value");
+				return -EUCLEAN;
+			}
 			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
 			break;
 		/* Contains parent bytenr and ref count */
@@ -1539,6 +1544,11 @@  static int check_extent_item(struct extent_buffer *leaf,
 					   inline_offset, fs_info->sectorsize);
 				return -EUCLEAN;
 			}
+			if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
+				extent_err(leaf, slot,
+			"invalidate shared data ref count, should have non-zero value");
+				return -EUCLEAN;
+			}
 			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
 			break;
 		case BTRFS_EXTENT_OWNER_REF_KEY:
@@ -1611,8 +1621,18 @@  static int check_simple_keyed_refs(struct extent_buffer *leaf,
 {
 	u32 expect_item_size = 0;
 
-	if (key->type == BTRFS_SHARED_DATA_REF_KEY)
+	if (key->type == BTRFS_SHARED_DATA_REF_KEY) {
+		struct btrfs_shared_data_ref *sref;
+
+		sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref);
+		if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) {
+			extent_err(leaf, slot,
+		"invalid shared data backref count, should have non-zero value");
+			return -EUCLEAN;
+		}
+
 		expect_item_size = sizeof(struct btrfs_shared_data_ref);
+	}
 
 	if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
 		generic_err(leaf, slot,
@@ -1689,6 +1709,11 @@  static int check_extent_data_ref(struct extent_buffer *leaf,
 				   offset, leaf->fs_info->sectorsize);
 			return -EUCLEAN;
 		}
+		if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) {
+			extent_err(leaf, slot,
+	"invalidate extent data backref count, should have non-zero value");
+			return -EUCLEAN;
+		}
 	}
 	return 0;
 }