diff mbox series

[v2] btrfs: enhance ordered extent double freeing detection

Message ID 192dc91881d143a4ae2b13cd9d8769f62b6fda8c.1733908542.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series [v2] btrfs: enhance ordered extent double freeing detection | expand

Commit Message

Qu Wenruo Dec. 11, 2024, 9:16 a.m. UTC
With recent bugs exposed through run_delalloc_range() failure, the
importance of detecting double accounting is obvious.

Currently the way to detect such errors is to just check if we underflow
the btrfs_ordered_extent::bytes_left member.

That's fine but that only shows the length we're trying to decrease, not
enough to show the problem.

Here we enhance the situation by:

- Introduce btrfs_ordered_extent::finished_bitmap
  This is a new bitmap to indicate which blocks are already finished.
  This bitmap will be initialized at alloc_ordered_extent() and release
  when the ordered extent is freed.

- Detect any already finished block during can_finish_ordered_extent()
  If double accounting detected, show the full range we're trying and the bitmap.

- Make sure the bitmap is all set when the OE is finished

- Show the full range we're finishing for the existing double accounting
  detection
  This is to enhance the code to work with the new run_delalloc_range()
  error messages.

This will have extra memory and runtime cost, now an ordered extent can
have as large as 4K memory just for the finished_bitmap, and extra
operations to detect such double accounting.

Thus this double accounting detection is only enabled for
CONFIG_BTRFS_DEBUG build for developers.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
v2:
- Fix a false alert when the ordered extent is truncated
  In that case the bitmap's length should be based on truncated_len.
---
 fs/btrfs/misc.h         | 28 +++++++++++++++++++
 fs/btrfs/ordered-data.c | 60 +++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/ordered-data.h |  9 +++++++
 3 files changed, 95 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 0d599fd847c9..438887a1ca32 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -163,4 +163,32 @@  static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
 	return (found_set == start + nbits);
 }
 
+/*
+ * Count how many bits are set in the bitmap.
+ *
+ * Similar to bitmap_weight() but accepts a subrange of the bitmap.
+ */
+static inline unsigned int bitmap_count_set(const unsigned long *addr,
+					    unsigned long start,
+					    unsigned long nbits)
+{
+	const unsigned long bitmap_nbits = start + nbits;
+	unsigned long cur = start;
+	unsigned long total_set = 0;
+
+	while (cur < bitmap_nbits) {
+		unsigned long found_zero;
+		unsigned long found_set;
+
+		found_zero = find_next_zero_bit(addr, bitmap_nbits, cur);
+		total_set += found_zero - cur;
+
+		cur = found_zero;
+		if (cur >= bitmap_nbits)
+			break;
+		found_set = find_next_bit(addr, bitmap_nbits, cur);
+		cur = found_set;
+	}
+	return total_set;
+}
 #endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 30eceaf829a7..31a2c6f06776 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -194,6 +194,16 @@  static struct btrfs_ordered_extent *alloc_ordered_extent(
 	INIT_LIST_HEAD(&entry->bioc_list);
 	init_completion(&entry->completion);
 
+	if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
+		struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+		entry->finished_bitmap = bitmap_zalloc(
+			num_bytes >> fs_info->sectorsize_bits, GFP_NOFS);
+		if (!entry->finished_bitmap) {
+			kmem_cache_free(btrfs_ordered_extent_cache, entry);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 	/*
 	 * We don't need the count_max_extents here, we can assume that all of
 	 * that work has been done at higher layers, so this is truly the
@@ -356,13 +366,38 @@  static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
 		btrfs_folio_clear_ordered(fs_info, folio, file_offset, len);
 	}
 
+	if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
+		unsigned long start_bit;
+		unsigned long nbits;
+		unsigned long nr_set;
+
+		ASSERT(file_offset >= ordered->file_offset);
+		ASSERT(file_offset + len <= ordered->file_offset  + ordered->num_bytes);
+
+		start_bit = (file_offset - ordered->file_offset) >> fs_info->sectorsize_bits;
+		nbits = len >> fs_info->sectorsize_bits;
+
+		nr_set = bitmap_count_set(ordered->finished_bitmap, start_bit, nbits);
+		if (WARN_ON(nr_set)) {
+			btrfs_crit(fs_info,
+"double ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu range offset=%llu range len=%llu already finished len=%lu finish_bitmap=%*pbl",
+				   btrfs_root_id(inode->root), btrfs_ino(inode),
+				   ordered->file_offset, ordered->num_bytes,
+				   file_offset, len, nr_set << fs_info->sectorsize_bits,
+				   (int)(ordered->num_bytes >> fs_info->sectorsize_bits),
+				   ordered->finished_bitmap);
+		}
+		bitmap_set(ordered->finished_bitmap, start_bit, nbits);
+		len -= (nr_set << fs_info->sectorsize_bits);
+	}
+
 	/* Now we're fine to update the accounting. */
 	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
 		btrfs_crit(fs_info,
-"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu range start=%llu range len=%llu left=%llu",
 			   btrfs_root_id(inode->root), btrfs_ino(inode),
 			   ordered->file_offset, ordered->num_bytes,
-			   len, ordered->bytes_left);
+			   file_offset, len, ordered->bytes_left);
 		ordered->bytes_left = 0;
 	} else {
 		ordered->bytes_left -= len;
@@ -379,6 +414,25 @@  static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
 	 * the finish_func to be executed.
 	 */
 	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
+	if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
+		u64 real_len;
+
+		if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
+			real_len = ordered->truncated_len;
+		else
+			real_len = ordered->num_bytes;
+
+		if (WARN_ON(!bitmap_full(ordered->finished_bitmap,
+					 real_len >> fs_info->sectorsize_bits))) {
+			btrfs_crit(fs_info,
+"ordered extent finished bitmap desync, root=%llu ino=%llu OE offset=%llu OE len=%llu bytes_left=%llu bitmap=%*pbl",
+				btrfs_root_id(inode->root), btrfs_ino(inode),
+				ordered->file_offset, ordered->num_bytes,
+				ordered->bytes_left,
+				(int)(ordered->num_bytes >> fs_info->sectorsize_bits),
+				ordered->finished_bitmap);
+		}
+	}
 	cond_wake_up(&ordered->wait);
 	refcount_inc(&ordered->refs);
 	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
@@ -624,6 +678,8 @@  void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 			list_del(&sum->list);
 			kvfree(sum);
 		}
+		if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+			bitmap_free(entry->finished_bitmap);
 		kmem_cache_free(btrfs_ordered_extent_cache, entry);
 	}
 }
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4e152736d06c..f04a2f7a593a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -154,6 +154,15 @@  struct btrfs_ordered_extent {
 	struct list_head work_list;
 
 	struct list_head bioc_list;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	/*
+	 * Set if one block has finished.
+	 *
+	 * To catch double freeing with more accuracy.
+	 */
+	unsigned long *finished_bitmap;
+#endif
 };
 
 int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent);