diff mbox series

[v3] btrfs: enhance ordered extent double freeing detection

Message ID 640bcf8290eec7a39a0ed543478b5420fdf627c0.1733947908.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series [v3] btrfs: enhance ordered extent double freeing detection | expand

Commit Message

Qu Wenruo Dec. 11, 2024, 8:13 p.m. UTC
With recent bugs exposed through run_delalloc_range() failure, the
importance of detecting double accounting is obvious.

Currently the way to detect such errors is to just check if we underflow
the btrfs_ordered_extent::bytes_left member.

That's fine but that only shows the length we're trying to decrease, not
enough to show the problem.

Here we enhance the situation by:

- Introduce btrfs_ordered_extent::finished_bitmap
  This is a new bitmap to indicate which blocks are already finished.
  This bitmap will be initialized at alloc_ordered_extent() and release
  when the ordered extent is freed.

- Detect any already finished block during can_finish_ordered_extent()
  If double accounting detected, show the full range we're trying and the bitmap.

- Make sure the bitmap is all set when the OE is finished

- Show the full range we're finishing for the existing double accounting
  detection
  This is to enhance the code to work with the new run_delalloc_range()
  error messages.

This will have extra memory and runtime cost, now an ordered extent can
have as large as 4K memory just for the finished_bitmap, and extra
operations to detect such double accounting.

Thus this double accounting detection is only enabled for
CONFIG_BTRFS_DEBUG build for developers.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
v3:
- Use #ifdef #endif to replace IS_ENABLED(){}
  Or it will cause compiler errors.

  Some locations which need local variables are kept inside a {}, mostly
  due to the fact we do not allow mixing variable declaration with code.

v2:
- Fix a false alert when the ordered extent is truncated
  In that case the bitmap's length should be based on truncated_len.
---
 fs/btrfs/misc.h         | 28 ++++++++++++++++++
 fs/btrfs/ordered-data.c | 63 +++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/ordered-data.h |  9 ++++++
 3 files changed, 98 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 0d599fd847c9..438887a1ca32 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -163,4 +163,32 @@  static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
 	return (found_set == start + nbits);
 }
 
+/*
+ * Count how many bits are set in the bitmap.
+ *
+ * Similar to bitmap_weight() but accepts a subrange of the bitmap.
+ */
+static inline unsigned int bitmap_count_set(const unsigned long *addr,
+					    unsigned long start,
+					    unsigned long nbits)
+{
+	const unsigned long bitmap_nbits = start + nbits;
+	unsigned long cur = start;
+	unsigned long total_set = 0;
+
+	while (cur < bitmap_nbits) {
+		unsigned long found_zero;
+		unsigned long found_set;
+
+		found_zero = find_next_zero_bit(addr, bitmap_nbits, cur);
+		total_set += found_zero - cur;
+
+		cur = found_zero;
+		if (cur >= bitmap_nbits)
+			break;
+		found_set = find_next_bit(addr, bitmap_nbits, cur);
+		cur = found_set;
+	}
+	return total_set;
+}
 #endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 30eceaf829a7..04e54454b169 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -194,6 +194,14 @@  static struct btrfs_ordered_extent *alloc_ordered_extent(
 	INIT_LIST_HEAD(&entry->bioc_list);
 	init_completion(&entry->completion);
 
+#ifdef CONFIG_BTRFS_DEBUG
+	entry->finished_bitmap = bitmap_zalloc(
+		num_bytes >> inode->root->fs_info->sectorsize_bits, GFP_NOFS);
+	if (!entry->finished_bitmap) {
+		kmem_cache_free(btrfs_ordered_extent_cache, entry);
+		return ERR_PTR(-ENOMEM);
+	}
+#endif
 	/*
 	 * We don't need the count_max_extents here, we can assume that all of
 	 * that work has been done at higher layers, so this is truly the
@@ -356,13 +364,39 @@  static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
 		btrfs_folio_clear_ordered(fs_info, folio, file_offset, len);
 	}
 
+#ifdef CONFIG_BTRFS_DEBUG
+	{
+		unsigned long start_bit;
+		unsigned long nbits;
+		unsigned long nr_set;
+
+		ASSERT(file_offset >= ordered->file_offset);
+		ASSERT(file_offset + len <= ordered->file_offset  + ordered->num_bytes);
+
+		start_bit = (file_offset - ordered->file_offset) >> fs_info->sectorsize_bits;
+		nbits = len >> fs_info->sectorsize_bits;
+
+		nr_set = bitmap_count_set(ordered->finished_bitmap, start_bit, nbits);
+		if (WARN_ON(nr_set)) {
+			btrfs_crit(fs_info,
+"double ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu range offset=%llu range len=%llu already finished len=%lu finish_bitmap=%*pbl",
+				   btrfs_root_id(inode->root), btrfs_ino(inode),
+				   ordered->file_offset, ordered->num_bytes,
+				   file_offset, len, nr_set << fs_info->sectorsize_bits,
+				   (int)(ordered->num_bytes >> fs_info->sectorsize_bits),
+				   ordered->finished_bitmap);
+		}
+		bitmap_set(ordered->finished_bitmap, start_bit, nbits);
+		len -= (nr_set << fs_info->sectorsize_bits);
+	}
+#endif
 	/* Now we're fine to update the accounting. */
 	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
 		btrfs_crit(fs_info,
-"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu range start=%llu range len=%llu left=%llu",
 			   btrfs_root_id(inode->root), btrfs_ino(inode),
 			   ordered->file_offset, ordered->num_bytes,
-			   len, ordered->bytes_left);
+			   file_offset, len, ordered->bytes_left);
 		ordered->bytes_left = 0;
 	} else {
 		ordered->bytes_left -= len;
@@ -379,6 +413,28 @@  static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
 	 * the finish_func to be executed.
 	 */
 	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
+
+#ifdef CONFIG_BTRFS_DEBUG
+	{
+		u64 real_len;
+
+		if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
+			real_len = ordered->truncated_len;
+		else
+			real_len = ordered->num_bytes;
+
+		if (WARN_ON(!bitmap_full(ordered->finished_bitmap,
+				 real_len >> fs_info->sectorsize_bits))) {
+			btrfs_crit(fs_info,
+"ordered extent finished bitmap desync, root=%llu ino=%llu OE offset=%llu OE len=%llu bytes_left=%llu bitmap=%*pbl",
+				btrfs_root_id(inode->root), btrfs_ino(inode),
+				ordered->file_offset, ordered->num_bytes,
+				ordered->bytes_left,
+				(int)(real_len >> fs_info->sectorsize_bits),
+				ordered->finished_bitmap);
+		}
+	}
+#endif
 	cond_wake_up(&ordered->wait);
 	refcount_inc(&ordered->refs);
 	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
@@ -624,6 +680,9 @@  void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 			list_del(&sum->list);
 			kvfree(sum);
 		}
+#ifdef CONFIG_BTRFS_DEBUG
+		bitmap_free(entry->finished_bitmap);
+#endif
 		kmem_cache_free(btrfs_ordered_extent_cache, entry);
 	}
 }
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4e152736d06c..f04a2f7a593a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -154,6 +154,15 @@  struct btrfs_ordered_extent {
 	struct list_head work_list;
 
 	struct list_head bioc_list;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	/*
+	 * Set if one block has finished.
+	 *
+	 * To catch double freeing with more accuracy.
+	 */
+	unsigned long *finished_bitmap;
+#endif
 };
 
 int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent);