[v2,4/5] btrfs: zoned: properly finish block group on metadata write

Message ID 5dfca7a3630ec2fe8c1ae1219cbbea7240f6d6de.1651624820.git.naohiro.aota@wdc.com
State New, archived
Series btrfs: zoned: fixes for zone finishing

Commit Message

Naohiro Aota May 4, 2022, 12:48 a.m. UTC
Commit be1a1d7a5d24 ("btrfs: zoned: finish fully written block group")
introduced zone finishing code for both the data and metadata end_io paths.
However, the metadata side is not working as it should. First, it compares
a logical address (eb->start + eb->len) against an offset within a block
group (cache->zone_capacity) in submit_eb_page(). That essentially disabled
zone finishing on the metadata end_io path.

Furthermore, fixing the issue above revealed that we cannot call
btrfs_zone_finish_endio() in end_extent_buffer_writeback(): we cannot call
btrfs_lookup_block_group(), which requires a spinlock, from the end_io
context.

This commit introduces btrfs_schedule_zone_finish_bg() to defer the zone
finishing to a workqueue, which waits for the extent buffer writeback and
then does the zone finishing IO.

Also, drop EXTENT_BUFFER_ZONE_FINISH as it is no longer used.

Cc: stable@vger.kernel.org # 5.16+
Fixes: be1a1d7a5d24 ("btrfs: zoned: finish fully written block group")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.h |  2 ++
 fs/btrfs/extent_io.c   |  6 +-----
 fs/btrfs/extent_io.h   |  1 -
 fs/btrfs/zoned.c       | 34 ++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  5 +++++
 5 files changed, 42 insertions(+), 6 deletions(-)

Patch

diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c9bf01dd10e8..3ac668ace50a 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -212,6 +212,8 @@  struct btrfs_block_group {
 	u64 meta_write_pointer;
 	struct map_lookup *physical_map;
 	struct list_head active_bg_list;
+	struct work_struct zone_finish_work;
+	struct extent_buffer *last_eb;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1b1baeb0d76b..588c7c606a2c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4251,9 +4251,6 @@  void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
-	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
-		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
-
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4843,8 +4840,7 @@  static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		/*
 		 * Implies write in zoned mode. Mark the last eb in a block group.
 		 */
-		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
-			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+		btrfs_schedule_zone_finish_bg(cache, eb);
 		btrfs_put_block_group(cache);
 	}
 	ret = write_one_eb(eb, wbc, epd);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 17674b7e699c..956fa434df43 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -26,7 +26,6 @@  enum {
 	/* write IO error */
 	EXTENT_BUFFER_WRITE_ERR,
 	EXTENT_BUFFER_NO_CHECK,
-	EXTENT_BUFFER_ZONE_FINISH,
 };
 
 /* these are flags for __process_pages_contig */
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 320bb7ba1c49..905ce5498ee0 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2046,6 +2046,40 @@  void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
 	btrfs_put_block_group(block_group);
 }
 
+static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+{
+	struct btrfs_block_group *bg =
+		container_of(work, struct btrfs_block_group, zone_finish_work);
+
+	wait_on_extent_buffer_writeback(bg->last_eb);
+	free_extent_buffer(bg->last_eb);
+	btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length);
+	btrfs_put_block_group(bg);
+}
+
+void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+				   struct extent_buffer *eb)
+{
+	if (!bg->seq_zone ||
+	    eb->start + eb->len * 2 <= bg->start + bg->zone_capacity)
+		return;
+
+	if (WARN_ON(bg->zone_finish_work.func ==
+		    btrfs_zone_finish_endio_workfn)) {
+		btrfs_err(bg->fs_info,
+			  "double scheduling of BG %llu zone finishing",
+			  bg->start);
+		return;
+	}
+
+	/* For the work */
+	btrfs_get_block_group(bg);
+	atomic_inc(&eb->refs);
+	bg->last_eb = eb;
+	INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
+	queue_work(system_unbound_wq, &bg->zone_finish_work);
+}
+
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 98f277ed5138..a4126ec6b909 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -72,6 +72,8 @@  int btrfs_zone_finish(struct btrfs_block_group *block_group);
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
 void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
 			     u64 length);
+void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+				   struct extent_buffer *eb);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
 bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
@@ -230,6 +232,9 @@  static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
 static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
 					   u64 logical, u64 length) { }
 
+static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+						 struct extent_buffer *eb) { }
+
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
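For reference, the new helper relies on a standard kernel deferral pattern: the
end_io path only records the last extent buffer and queues a work item, and
everything that needs to wait or take locks (the writeback wait, the block
group lookup inside btrfs_zone_finish_endio()) then runs later in process
context on system_unbound_wq. Below is a minimal, self-contained sketch of
that pattern, using hypothetical "demo" names rather than the btrfs code
itself:

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/workqueue.h>

/* Hypothetical stand-in for a block group that may need zone finishing. */
struct demo_bg {
	struct work_struct finish_work;	/* queued from the end_io path */
	u64 start;			/* logical start of the group  */
	u64 length;			/* length of the group         */
};

/* Runs in process context, so it may sleep and take ordinary locks. */
static void demo_finish_workfn(struct work_struct *work)
{
	struct demo_bg *bg = container_of(work, struct demo_bg, finish_work);

	/* The real patch waits on bg->last_eb here and then finishes the zone. */
	pr_info("finishing block group %llu (len %llu)\n", bg->start, bg->length);
}

/* Called from the end_io path: only record state and queue the work. */
static void demo_schedule_finish(struct demo_bg *bg)
{
	INIT_WORK(&bg->finish_work, demo_finish_workfn);
	queue_work(system_unbound_wq, &bg->finish_work);
}

In the patch itself the work item additionally pins its context: it takes a
reference on the block group and on the last extent buffer before queuing,
and drops both at the end of the work function, so neither can go away while
the work is pending.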