diff mbox series

[v2,06/10] btrfs: zoned: reserve zones for an active metadata/system block group

Message ID 790055decdb2cfa7dfaa3a47dd43b0a1f9129814.1690823282.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned: write-time activation of metadata block group | expand

Commit Message

Naohiro Aota July 31, 2023, 5:17 p.m. UTC
Ensure a metadata and system block group can be activated on write time, by
leaving a certain number of active zones when trying to activate a data
block group.

When both metadata and system profiles are set to SINGLE, we need to
reserve two zones. When both are DUP, we need to reserve four zones.

In the case only one of them is DUP, we should reserve three zones.
However, handling the case requires at least two bits to track if we have
seen DUP profile for metadata and system, which is cumbersome. So, just
reserve four zones in that case for now.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/fs.h    |  6 ++++++
 fs/btrfs/zoned.c | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Johannes Thumshirn Aug. 1, 2023, 12:23 p.m. UTC | #1
Looks good,

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Naohiro Aota Aug. 2, 2023, 4:50 a.m. UTC | #2
On Tue, Aug 01, 2023 at 02:17:15AM +0900, Naohiro Aota wrote:
> Ensure a metadata and system block group can be activated on write time, by
> leaving a certain number of active zones when trying to activate a data
> block group.
> 
> When both metadata and system profiles are set to SINGLE, we need to
> reserve two zones. When both are DUP, we need to reserve four zones.
> 
> In the case only one of them is DUP, we should reserve three zones.
> However, handling the case requires at least two bits to track if we have
> seen DUP profile for metadata and system, which is cumbersome. So, just
> reserve four zones in that case for now.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>

I noticed this over-reserves the zones. Even when there are already
metadata and system block group allocated, it still leaves 2 (or 4 in
DUP) zones to be claimed by a data block group.

The reservation count must be increased when we free a metadata block group
and increased when we allocate one.

Or, in fact, we only need to reserve the zones when pivoting the block
group. With the zoned_meta_io_lock, metadata and system block group won't
pivot at the same time. So, adding a bit BTRFS_*_ZONED_PIVOT_META_BG would
be enough.

Anyway, I'll rework this patch.
diff mbox series

Patch

diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index ef07c6c252d8..2ce391959b6a 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -775,6 +775,12 @@  struct btrfs_fs_info {
 	spinlock_t zone_active_bgs_lock;
 	struct list_head zone_active_bgs;
 
+	/*
+	 * Reserved active zones per-device for one metadata and one system
+	 * block group.
+	 */
+	unsigned int reserved_active_zones;
+
 	/* Updates are not protected by any lock */
 	struct btrfs_commit_stats commit_stats;
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 3902c16b9188..9dbcd747ee74 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -525,6 +525,12 @@  int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 		atomic_set(&zone_info->active_zones_left,
 			   max_active_zones - nactive);
 		set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
+		/*
+		 * First, reserve zones for SINGLE metadata and SINGLE system
+		 * profile. The reservation will be increased when seeing DUP
+		 * profile.
+		 */
+		fs_info->reserved_active_zones = 2;
 	}
 
 	/* Validate superblock log */
@@ -1515,6 +1521,22 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		}
 		cache->alloc_offset = alloc_offsets[0];
 		cache->zone_capacity = min(caps[0], caps[1]);
+
+		/*
+		 * DUP profile needs two zones on the same device. Reserve 2
+		 * zones * 2 types (metadata and system) = 4 zones.
+		 *
+		 * Technically, we can have SINGLE metadata and DUP system
+		 * config. And, in that case, we only need 3 zones, wasting one
+		 * active zone. But, to do the precise reservation, we need one
+		 * more variable just to track we already seen a DUP block group
+		 * or not, which is cumbersome.
+		 *
+		 * For now, let's be lazy and just reserve 4 zones.
+		 */
+		if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
+		    !(cache->flags & BTRFS_BLOCK_GROUP_DATA))
+			fs_info->reserved_active_zones = 4;
 		break;
 	case BTRFS_BLOCK_GROUP_RAID1:
 	case BTRFS_BLOCK_GROUP_RAID0:
@@ -1888,6 +1910,8 @@  bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	struct btrfs_space_info *space_info = block_group->space_info;
 	struct map_lookup *map;
 	struct btrfs_device *device;
+	const unsigned int reserved = (block_group->flags & BTRFS_BLOCK_GROUP_DATA) ?
+		fs_info->reserved_active_zones : 0;
 	u64 physical;
 	bool ret;
 	int i;
@@ -1917,6 +1941,15 @@  bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 		if (device->zone_info->max_active_zones == 0)
 			continue;
 
+		/*
+		 * For the data block group, leave active zones for one
+		 * metadata block group and one system block group.
+		 */
+		if (atomic_read(&device->zone_info->active_zones_left) <= reserved) {
+			ret = false;
+			goto out_unlock;
+		}
+
 		if (!btrfs_dev_set_active_zone(device, physical)) {
 			/* Cannot activate the zone */
 			ret = false;
@@ -2111,6 +2144,8 @@  bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 {
 	struct btrfs_fs_info *fs_info = fs_devices->fs_info;
 	struct btrfs_device *device;
+	const unsigned int reserved = (flags & BTRFS_BLOCK_GROUP_DATA) ?
+		fs_info->reserved_active_zones : 0;
 	bool ret = false;
 
 	if (!btrfs_is_zoned(fs_info))
@@ -2131,10 +2166,10 @@  bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 
 		switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
 		case 0: /* single */
-			ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+			ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
 			break;
 		case BTRFS_BLOCK_GROUP_DUP:
-			ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+			ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
 			break;
 		}
 		if (ret)