@@ -3285,6 +3285,15 @@ int open_ctree(struct super_block *sb,
}
}
+ ret = btrfs_hmzoned_check_metadata_space(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info, "failed to allocate metadata space: %d",
+ ret);
+ btrfs_warn(fs_info, "try remount with readonly");
+ close_ctree(fs_info);
+ return ret;
+ }
+
down_read(&fs_info->cleanup_work_sem);
if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
(ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
@@ -13,6 +13,8 @@
#include "hmzoned.h"
#include "rcu-string.h"
#include "disk-io.h"
+#include "space-info.h"
+#include "transaction.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -548,3 +550,46 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group_cache *cache)
return ret;
}
+
+/*
+ * Make sure an HMZONED filesystem has some free metadata space.
+ *
+ * On or after a degraded mount, we might have no writable metadata block
+ * group due to broken write pointers. If you e.g. balance the FS before
+ * writing any data, alloc_tree_block_no_bg_flush() (called from
+ * insert_balance_item()) fails to allocate a tree block for it. To avoid
+ * such situations, ensure we have some metadata block group here by
+ * allocating a new metadata chunk when no free metadata space is left.
+ *
+ * Return: 0 if the filesystem is not HMZONED, if free metadata space
+ * already exists, or if a new chunk was allocated and the transaction
+ * committed. Otherwise the error from starting the transaction, from
+ * allocating the chunk, or from committing the transaction.
+ */
+int btrfs_hmzoned_check_metadata_space(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_space_info *info;
+	u64 used;
+	bool has_free_space;
+	int ret;
+
+	if (!btrfs_fs_incompat(fs_info, HMZONED))
+		return 0;
+
+	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+	spin_lock(&info->lock);
+	used = btrfs_space_info_used(info, true);
+	/*
+	 * Compare instead of subtracting: these are unsigned 64-bit
+	 * quantities, so if the accounted usage ever exceeded total_bytes
+	 * a subtraction would wrap around to a huge value and hide the
+	 * shortage.
+	 */
+	has_free_space = info->total_bytes > used;
+	spin_unlock(&info->lock);
+
+	if (has_free_space)
+		return 0;
+
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	/* The chunk allocation is done with chunk_mutex held. */
+	mutex_lock(&fs_info->chunk_mutex);
+	ret = btrfs_alloc_chunk(trans, btrfs_metadata_alloc_profile(fs_info));
+	mutex_unlock(&fs_info->chunk_mutex);
+	if (ret) {
+		/* Abort first, then drop our handle on the transaction. */
+		btrfs_abort_transaction(trans, ret);
+		btrfs_end_transaction(trans);
+		return ret;
+	}
+
+	return btrfs_commit_transaction(trans);
+}
@@ -32,6 +32,7 @@ int btrfs_check_mountopts_hmzoned(struct btrfs_fs_info *info);
bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
u64 num_bytes);
int btrfs_load_block_group_zone_info(struct btrfs_block_group_cache *cache);
+int btrfs_hmzoned_check_metadata_space(struct btrfs_fs_info *fs_info);
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
{
On or after a degraded mount, we might have no writable metadata block group due to broken write pointers. If you, e.g., balance the FS before writing any data, alloc_tree_block_no_bg_flush() (called from insert_balance_item()) fails to allocate a tree block for it, due to a global reservation failure. We can reproduce this situation with xfstests btrfs/124. While we can work around the failure by writing some data and, as a result of that write, having a new metadata block group allocated, that is bad practice to rely on. This commit avoids such failures by ensuring that a read-write mounted volume has non-zero metadata space. If the metadata space is empty, it forces the allocation of a new metadata block group. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> --- fs/btrfs/disk-io.c | 9 +++++++++ fs/btrfs/hmzoned.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/hmzoned.h | 1 + 3 files changed, 55 insertions(+)