@@ -1009,12 +1009,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
WARN_ON(block_group->space_info->total_bytes
< block_group->length);
WARN_ON(block_group->space_info->bytes_readonly
- < block_group->length);
+ < block_group->length - block_group->zone_unusable);
+ WARN_ON(block_group->space_info->bytes_zone_unusable
+ < block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total
< block_group->length * factor);
}
block_group->space_info->total_bytes -= block_group->length;
- block_group->space_info->bytes_readonly -= block_group->length;
+ block_group->space_info->bytes_readonly -=
+ (block_group->length - block_group->zone_unusable);
+ block_group->space_info->bytes_zone_unusable -=
+ block_group->zone_unusable;
block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
@@ -1158,7 +1163,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
}
num_bytes = cache->length - cache->reserved - cache->pinned -
- cache->bytes_super - cache->used;
+ cache->bytes_super - cache->zone_unusable - cache->used;
/*
* Data never overcommits, even in mixed mode, so do just the straight
@@ -1189,6 +1194,12 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (!ret) {
sinfo->bytes_readonly += num_bytes;
+ if (btrfs_is_zoned(cache->fs_info)) {
+ /* Migrate zone_unusable bytes to readonly */
+ sinfo->bytes_readonly += cache->zone_unusable;
+ sinfo->bytes_zone_unusable -= cache->zone_unusable;
+ cache->zone_unusable = 0;
+ }
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
}
@@ -1876,12 +1887,20 @@ static int read_one_block_group(struct btrfs_fs_info *info,
}
/*
- * Check for two cases, either we are full, and therefore don't need
- * to bother with the caching work since we won't find any space, or we
- * are empty, and we can just add all the space in and be done with it.
- * This saves us _a_lot_ of time, particularly in the full case.
+ * For zoned filesystem, space after the allocation offset is the only
+ * free space for a block group. So, we don't need any caching work.
+ * btrfs_calc_zone_unusable() will set the amount of free space and
+ * zone_unusable space.
+ *
+ * For regular filesystem, check for two cases, either we are full, and
+ * therefore don't need to bother with the caching work since we won't
+ * find any space, or we are empty, and we can just add all the space
+ * in and be done with it. This saves us _a_lot_ of time, particularly
+ * in the full case.
*/
- if (cache->length == cache->used) {
+ if (btrfs_is_zoned(info)) {
+ btrfs_calc_zone_unusable(cache);
+ } else if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
@@ -1900,7 +1919,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
}
trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, cache->length,
- cache->used, cache->bytes_super, &space_info);
+ cache->used, cache->bytes_super,
+ cache->zone_unusable, &space_info);
cache->space_info = space_info;
@@ -1956,7 +1976,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
break;
}
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
- 0, &space_info);
+ 0, 0, &space_info);
bg->space_info = space_info;
link_block_group(bg);
@@ -2197,7 +2217,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
- cache->bytes_super, &cache->space_info);
+ cache->bytes_super, 0, &cache->space_info);
btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -2305,8 +2325,15 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_lock(&cache->lock);
if (!--cache->ro) {
num_bytes = cache->length - cache->reserved -
- cache->pinned - cache->bytes_super - cache->used;
+ cache->pinned - cache->bytes_super -
+ cache->zone_unusable - cache->used;
sinfo->bytes_readonly -= num_bytes;
+ if (btrfs_is_zoned(cache->fs_info)) {
+ /* Migrate zone_unusable bytes back */
+ cache->zone_unusable = cache->alloc_offset - cache->used;
+ sinfo->bytes_zone_unusable += cache->zone_unusable;
+ sinfo->bytes_readonly -= cache->zone_unusable;
+ }
list_del_init(&cache->ro_list);
}
spin_unlock(&cache->lock);
@@ -189,6 +189,7 @@ struct btrfs_block_group {
* allocation. This is used only on a zoned filesystem.
*/
u64 alloc_offset;
+ u64 zone_unusable;
};
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -34,6 +34,7 @@
#include "block-group.h"
#include "discard.h"
#include "rcu-string.h"
+#include "zoned.h"
#undef SCRAMBLE_DELAYED_REFS
@@ -2740,6 +2741,10 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
+ } else if (btrfs_is_zoned(fs_info)) {
+ /* Need reset before reusing in a zoned block group */
+ space_info->bytes_zone_unusable += len;
+ readonly = true;
}
spin_unlock(&cache->lock);
if (!readonly && return_free_space &&
@@ -2477,6 +2477,8 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
int ret = 0;
u64 filter_bytes = bytes;
+ ASSERT(!btrfs_is_zoned(fs_info));
+
info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
if (!info)
return -ENOMEM;
@@ -2534,11 +2536,49 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
return ret;
}
+static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
+ u64 bytenr, u64 size, bool used)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ u64 offset = bytenr - block_group->start;
+ u64 to_free, to_unusable;
+
+ spin_lock(&ctl->tree_lock);
+ if (!used)
+ to_free = size;
+ else if (offset >= block_group->alloc_offset)
+ to_free = size;
+ else if (offset + size <= block_group->alloc_offset)
+ to_free = 0;
+ else
+ to_free = offset + size - block_group->alloc_offset;
+ to_unusable = size - to_free;
+
+ ctl->free_space += to_free;
+ block_group->zone_unusable += to_unusable;
+ spin_unlock(&ctl->tree_lock);
+ if (!used) {
+ spin_lock(&block_group->lock);
+ block_group->alloc_offset -= size;
+ spin_unlock(&block_group->lock);
+ }
+
+ /* All the region is now unusable. Mark it as unused and reclaim */
+ if (block_group->zone_unusable == block_group->length)
+ btrfs_mark_bg_unused(block_group);
+
+ return 0;
+}
+
int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size)
{
enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+ if (btrfs_is_zoned(block_group->fs_info))
+ return __btrfs_add_free_space_zoned(block_group, bytenr, size,
+ true);
+
if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
trim_state = BTRFS_TRIM_STATE_TRIMMED;
@@ -2547,6 +2587,16 @@ int btrfs_add_free_space(struct btrfs_block_group *block_group,
bytenr, size, trim_state);
}
+int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
+ u64 bytenr, u64 size)
+{
+ if (btrfs_is_zoned(block_group->fs_info))
+ return __btrfs_add_free_space_zoned(block_group, bytenr, size,
+ false);
+
+ return btrfs_add_free_space(block_group, bytenr, size);
+}
+
/*
* This is a subtle distinction because when adding free space back in general,
* we want it to be added as untrimmed for async. But in the case where we add
@@ -2557,6 +2607,10 @@ int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
{
enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+ if (btrfs_is_zoned(block_group->fs_info))
+ return __btrfs_add_free_space_zoned(block_group, bytenr, size,
+ true);
+
if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
trim_state = BTRFS_TRIM_STATE_TRIMMED;
@@ -2574,6 +2628,9 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
int ret;
bool re_search = false;
+ if (btrfs_is_zoned(block_group->fs_info))
+ return 0;
+
spin_lock(&ctl->tree_lock);
again:
@@ -2668,6 +2725,16 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
struct rb_node *n;
int count = 0;
+ /*
+ * Zoned btrfs does not use free space tree and cluster. Just print
+ * out the free space after the allocation offset.
+ */
+ if (btrfs_is_zoned(fs_info)) {
+ btrfs_info(fs_info, "free space %llu",
+ block_group->length - block_group->alloc_offset);
+ return;
+ }
+
spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
@@ -107,6 +107,8 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
enum btrfs_trim_state trim_state);
int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
+int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
+ u64 bytenr, u64 size);
int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
@@ -169,6 +169,7 @@ u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
ASSERT(s_info);
return s_info->bytes_used + s_info->bytes_reserved +
s_info->bytes_pinned + s_info->bytes_readonly +
+ s_info->bytes_zone_unusable +
(may_use_included ? s_info->bytes_may_use : 0);
}
@@ -264,7 +265,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
+ u64 bytes_readonly, u64 bytes_zone_unusable,
struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
@@ -280,6 +281,7 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->bytes_readonly += bytes_readonly;
+ found->bytes_zone_unusable += bytes_zone_unusable;
if (total_bytes > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
@@ -429,10 +431,10 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
info->total_bytes - btrfs_space_info_used(info, true),
info->full ? "" : "not ");
btrfs_info(fs_info,
- "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
+ "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
info->bytes_reserved, info->bytes_may_use,
- info->bytes_readonly);
+ info->bytes_readonly, info->bytes_zone_unusable);
DUMP_BLOCK_RSV(fs_info, global_block_rsv);
DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
@@ -461,9 +463,10 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
btrfs_info(fs_info,
- "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
+ "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
cache->start, cache->length, cache->used, cache->pinned,
- cache->reserved, cache->ro ? "[readonly]" : "");
+ cache->reserved, cache->zone_unusable,
+ cache->ro ? "[readonly]" : "");
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
}
@@ -17,6 +17,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 bytes_readonly; /* total bytes that are read only */
+ u64 bytes_zone_unusable; /* total bytes that are unusable until
+ resetting the device zone */
u64 max_extent_size; /* This will hold the maximum extent size of
the space info if we had an ENOSPC in the
@@ -123,7 +125,7 @@ DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
+ u64 bytes_readonly, u64 bytes_zone_unusable,
struct btrfs_space_info **space_info);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
u64 flags);
@@ -666,6 +666,7 @@ SPACE_INFO_ATTR(bytes_pinned);
SPACE_INFO_ATTR(bytes_reserved);
SPACE_INFO_ATTR(bytes_may_use);
SPACE_INFO_ATTR(bytes_readonly);
+SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR(space_info, total_bytes_pinned,
@@ -679,6 +680,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, bytes_reserved),
BTRFS_ATTR_PTR(space_info, bytes_may_use),
BTRFS_ATTR_PTR(space_info, bytes_readonly),
+ BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
BTRFS_ATTR_PTR(space_info, disk_used),
BTRFS_ATTR_PTR(space_info, disk_total),
BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
@@ -1160,3 +1160,24 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
return ret;
}
+
+void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
+{
+ u64 unusable, free;
+
+ if (!btrfs_is_zoned(cache->fs_info))
+ return;
+
+ WARN_ON(cache->bytes_super != 0);
+ unusable = cache->alloc_offset - cache->used;
+ free = cache->length - cache->alloc_offset;
+
+ /* We only need ->free_space in ALLOC_SEQ block groups */
+ cache->last_byte_to_unpin = (u64)-1;
+ cache->cached = BTRFS_CACHE_FINISHED;
+ cache->free_space_ctl->free_space = free;
+ cache->zone_unusable = unusable;
+
+ /* Should not have any excluded extents. Just in case, though */
+ btrfs_free_excluded_extents(cache);
+}
@@ -42,6 +42,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes);
int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
+void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -123,6 +124,8 @@ static inline int btrfs_load_block_group_zone_info(
return 0;
}
+static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
+
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)