@@ -558,14 +558,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl,
struct btrfs_block_group *block_group,
int index, int max_index,
- struct btrfs_key *key)
+ struct btrfs_key *found_key)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *extent_root;
- int ret = 0;
u64 search_offset;
u64 search_end = block_group->start + block_group->length;
struct btrfs_path *path;
+ struct btrfs_key search_key;
+ int ret = 0;
ASSERT(index >= 0);
ASSERT(index <= max_index);
@@ -585,37 +586,24 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
path->reada = READA_FORWARD;
search_offset = index * div_u64(block_group->length, max_index);
- key->objectid = block_group->start + search_offset;
- key->type = BTRFS_EXTENT_ITEM_KEY;
- key->offset = 0;
+ search_key.objectid = block_group->start + search_offset;
+ search_key.type = BTRFS_EXTENT_ITEM_KEY;
+ search_key.offset = 0;
- while (1) {
- ret = btrfs_search_forward(extent_root, key, path, 0);
- if (ret != 0)
- goto out;
+ btrfs_for_each_slot(extent_root, &search_key, found_key, path, ret) {
/* Success; sampled an extent item in the block group */
- if (key->type == BTRFS_EXTENT_ITEM_KEY &&
- key->objectid >= block_group->start &&
- key->objectid + key->offset <= search_end)
- goto out;
+ if (found_key->type == BTRFS_EXTENT_ITEM_KEY &&
+ found_key->objectid >= block_group->start &&
+ found_key->objectid + found_key->offset <= search_end)
+ break;
/* We can't possibly find a valid extent item anymore */
- if (key->objectid >= search_end) {
+ if (found_key->objectid >= search_end) {
ret = 1;
break;
}
- if (key->type < BTRFS_EXTENT_ITEM_KEY)
- key->type = BTRFS_EXTENT_ITEM_KEY;
- else
- key->objectid++;
- btrfs_release_path(path);
- up_read(&fs_info->commit_root_sem);
- mutex_unlock(&caching_ctl->mutex);
- cond_resched();
- mutex_lock(&caching_ctl->mutex);
- down_read(&fs_info->commit_root_sem);
}
-out:
+
lockdep_assert_held(&caching_ctl->mutex);
lockdep_assert_held_read(&fs_info->commit_root_sem);
btrfs_free_path(path);
@@ -659,6 +647,7 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl,
struct btrfs_block_group *block_group)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_key key;
int i;
u64 min_size = block_group->length;
@@ -668,6 +657,8 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl
if (!btrfs_block_group_should_use_size_class(block_group))
return 0;
+ lockdep_assert_held(&caching_ctl->mutex);
+ lockdep_assert_held_read(&fs_info->commit_root_sem);
for (i = 0; i < 5; ++i) {
ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key);
if (ret < 0)
@@ -682,7 +673,6 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl
block_group->size_class = size_class;
spin_unlock(&block_group->lock);
}
-
out:
return ret;
}
This is another incremental patch fixing bugs in: btrfs: load block group size class when caching The use of search_forward was incorrect, and I have replaced it with btrfs_for_each_slot. Since we only consider five samples (five search slots), don't bother with the complexity of looking for commit_root_sem contention. If necessary, it can be added to the load function in between samples. The mistake was Reported-by: Filipe Manana <fdmanana@kernel.org> The commit message should be: btrfs: load block group size class when caching Since the size class is an artifact of an arbitrary anti fragmentation strategy, it doesn't really make sense to persist it. Furthermore, most of the size class logic assumes fresh block groups. That is of course not a reasonable assumption -- we will be upgrading kernels with existing filesystems whose block groups are not classified. To work around those issues, implement logic to compute the size class of the block groups as we cache them in. To perfectly assess the state of a block group, we would have to read the entire extent tree (since the free space cache mashes together contiguous extent items) which would be prohibitively expensive for larger file systems with more extents. We can do it relatively cheaply by implementing a simple heuristic of sampling a handful of extents and picking the smallest one we see. In the happy case where the block group was classified, we will only see extents of the correct size. In the unhappy case, we will hopefully find one of the smaller extents, but there is no perfect answer anyway. Autorelocation will eventually churn up the block group if there is significant freeing anyway. There was no regression in mount performance at end state of the fsperf test suite, and the delay until the block group is marked cached is minimized by the constant number of extent samples. Signed-off-by: Boris Burkov <boris@bur.io> --- fs/btrfs/block-group.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-)