Message ID | 534c381d897ad3f29948594014910310fe504bbc.1707475586.git.johannes.thumshirn@wdc.com
---|---
State | New, archived
Series | btrfs: zoned: don't skip block group profile checks on conv zones
On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
>
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are on conventional zones. But this
> will lead to problems, once we're trying to allocate chunks backed by
> sequential zones.
>
> So also check for conventional zones when loading a block group's profile
> on them.
>
> Reported-by: 韩于惟 <hrx@bupt.moe>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
>  1 file changed, 27 insertions(+), 3 deletions(-)

This seems more complex than necessary. It is duplicating
calculate_alloc_pointer() code into the per-profile loading functions. How
about adding an equivalent check, like the one below, after the out label?

	if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
		btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
			  btrfs_bg_type_to_raid_name(map->type));
		return -EINVAL;
	}

[...]
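[Editor's note] For context, here is a minimal sketch of where such a
consolidated check could live: at the end of
btrfs_load_block_group_zone_info(), after the out label. The exact
placement and the extra profile-mask test are assumptions for
illustration, not merged code: SINGLE data needs no raid-stripe-tree, so a
check running for every profile would presumably have to exempt it.

	out:
		/*
		 * A block group backed entirely by conventional zones took
		 * the calculate_alloc_pointer() path and skipped the
		 * per-profile write pointer checks, so catch unsupported
		 * RAID data profiles here instead.
		 */
		if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
		    (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
		    !fs_info->stripe_root) {
			btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
				  btrfs_bg_type_to_raid_name(map->type));
			return -EINVAL;
		}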
On 2/9/24 19:46, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
>
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are on conventional zones. But this
> will lead to problems, once we're trying to allocate chunks backed by
> sequential zones.
>
> So also check for conventional zones when loading a block group's profile
> on them.
>
> Reported-by: 韩于惟 <hrx@bupt.moe>

Let's keep using the roman alphabet for names, please...

Yuwei,

Not all kernel developers can read Chinese, so please sign your emails with
your name written in the roman alphabet.

> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

[...]
On 2024/2/12 9:59, Damien Le Moal wrote:
> On 2/9/24 19:46, Johannes Thumshirn wrote:
>> On a zoned filesystem with conventional zones, we're skipping the block
>> group profile checks for the conventional zones.
>>
>> [...]
>>
>> Reported-by: 韩于惟 <hrx@bupt.moe>
> Let's keep using the roman alphabet for names please...

Updated. It can be changed to "HAN Yuwei".

[...]
On 09.02.24 15:29, Naohiro Aota wrote:
> On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
>> On a zoned filesystem with conventional zones, we're skipping the block
>> group profile checks for the conventional zones.
>>
>> [...]
>>
>>  fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
>>  1 file changed, 27 insertions(+), 3 deletions(-)
>
> This seems more complex than necessary. It is duplicating
> calculate_alloc_pointer() code into the per-profile loading functions. How
> about adding an equivalent check, like the one below, after the out label?
>
>	if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
>		btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
>			  btrfs_bg_type_to_raid_name(map->type));
>		return -EINVAL;
>	}

OK, will do.
On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
>
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are on conventional zones. But this
> will lead to problems, once we're trying to allocate chunks backed by
> sequential zones.

I don't remember if, or to what extent, seq+zoned devices are supported.
There are many places with btrfs_is_zoned() that do not distinguish whether
the sequential area is present. Some of the changes could work on
sequential zones, but this could lead to strange problems, like in this
case. We could support as much as is practical, but right now I don't see
which use cases call for that.
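[Editor's note] To make the distinction concrete, here is an illustrative,
self-contained sketch (a hypothetical helper, not existing btrfs or
block-layer code) that walks a zoned block device and checks whether any
sequential-write zones are actually present. A test like bdev_is_zoned(),
much like btrfs_is_zoned(), only says "zoned"; it cannot tell an
all-conventional device apart from one with sequential zones.

	#include <linux/blkdev.h>
	#include <linux/blkzoned.h>

	static int count_seq_zones_cb(struct blk_zone *zone, unsigned int idx,
				      void *data)
	{
		unsigned int *nr_seq = data;

		/* Count both write-pointer zone types as "sequential". */
		if (zone->type == BLK_ZONE_TYPE_SEQWRITE_REQ ||
		    zone->type == BLK_ZONE_TYPE_SEQWRITE_PREF)
			(*nr_seq)++;
		return 0;
	}

	static bool bdev_has_sequential_zones(struct block_device *bdev)
	{
		unsigned int nr_seq = 0;
		int ret;

		if (!bdev_is_zoned(bdev))
			return false;

		/* Walks every zone: fine for a sketch, too slow for hot paths. */
		ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
					  count_seq_zones_cb, &nr_seq);
		if (ret < 0)
			return false;

		return nr_seq > 0;
	}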
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d9716456bce0..5beb6b936e61 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1369,8 +1369,10 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
 		return -EIO;
 	}
 
-	bg->alloc_offset = info->alloc_offset;
-	bg->zone_capacity = info->capacity;
+	if (info->alloc_offset != WP_CONVENTIONAL) {
+		bg->alloc_offset = info->alloc_offset;
+		bg->zone_capacity = info->capacity;
+	}
 	if (test_bit(0, active))
 		set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
 	return 0;
@@ -1406,6 +1408,16 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 		return -EIO;
 	}
 
+	if (zone_info[0].alloc_offset == WP_CONVENTIONAL) {
+		zone_info[0].alloc_offset = bg->alloc_offset;
+		zone_info[0].capacity = bg->length;
+	}
+
+	if (zone_info[1].alloc_offset == WP_CONVENTIONAL) {
+		zone_info[1].alloc_offset = bg->alloc_offset;
+		zone_info[1].capacity = bg->length;
+	}
+
 	if (test_bit(0, active) != test_bit(1, active)) {
 		if (!btrfs_zone_activate(bg))
 			return -EIO;
@@ -1458,6 +1470,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
 					       zone_info[1].capacity);
 	}
 
+	if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+		zone_info[0].alloc_offset = bg->alloc_offset;
+
 	if (zone_info[0].alloc_offset != WP_MISSING_DEV)
 		bg->alloc_offset = zone_info[0].alloc_offset;
 	else
@@ -1479,6 +1494,11 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}
 
+	for (int i = 0; i < map->num_stripes; i++) {
+		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+			zone_info[i].alloc_offset = bg->alloc_offset;
+	}
+
 	for (int i = 0; i < map->num_stripes; i++) {
 		if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
 		    zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1511,6 +1531,11 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}
 
+	for (int i = 0; i < map->num_stripes; i++) {
+		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+			zone_info[i].alloc_offset = bg->alloc_offset;
+	}
+
 	for (int i = 0; i < map->num_stripes; i++) {
 		if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
 		    zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1605,7 +1630,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		} else if (map->num_stripes == num_conventional) {
 			cache->alloc_offset = last_alloc;
 			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
-			goto out;
 		}
 	}
On a zoned filesystem with conventional zones, we're skipping the block
group profile checks for the conventional zones.

This allows converting a zoned filesystem's data block groups to RAID when
all of the zones backing the chunk are on conventional zones. But this
will lead to problems, once we're trying to allocate chunks backed by
sequential zones.

So also check for conventional zones when loading a block group's profile
on them.

Reported-by: 韩于惟 <hrx@bupt.moe>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)
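[Editor's note] For readers without fs/btrfs/zoned.c at hand, here is a
rough paraphrase of the control flow the patch hooks into, heavily
simplified and condensed (the ordering follows the hunk contexts above;
error handling and most details are omitted, so treat it as a sketch
rather than the verbatim kernel source):

	int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache,
					     bool new)
	{
		/* 1. Load per-stripe zone info. Stripes sitting on
		 *    conventional zones get the WP_CONVENTIONAL sentinel as
		 *    their alloc_offset, since they have no write pointer. */

		/* 2. If any stripe is conventional, derive the allocation
		 *    pointer from the extent tree instead. */
		if (num_conventional > 0) {
			ret = calculate_alloc_pointer(cache, &last_alloc, new);
			if (ret) {
				/* ... error out ... */
			} else if (map->num_stripes == num_conventional) {
				cache->alloc_offset = last_alloc;
				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
					&cache->runtime_flags);
				/* The patch removes the "goto out" that was
				 * here, so all-conventional block groups now
				 * also reach step 3. */
			}
		}

		/* 3. The per-profile loaders run their consistency checks;
		 *    with the patch they first replace each WP_CONVENTIONAL
		 *    entry with the alloc_offset computed in step 2. */
		switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
		case 0: /* SINGLE */
			ret = btrfs_load_block_group_single(cache, &zone_info[0],
							    active);
			break;
		case BTRFS_BLOCK_GROUP_DUP:
			ret = btrfs_load_block_group_dup(cache, map, zone_info,
							 active);
			break;
		/* ... RAID1, RAID0 and RAID10 are handled analogously ... */
		}

		/* ... */
	}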