Message ID | 20230911-raid-stripe-tree-v8-7-647676fa852c@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: introduce RAID stripe tree | expand |
On Mon, Sep 11, 2023 at 05:52:08AM -0700, Johannes Thumshirn wrote: > When we have a raid-stripe-tree, we can do RAID0/1/10 on zoned devices for > data block-groups. For meta-data block-groups, we don't actually need > anything special, as all meta-data I/O is protected by the > btrfs_zoned_meta_io_lock() already. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/raid-stripe-tree.h | 7 ++- > fs/btrfs/volumes.c | 2 + > fs/btrfs/zoned.c | 113 +++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 119 insertions(+), 3 deletions(-) > > diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h > index 40aa553ae8aa..30c7d5981890 100644 > --- a/fs/btrfs/raid-stripe-tree.h > +++ b/fs/btrfs/raid-stripe-tree.h > @@ -8,6 +8,11 @@ > > #include "disk-io.h" > > +#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP |\ > + BTRFS_BLOCK_GROUP_RAID1_MASK |\ > + BTRFS_BLOCK_GROUP_RAID0 |\ > + BTRFS_BLOCK_GROUP_RAID10) > + > struct btrfs_io_context; > struct btrfs_io_stripe; > > @@ -32,7 +37,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info, > if (type != BTRFS_BLOCK_GROUP_DATA) > return false; > > - if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) > + if (profile & BTRFS_RST_SUPP_BLOCK_GROUP_MASK) > return true; > > return false; > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 7c25f5c77788..9f17e5f290f4 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -6438,6 +6438,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, > * I/O context structure. 
> */ > if (smap && num_alloc_stripes == 1 && > + !(btrfs_need_stripe_tree_update(fs_info, map->type) && > + op != BTRFS_MAP_READ) && > !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { > ret = set_io_stripe(fs_info, op, logical, length, smap, map, > stripe_index, stripe_offset, stripe_nr); > diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c > index c6eedf4bfba9..4ca36875058c 100644 > --- a/fs/btrfs/zoned.c > +++ b/fs/btrfs/zoned.c > @@ -1481,8 +1481,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) > set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); > break; > case BTRFS_BLOCK_GROUP_DUP: > - if (map->type & BTRFS_BLOCK_GROUP_DATA) { > - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); > + if (map->type & BTRFS_BLOCK_GROUP_DATA && > + !btrfs_stripe_tree_root(fs_info)) { > + btrfs_err(fs_info, "zoned: data DUP profile needs stripe_root"); > ret = -EINVAL; > goto out; > } > @@ -1520,8 +1521,116 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) > cache->zone_capacity = min(caps[0], caps[1]); > break; > case BTRFS_BLOCK_GROUP_RAID1: > + case BTRFS_BLOCK_GROUP_RAID1C3: > + case BTRFS_BLOCK_GROUP_RAID1C4: This > + if (map->type & BTRFS_BLOCK_GROUP_DATA && > + !btrfs_stripe_tree_root(fs_info)) { > + btrfs_err(fs_info, > + "zoned: data %s needs stripe_root", > + btrfs_bg_type_to_raid_name(map->type)); > + ret = -EIO; > + goto out; > + > + } > + > + for (i = 0; i < map->num_stripes; i++) { > + if (alloc_offsets[i] == WP_MISSING_DEV || > + alloc_offsets[i] == WP_CONVENTIONAL) > + continue; > + > + if ((alloc_offsets[0] != alloc_offsets[i]) && > + !btrfs_test_opt(fs_info, DEGRADED)) { > + btrfs_err(fs_info, > + "zoned: write pointer offset mismatch of zones in %s profile", > + btrfs_bg_type_to_raid_name(map->type)); > + ret = -EIO; > + goto out; > + } > + if (test_bit(0, active) != test_bit(i, active)) { > + if (!btrfs_test_opt(fs_info, DEGRADED) && > + 
!btrfs_zone_activate(cache)) { > + ret = -EIO; > + goto out; > + } > + } else { > + if (test_bit(0, active)) > + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > + &cache->runtime_flags); > + } > + /* > + * In case a device is missing we have a cap of 0, so don't > + * use it. > + */ > + cache->zone_capacity = min_not_zero(caps[0], caps[i]); > + } > + > + if (alloc_offsets[0] != WP_MISSING_DEV) > + cache->alloc_offset = alloc_offsets[0]; > + else > + cache->alloc_offset = alloc_offsets[i - 1]; whole block > + break; > case BTRFS_BLOCK_GROUP_RAID0: and > + if (map->type & BTRFS_BLOCK_GROUP_DATA && > + !btrfs_stripe_tree_root(fs_info)) { > + btrfs_err(fs_info, > + "zoned: data %s needs stripe_root", > + btrfs_bg_type_to_raid_name(map->type)); > + ret = -EIO; > + goto out; > + > + } > + for (i = 0; i < map->num_stripes; i++) { > + if (alloc_offsets[i] == WP_MISSING_DEV || > + alloc_offsets[i] == WP_CONVENTIONAL) > + continue; > + > + if (test_bit(0, active) != test_bit(i, active)) { > + if (!btrfs_zone_activate(cache)) { > + ret = -EIO; > + goto out; > + } > + } else { > + if (test_bit(0, active)) > + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > + &cache->runtime_flags); > + } > + cache->zone_capacity += caps[i]; > + cache->alloc_offset += alloc_offsets[i]; > + > + } > + break; > case BTRFS_BLOCK_GROUP_RAID10: > + if (map->type & BTRFS_BLOCK_GROUP_DATA && > + !btrfs_stripe_tree_root(fs_info)) { > + btrfs_err(fs_info, > + "zoned: data %s needs stripe_root", > + btrfs_bg_type_to_raid_name(map->type)); > + ret = -EIO; > + goto out; > + > + } > + for (i = 0; i < map->num_stripes; i++) { > + if (alloc_offsets[i] == WP_MISSING_DEV || > + alloc_offsets[i] == WP_CONVENTIONAL) > + continue; > + > + if (test_bit(0, active) != test_bit(i, active)) { > + if (!btrfs_zone_activate(cache)) { > + ret = -EIO; > + goto out; > + } > + } else { > + if (test_bit(0, active)) > + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > + &cache->runtime_flags); > + } > + if ((i % map->sub_stripes) == 0) { > + 
cache->zone_capacity += caps[i]; > + cache->alloc_offset += alloc_offsets[i]; > + } > + > + } > + break; Seem to be quite long and nested for a case, can they be factored to helpers? > case BTRFS_BLOCK_GROUP_RAID5: > case BTRFS_BLOCK_GROUP_RAID6: > /* non-single profiles are not supported yet */ > > -- > 2.41.0
On 12.09.23 22:49, David Sterba wrote: > On Mon, Sep 11, 2023 at 05:52:08AM -0700, Johannes Thumshirn wrote: >> When we have a raid-stripe-tree, we can do RAID0/1/10 on zoned devices for >> data block-groups. For meta-data block-groups, we don't actually need >> anything special, as all meta-data I/O is protected by the >> btrfs_zoned_meta_io_lock() already. >> >> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> >> --- >> fs/btrfs/raid-stripe-tree.h | 7 ++- >> fs/btrfs/volumes.c | 2 + >> fs/btrfs/zoned.c | 113 +++++++++++++++++++++++++++++++++++++++++++- >> 3 files changed, 119 insertions(+), 3 deletions(-) >> >> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h >> index 40aa553ae8aa..30c7d5981890 100644 >> --- a/fs/btrfs/raid-stripe-tree.h >> +++ b/fs/btrfs/raid-stripe-tree.h >> @@ -8,6 +8,11 @@ >> >> #include "disk-io.h" >> >> +#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP |\ >> + BTRFS_BLOCK_GROUP_RAID1_MASK |\ >> + BTRFS_BLOCK_GROUP_RAID0 |\ >> + BTRFS_BLOCK_GROUP_RAID10) >> + >> struct btrfs_io_context; >> struct btrfs_io_stripe; >> >> @@ -32,7 +37,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info, >> if (type != BTRFS_BLOCK_GROUP_DATA) >> return false; >> >> - if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) >> + if (profile & BTRFS_RST_SUPP_BLOCK_GROUP_MASK) >> return true; >> >> return false; >> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c >> index 7c25f5c77788..9f17e5f290f4 100644 >> --- a/fs/btrfs/volumes.c >> +++ b/fs/btrfs/volumes.c >> @@ -6438,6 +6438,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, >> * I/O context structure. 
>> */ >> if (smap && num_alloc_stripes == 1 && >> + !(btrfs_need_stripe_tree_update(fs_info, map->type) && >> + op != BTRFS_MAP_READ) && >> !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { >> ret = set_io_stripe(fs_info, op, logical, length, smap, map, >> stripe_index, stripe_offset, stripe_nr); >> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c >> index c6eedf4bfba9..4ca36875058c 100644 >> --- a/fs/btrfs/zoned.c >> +++ b/fs/btrfs/zoned.c >> @@ -1481,8 +1481,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) >> set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); >> break; >> case BTRFS_BLOCK_GROUP_DUP: >> - if (map->type & BTRFS_BLOCK_GROUP_DATA) { >> - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && >> + !btrfs_stripe_tree_root(fs_info)) { >> + btrfs_err(fs_info, "zoned: data DUP profile needs stripe_root"); >> ret = -EINVAL; >> goto out; >> } >> @@ -1520,8 +1521,116 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) >> cache->zone_capacity = min(caps[0], caps[1]); >> break; >> case BTRFS_BLOCK_GROUP_RAID1: >> + case BTRFS_BLOCK_GROUP_RAID1C3: >> + case BTRFS_BLOCK_GROUP_RAID1C4: > > This > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && >> + !btrfs_stripe_tree_root(fs_info)) { >> + btrfs_err(fs_info, >> + "zoned: data %s needs stripe_root", >> + btrfs_bg_type_to_raid_name(map->type)); >> + ret = -EIO; >> + goto out; >> + >> + } >> + >> + for (i = 0; i < map->num_stripes; i++) { >> + if (alloc_offsets[i] == WP_MISSING_DEV || >> + alloc_offsets[i] == WP_CONVENTIONAL) >> + continue; >> + >> + if ((alloc_offsets[0] != alloc_offsets[i]) && >> + !btrfs_test_opt(fs_info, DEGRADED)) { >> + btrfs_err(fs_info, >> + "zoned: write pointer offset mismatch of zones in %s profile", >> + btrfs_bg_type_to_raid_name(map->type)); >> + ret = -EIO; >> + goto out; >> + } >> + if (test_bit(0, active) != test_bit(i, 
active)) { >> + if (!btrfs_test_opt(fs_info, DEGRADED) && >> + !btrfs_zone_activate(cache)) { >> + ret = -EIO; >> + goto out; >> + } >> + } else { >> + if (test_bit(0, active)) >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, >> + &cache->runtime_flags); >> + } >> + /* >> + * In case a device is missing we have a cap of 0, so don't >> + * use it. >> + */ >> + cache->zone_capacity = min_not_zero(caps[0], caps[i]); >> + } >> + >> + if (alloc_offsets[0] != WP_MISSING_DEV) >> + cache->alloc_offset = alloc_offsets[0]; >> + else >> + cache->alloc_offset = alloc_offsets[i - 1]; > > whole block > >> + break; >> case BTRFS_BLOCK_GROUP_RAID0: > > and > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && >> + !btrfs_stripe_tree_root(fs_info)) { >> + btrfs_err(fs_info, >> + "zoned: data %s needs stripe_root", >> + btrfs_bg_type_to_raid_name(map->type)); >> + ret = -EIO; >> + goto out; >> + >> + } >> + for (i = 0; i < map->num_stripes; i++) { >> + if (alloc_offsets[i] == WP_MISSING_DEV || >> + alloc_offsets[i] == WP_CONVENTIONAL) >> + continue; >> + >> + if (test_bit(0, active) != test_bit(i, active)) { >> + if (!btrfs_zone_activate(cache)) { >> + ret = -EIO; >> + goto out; >> + } >> + } else { >> + if (test_bit(0, active)) >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, >> + &cache->runtime_flags); >> + } >> + cache->zone_capacity += caps[i]; >> + cache->alloc_offset += alloc_offsets[i]; >> + >> + } >> + break; >> case BTRFS_BLOCK_GROUP_RAID10: >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && >> + !btrfs_stripe_tree_root(fs_info)) { >> + btrfs_err(fs_info, >> + "zoned: data %s needs stripe_root", >> + btrfs_bg_type_to_raid_name(map->type)); >> + ret = -EIO; >> + goto out; >> + >> + } >> + for (i = 0; i < map->num_stripes; i++) { >> + if (alloc_offsets[i] == WP_MISSING_DEV || >> + alloc_offsets[i] == WP_CONVENTIONAL) >> + continue; >> + >> + if (test_bit(0, active) != test_bit(i, active)) { >> + if (!btrfs_zone_activate(cache)) { >> + ret = -EIO; >> + goto out; >> + } >> + } else { >> + if 
(test_bit(0, active)) >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, >> + &cache->runtime_flags); >> + } >> + if ((i % map->sub_stripes) == 0) { >> + cache->zone_capacity += caps[i]; >> + cache->alloc_offset += alloc_offsets[i]; >> + } >> + >> + } >> + break; > > Seem to be quite long and nested for a case, can they be factored to > helpers? Sure, but I'd love to have https://lore.kernel.org/all/20230605085108.580976-1-hch@lst.de/ pulled in first. This patchset handles (among other things) the DUP and single cases as well.
On Wed, Sep 13, 2023 at 05:41:52AM +0000, Johannes Thumshirn wrote: > On 12.09.23 22:49, David Sterba wrote: > > On Mon, Sep 11, 2023 at 05:52:08AM -0700, Johannes Thumshirn wrote: > >> When we have a raid-stripe-tree, we can do RAID0/1/10 on zoned devices for > >> data block-groups. For meta-data block-groups, we don't actually need > >> anything special, as all meta-data I/O is protected by the > >> btrfs_zoned_meta_io_lock() already. > >> > >> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > >> --- > >> fs/btrfs/raid-stripe-tree.h | 7 ++- > >> fs/btrfs/volumes.c | 2 + > >> fs/btrfs/zoned.c | 113 +++++++++++++++++++++++++++++++++++++++++++- > >> 3 files changed, 119 insertions(+), 3 deletions(-) > >> > >> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h > >> index 40aa553ae8aa..30c7d5981890 100644 > >> --- a/fs/btrfs/raid-stripe-tree.h > >> +++ b/fs/btrfs/raid-stripe-tree.h > >> @@ -8,6 +8,11 @@ > >> > >> #include "disk-io.h" > >> > >> +#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP |\ > >> + BTRFS_BLOCK_GROUP_RAID1_MASK |\ > >> + BTRFS_BLOCK_GROUP_RAID0 |\ > >> + BTRFS_BLOCK_GROUP_RAID10) > >> + > >> struct btrfs_io_context; > >> struct btrfs_io_stripe; > >> > >> @@ -32,7 +37,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info, > >> if (type != BTRFS_BLOCK_GROUP_DATA) > >> return false; > >> > >> - if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) > >> + if (profile & BTRFS_RST_SUPP_BLOCK_GROUP_MASK) > >> return true; > >> > >> return false; > >> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > >> index 7c25f5c77788..9f17e5f290f4 100644 > >> --- a/fs/btrfs/volumes.c > >> +++ b/fs/btrfs/volumes.c > >> @@ -6438,6 +6438,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, > >> * I/O context structure. 
> >> */ > >> if (smap && num_alloc_stripes == 1 && > >> + !(btrfs_need_stripe_tree_update(fs_info, map->type) && > >> + op != BTRFS_MAP_READ) && > >> !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { > >> ret = set_io_stripe(fs_info, op, logical, length, smap, map, > >> stripe_index, stripe_offset, stripe_nr); > >> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c > >> index c6eedf4bfba9..4ca36875058c 100644 > >> --- a/fs/btrfs/zoned.c > >> +++ b/fs/btrfs/zoned.c > >> @@ -1481,8 +1481,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) > >> set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); > >> break; > >> case BTRFS_BLOCK_GROUP_DUP: > >> - if (map->type & BTRFS_BLOCK_GROUP_DATA) { > >> - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && > >> + !btrfs_stripe_tree_root(fs_info)) { > >> + btrfs_err(fs_info, "zoned: data DUP profile needs stripe_root"); > >> ret = -EINVAL; > >> goto out; > >> } > >> @@ -1520,8 +1521,116 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) > >> cache->zone_capacity = min(caps[0], caps[1]); > >> break; > >> case BTRFS_BLOCK_GROUP_RAID1: > >> + case BTRFS_BLOCK_GROUP_RAID1C3: > >> + case BTRFS_BLOCK_GROUP_RAID1C4: > > > > This > > > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && > >> + !btrfs_stripe_tree_root(fs_info)) { > >> + btrfs_err(fs_info, > >> + "zoned: data %s needs stripe_root", > >> + btrfs_bg_type_to_raid_name(map->type)); > >> + ret = -EIO; > >> + goto out; > >> + > >> + } > >> + > >> + for (i = 0; i < map->num_stripes; i++) { > >> + if (alloc_offsets[i] == WP_MISSING_DEV || > >> + alloc_offsets[i] == WP_CONVENTIONAL) > >> + continue; > >> + > >> + if ((alloc_offsets[0] != alloc_offsets[i]) && > >> + !btrfs_test_opt(fs_info, DEGRADED)) { > >> + btrfs_err(fs_info, > >> + "zoned: write pointer offset mismatch of zones in %s profile", > >> + 
btrfs_bg_type_to_raid_name(map->type)); > >> + ret = -EIO; > >> + goto out; > >> + } > >> + if (test_bit(0, active) != test_bit(i, active)) { > >> + if (!btrfs_test_opt(fs_info, DEGRADED) && > >> + !btrfs_zone_activate(cache)) { > >> + ret = -EIO; > >> + goto out; > >> + } > >> + } else { > >> + if (test_bit(0, active)) > >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > >> + &cache->runtime_flags); > >> + } > >> + /* > >> + * In case a device is missing we have a cap of 0, so don't > >> + * use it. > >> + */ > >> + cache->zone_capacity = min_not_zero(caps[0], caps[i]); > >> + } > >> + > >> + if (alloc_offsets[0] != WP_MISSING_DEV) > >> + cache->alloc_offset = alloc_offsets[0]; > >> + else > >> + cache->alloc_offset = alloc_offsets[i - 1]; > > > > whole block > > > >> + break; > >> case BTRFS_BLOCK_GROUP_RAID0: > > > > and > > > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && > >> + !btrfs_stripe_tree_root(fs_info)) { > >> + btrfs_err(fs_info, > >> + "zoned: data %s needs stripe_root", > >> + btrfs_bg_type_to_raid_name(map->type)); > >> + ret = -EIO; > >> + goto out; > >> + > >> + } > >> + for (i = 0; i < map->num_stripes; i++) { > >> + if (alloc_offsets[i] == WP_MISSING_DEV || > >> + alloc_offsets[i] == WP_CONVENTIONAL) > >> + continue; > >> + > >> + if (test_bit(0, active) != test_bit(i, active)) { > >> + if (!btrfs_zone_activate(cache)) { > >> + ret = -EIO; > >> + goto out; > >> + } > >> + } else { > >> + if (test_bit(0, active)) > >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > >> + &cache->runtime_flags); > >> + } > >> + cache->zone_capacity += caps[i]; > >> + cache->alloc_offset += alloc_offsets[i]; > >> + > >> + } > >> + break; > >> case BTRFS_BLOCK_GROUP_RAID10: > >> + if (map->type & BTRFS_BLOCK_GROUP_DATA && > >> + !btrfs_stripe_tree_root(fs_info)) { > >> + btrfs_err(fs_info, > >> + "zoned: data %s needs stripe_root", > >> + btrfs_bg_type_to_raid_name(map->type)); > >> + ret = -EIO; > >> + goto out; > >> + > >> + } > >> + for (i = 0; i < map->num_stripes; 
i++) { > >> + if (alloc_offsets[i] == WP_MISSING_DEV || > >> + alloc_offsets[i] == WP_CONVENTIONAL) > >> + continue; > >> + > >> + if (test_bit(0, active) != test_bit(i, active)) { > >> + if (!btrfs_zone_activate(cache)) { > >> + ret = -EIO; > >> + goto out; > >> + } > >> + } else { > >> + if (test_bit(0, active)) > >> + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, > >> + &cache->runtime_flags); > >> + } > >> + if ((i % map->sub_stripes) == 0) { > >> + cache->zone_capacity += caps[i]; > >> + cache->alloc_offset += alloc_offsets[i]; > >> + } > >> + > >> + } > >> + break; > > > > Seem to be quite long and nested for a case, can they be factored to > > helpers? > > Sure, but I'd love to have > https://lore.kernel.org/all/20230605085108.580976-1-hch@lst.de/ > pulled in first. This patchset handles (among other things) the DUP and > single cases as well. I see, the patches still apply cleanly so I'll add them to misc-next.
On 13.09.23 16:52, David Sterba wrote:
>> Sure, but I'd love to have
>> https://lore.kernel.org/all/20230605085108.580976-1-hch@lst.de/
>> pulled in first. This patchset handles (among other things) the DUP and
>> single cases as well.
>
> I see, the patches still apply cleanly so I'll add them to misc-next.
>

Thanks
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h index 40aa553ae8aa..30c7d5981890 100644 --- a/fs/btrfs/raid-stripe-tree.h +++ b/fs/btrfs/raid-stripe-tree.h @@ -8,6 +8,11 @@ #include "disk-io.h" +#define BTRFS_RST_SUPP_BLOCK_GROUP_MASK (BTRFS_BLOCK_GROUP_DUP |\ + BTRFS_BLOCK_GROUP_RAID1_MASK |\ + BTRFS_BLOCK_GROUP_RAID0 |\ + BTRFS_BLOCK_GROUP_RAID10) + struct btrfs_io_context; struct btrfs_io_stripe; @@ -32,7 +37,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info, if (type != BTRFS_BLOCK_GROUP_DATA) return false; - if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) + if (profile & BTRFS_RST_SUPP_BLOCK_GROUP_MASK) return true; return false; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7c25f5c77788..9f17e5f290f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6438,6 +6438,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * I/O context structure. */ if (smap && num_alloc_stripes == 1 && + !(btrfs_need_stripe_tree_update(fs_info, map->type) && + op != BTRFS_MAP_READ) && !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { ret = set_io_stripe(fs_info, op, logical, length, smap, map, stripe_index, stripe_offset, stripe_nr); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index c6eedf4bfba9..4ca36875058c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1481,8 +1481,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); break; case BTRFS_BLOCK_GROUP_DUP: - if (map->type & BTRFS_BLOCK_GROUP_DATA) { - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); + if (map->type & BTRFS_BLOCK_GROUP_DATA && + !btrfs_stripe_tree_root(fs_info)) { + btrfs_err(fs_info, "zoned: data DUP profile needs stripe_root"); ret = -EINVAL; goto out; } @@ -1520,8 +1521,116 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) 
cache->zone_capacity = min(caps[0], caps[1]); break; case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID1C3: + case BTRFS_BLOCK_GROUP_RAID1C4: + if (map->type & BTRFS_BLOCK_GROUP_DATA && + !btrfs_stripe_tree_root(fs_info)) { + btrfs_err(fs_info, + "zoned: data %s needs stripe_root", + btrfs_bg_type_to_raid_name(map->type)); + ret = -EIO; + goto out; + + } + + for (i = 0; i < map->num_stripes; i++) { + if (alloc_offsets[i] == WP_MISSING_DEV || + alloc_offsets[i] == WP_CONVENTIONAL) + continue; + + if ((alloc_offsets[0] != alloc_offsets[i]) && + !btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_err(fs_info, + "zoned: write pointer offset mismatch of zones in %s profile", + btrfs_bg_type_to_raid_name(map->type)); + ret = -EIO; + goto out; + } + if (test_bit(0, active) != test_bit(i, active)) { + if (!btrfs_test_opt(fs_info, DEGRADED) && + !btrfs_zone_activate(cache)) { + ret = -EIO; + goto out; + } + } else { + if (test_bit(0, active)) + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, + &cache->runtime_flags); + } + /* + * In case a device is missing we have a cap of 0, so don't + * use it. 
+ */ + cache->zone_capacity = min_not_zero(caps[0], caps[i]); + } + + if (alloc_offsets[0] != WP_MISSING_DEV) + cache->alloc_offset = alloc_offsets[0]; + else + cache->alloc_offset = alloc_offsets[i - 1]; + break; case BTRFS_BLOCK_GROUP_RAID0: + if (map->type & BTRFS_BLOCK_GROUP_DATA && + !btrfs_stripe_tree_root(fs_info)) { + btrfs_err(fs_info, + "zoned: data %s needs stripe_root", + btrfs_bg_type_to_raid_name(map->type)); + ret = -EIO; + goto out; + + } + for (i = 0; i < map->num_stripes; i++) { + if (alloc_offsets[i] == WP_MISSING_DEV || + alloc_offsets[i] == WP_CONVENTIONAL) + continue; + + if (test_bit(0, active) != test_bit(i, active)) { + if (!btrfs_zone_activate(cache)) { + ret = -EIO; + goto out; + } + } else { + if (test_bit(0, active)) + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, + &cache->runtime_flags); + } + cache->zone_capacity += caps[i]; + cache->alloc_offset += alloc_offsets[i]; + + } + break; case BTRFS_BLOCK_GROUP_RAID10: + if (map->type & BTRFS_BLOCK_GROUP_DATA && + !btrfs_stripe_tree_root(fs_info)) { + btrfs_err(fs_info, + "zoned: data %s needs stripe_root", + btrfs_bg_type_to_raid_name(map->type)); + ret = -EIO; + goto out; + + } + for (i = 0; i < map->num_stripes; i++) { + if (alloc_offsets[i] == WP_MISSING_DEV || + alloc_offsets[i] == WP_CONVENTIONAL) + continue; + + if (test_bit(0, active) != test_bit(i, active)) { + if (!btrfs_zone_activate(cache)) { + ret = -EIO; + goto out; + } + } else { + if (test_bit(0, active)) + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, + &cache->runtime_flags); + } + if ((i % map->sub_stripes) == 0) { + cache->zone_capacity += caps[i]; + cache->alloc_offset += alloc_offsets[i]; + } + + } + break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: /* non-single profiles are not supported yet */
When we have a raid-stripe-tree, we can do RAID0/1/10 on zoned devices for
data block-groups. For meta-data block-groups, we don't actually need
anything special, as all meta-data I/O is protected by the
btrfs_zoned_meta_io_lock() already.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/raid-stripe-tree.h | 7 ++-
 fs/btrfs/volumes.c | 2 +
 fs/btrfs/zoned.c | 113 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 119 insertions(+), 3 deletions(-)