Message ID | 20241023132518.19830-2-jth@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | implement truncation for RAID stripe-extents | expand |
On Wed, Oct 23, 2024 at 2:25 PM Johannes Thumshirn <jth@kernel.org> wrote: > > From: Johannes Thumshirn <johannes.thumshirn@wdc.com> > > In our CI system, the RAID stripe tree configuration sometimes fails with > the following ASSERT(): > > assertion failed: found_start >= start && found_end <= end, in fs/btrfs/raid-stripe-tree.c:64 > > This ASSERT()ion triggers, because for the initial design of RAID > stripe-tree, I had the "one ordered-extent equals one bio" rule of zoned > btrfs in mind. > > But for a RAID stripe-tree based system, that is not hosted on a zoned > storage device, but on a regular device this rule doesn't apply. > > So in case the range we want to delete starts in the middle of the > previous item, grab the item and "truncate" it's length. That is, clone > the item, subtract the deleted portion from the key's offset, delete the > old item and insert the new one. > > In case the range to delete ends in the middle of an item, we have to > adjust both the item's key as well as the stripe extents and then > re-insert the modified clone into the tree after deleting the old stripe > extent. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Looks good now, thanks. > --- > fs/btrfs/raid-stripe-tree.c | 81 +++++++++++++++++++++++++++++++++---- > 1 file changed, 74 insertions(+), 7 deletions(-) > > diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c > index 41970bbdb05f..9ffc79f250fb 100644 > --- a/fs/btrfs/raid-stripe-tree.c > +++ b/fs/btrfs/raid-stripe-tree.c > @@ -13,6 +13,39 @@ > #include "volumes.h" > #include "print-tree.h" > > +static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, > + struct btrfs_path *path, > + const struct btrfs_key *oldkey, > + u64 newlen, u64 frontpad) > +{ > + struct btrfs_stripe_extent *extent; > + struct extent_buffer *leaf; > + int slot; > + size_t item_size; > + struct btrfs_key newkey = { > + .objectid = oldkey->objectid + frontpad, > + .type = BTRFS_RAID_STRIPE_KEY, > + .offset = newlen, > + }; > + > + ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY); > + > + leaf = path->nodes[0]; > + slot = path->slots[0]; > + item_size = btrfs_item_size(leaf, slot); > + extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); > + > + for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { > + struct btrfs_raid_stride *stride = &extent->strides[i]; > + u64 phys; > + > + phys = btrfs_raid_stride_physical(leaf, stride); > + btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad); > + } > + > + btrfs_set_item_key_safe(trans, path, &newkey); > +} > + > int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) > { > struct btrfs_fs_info *fs_info = trans->fs_info; > @@ -36,23 +69,24 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le > while (1) { > key.objectid = start; > key.type = BTRFS_RAID_STRIPE_KEY; > - key.offset = length; > + key.offset = 0; > > ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); > if (ret < 0) > break; > - if (ret > 0) { > - ret = 0; > - if (path->slots[0] == 0) > - break; > + > + if (path->slots[0] == btrfs_header_nritems(path->nodes[0])) > path->slots[0]--; > - } > > leaf = path->nodes[0]; > slot = path->slots[0]; > btrfs_item_key_to_cpu(leaf, &key, slot); > found_start = key.objectid; > found_end = found_start + key.offset; > + ret = 0; > + > + if (key.type != BTRFS_RAID_STRIPE_KEY) > + break; > > /* That stripe ends before we start, we're done. */ > if (found_end <= start) > @@ -61,7 +95,40 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le > trace_btrfs_raid_extent_delete(fs_info, start, end, > found_start, found_end); > > - ASSERT(found_start >= start && found_end <= end); > + /* > + * The stripe extent starts before the range we want to delete: > + * > + * |--- RAID Stripe Extent ---| > + * |--- keep ---|--- drop ---| > + * > + * This means we have to duplicate the tree item, truncate the > + * length to the new size and then re-insert the item. > + */ > + if (found_start < start) { > + u64 diff = start - found_start; > + > + btrfs_partially_delete_raid_extent(trans, path, &key, > + diff, 0); > + break; > + } > + > + /* > + * The stripe extent ends after the range we want to delete: > + * > + * |--- RAID Stripe Extent ---| > + * |--- drop ---|--- keep ---| > + * > + * This means we have to duplicate the tree item, truncate the > + * length to the new size and then re-insert the item. > + */ > + if (found_end > end) { > + u64 diff = found_end - end; > + > + btrfs_partially_delete_raid_extent(trans, path, &key, > + diff, diff); > + break; > + } > + > ret = btrfs_del_item(trans, stripe_root, path); > if (ret) > break; > -- > 2.43.0 > >
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 41970bbdb05f..9ffc79f250fb 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -13,6 +13,39 @@ #include "volumes.h" #include "print-tree.h" +static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + const struct btrfs_key *oldkey, + u64 newlen, u64 frontpad) +{ + struct btrfs_stripe_extent *extent; + struct extent_buffer *leaf; + int slot; + size_t item_size; + struct btrfs_key newkey = { + .objectid = oldkey->objectid + frontpad, + .type = BTRFS_RAID_STRIPE_KEY, + .offset = newlen, + }; + + ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY); + + leaf = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size(leaf, slot); + extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); + + for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { + struct btrfs_raid_stride *stride = &extent->strides[i]; + u64 phys; + + phys = btrfs_raid_stride_physical(leaf, stride); + btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad); + } + + btrfs_set_item_key_safe(trans, path, &newkey); +} + int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -36,23 +69,24 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le while (1) { key.objectid = start; key.type = BTRFS_RAID_STRIPE_KEY; - key.offset = length; + key.offset = 0; ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); if (ret < 0) break; - if (ret > 0) { - ret = 0; - if (path->slots[0] == 0) - break; + + if (path->slots[0] == btrfs_header_nritems(path->nodes[0])) path->slots[0]--; - } leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); found_start = key.objectid; found_end = found_start + key.offset; + ret = 0; + + if (key.type != BTRFS_RAID_STRIPE_KEY) + break; /* That stripe ends before we start, we're done. */ if (found_end <= start) @@ -61,7 +95,40 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le trace_btrfs_raid_extent_delete(fs_info, start, end, found_start, found_end); - ASSERT(found_start >= start && found_end <= end); + /* + * The stripe extent starts before the range we want to delete: + * + * |--- RAID Stripe Extent ---| + * |--- keep ---|--- drop ---| + * + * This means we have to duplicate the tree item, truncate the + * length to the new size and then re-insert the item. + */ + if (found_start < start) { + u64 diff = start - found_start; + + btrfs_partially_delete_raid_extent(trans, path, &key, + diff, 0); + break; + } + + /* + * The stripe extent ends after the range we want to delete: + * + * |--- RAID Stripe Extent ---| + * |--- drop ---|--- keep ---| + * + * This means we have to duplicate the tree item, truncate the + * length to the new size and then re-insert the item. + */ + if (found_end > end) { + u64 diff = found_end - end; + + btrfs_partially_delete_raid_extent(trans, path, &key, + diff, diff); + break; + } + ret = btrfs_del_item(trans, stripe_root, path); if (ret) break;