diff mbox series

[v5,1/2] btrfs: implement partial deletion of RAID stripe extents

Message ID 20241023132518.19830-2-jth@kernel.org (mailing list archive)
State New
Headers show
Series implement truncation for RAID stripe-extents | expand

Commit Message

Johannes Thumshirn Oct. 23, 2024, 1:25 p.m. UTC
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>

In our CI system, the RAID stripe tree configuration sometimes fails with
the following ASSERT():

 assertion failed: found_start >= start && found_end <= end, in fs/btrfs/raid-stripe-tree.c:64

This ASSERT()ion triggers, because for the initial design of RAID
stripe-tree, I had the "one ordered-extent equals one bio" rule of zoned
btrfs in mind.

But for a RAID stripe-tree based system, that is not hosted on a zoned
storage device, but on a regular device this rule doesn't apply.

So in case the range we want to delete starts in the middle of the
previous item, grab the item and "truncate" it's length. That is, clone
the item, subtract the deleted portion from the key's offset, delete the
old item and insert the new one.

In case the range to delete ends in the middle of an item, we have to
adjust both the item's key as well as the stripe extents and then
re-insert the modified clone into the tree after deleting the old stripe
extent.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/raid-stripe-tree.c | 81 +++++++++++++++++++++++++++++++++----
 1 file changed, 74 insertions(+), 7 deletions(-)

Comments

Filipe Manana Oct. 23, 2024, 1:46 p.m. UTC | #1
On Wed, Oct 23, 2024 at 2:25 PM Johannes Thumshirn <jth@kernel.org> wrote:
>
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>
> In our CI system, the RAID stripe tree configuration sometimes fails with
> the following ASSERT():
>
>  assertion failed: found_start >= start && found_end <= end, in fs/btrfs/raid-stripe-tree.c:64
>
> This ASSERT()ion triggers, because for the initial design of RAID
> stripe-tree, I had the "one ordered-extent equals one bio" rule of zoned
> btrfs in mind.
>
> But for a RAID stripe-tree based system, that is not hosted on a zoned
> storage device, but on a regular device this rule doesn't apply.
>
> So in case the range we want to delete starts in the middle of the
> previous item, grab the item and "truncate" it's length. That is, clone
> the item, subtract the deleted portion from the key's offset, delete the
> old item and insert the new one.
>
> In case the range to delete ends in the middle of an item, we have to
> adjust both the item's key as well as the stripe extents and then
> re-insert the modified clone into the tree after deleting the old stripe
> extent.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Looks good now, thanks.

> ---
>  fs/btrfs/raid-stripe-tree.c | 81 +++++++++++++++++++++++++++++++++----
>  1 file changed, 74 insertions(+), 7 deletions(-)
>
> diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
> index 41970bbdb05f..9ffc79f250fb 100644
> --- a/fs/btrfs/raid-stripe-tree.c
> +++ b/fs/btrfs/raid-stripe-tree.c
> @@ -13,6 +13,39 @@
>  #include "volumes.h"
>  #include "print-tree.h"
>
> +static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
> +                                              struct btrfs_path *path,
> +                                              const struct btrfs_key *oldkey,
> +                                              u64 newlen, u64 frontpad)
> +{
> +       struct btrfs_stripe_extent *extent;
> +       struct extent_buffer *leaf;
> +       int slot;
> +       size_t item_size;
> +       struct btrfs_key newkey = {
> +               .objectid = oldkey->objectid + frontpad,
> +               .type = BTRFS_RAID_STRIPE_KEY,
> +               .offset = newlen,
> +       };
> +
> +       ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
> +
> +       leaf = path->nodes[0];
> +       slot = path->slots[0];
> +       item_size = btrfs_item_size(leaf, slot);
> +       extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
> +
> +       for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
> +               struct btrfs_raid_stride *stride = &extent->strides[i];
> +               u64 phys;
> +
> +               phys = btrfs_raid_stride_physical(leaf, stride);
> +               btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad);
> +       }
> +
> +       btrfs_set_item_key_safe(trans, path, &newkey);
> +}
> +
>  int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
>  {
>         struct btrfs_fs_info *fs_info = trans->fs_info;
> @@ -36,23 +69,24 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
>         while (1) {
>                 key.objectid = start;
>                 key.type = BTRFS_RAID_STRIPE_KEY;
> -               key.offset = length;
> +               key.offset = 0;
>
>                 ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
>                 if (ret < 0)
>                         break;
> -               if (ret > 0) {
> -                       ret = 0;
> -                       if (path->slots[0] == 0)
> -                               break;
> +
> +               if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
>                         path->slots[0]--;
> -               }
>
>                 leaf = path->nodes[0];
>                 slot = path->slots[0];
>                 btrfs_item_key_to_cpu(leaf, &key, slot);
>                 found_start = key.objectid;
>                 found_end = found_start + key.offset;
> +               ret = 0;
> +
> +               if (key.type != BTRFS_RAID_STRIPE_KEY)
> +                       break;
>
>                 /* That stripe ends before we start, we're done. */
>                 if (found_end <= start)
> @@ -61,7 +95,40 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
>                 trace_btrfs_raid_extent_delete(fs_info, start, end,
>                                                found_start, found_end);
>
> -               ASSERT(found_start >= start && found_end <= end);
> +               /*
> +                * The stripe extent starts before the range we want to delete:
> +                *
> +                * |--- RAID Stripe Extent ---|
> +                * |--- keep  ---|--- drop ---|
> +                *
> +                * This means we have to duplicate the tree item, truncate the
> +                * length to the new size and then re-insert the item.
> +                */
> +               if (found_start < start) {
> +                       u64 diff = start - found_start;
> +
> +                       btrfs_partially_delete_raid_extent(trans, path, &key,
> +                                                          diff, 0);
> +                       break;
> +               }
> +
> +               /*
> +                * The stripe extent ends after the range we want to delete:
> +                *
> +                * |--- RAID Stripe Extent ---|
> +                * |--- drop  ---|--- keep ---|
> +                *
> +                * This means we have to duplicate the tree item, truncate the
> +                * length to the new size and then re-insert the item.
> +                */
> +               if (found_end > end) {
> +                       u64 diff = found_end - end;
> +
> +                       btrfs_partially_delete_raid_extent(trans, path, &key,
> +                                                          diff, diff);
> +                       break;
> +               }
> +
>                 ret = btrfs_del_item(trans, stripe_root, path);
>                 if (ret)
>                         break;
> --
> 2.43.0
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 41970bbdb05f..9ffc79f250fb 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -13,6 +13,39 @@ 
 #include "volumes.h"
 #include "print-tree.h"
 
+static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
+					       struct btrfs_path *path,
+					       const struct btrfs_key *oldkey,
+					       u64 newlen, u64 frontpad)
+{
+	struct btrfs_stripe_extent *extent;
+	struct extent_buffer *leaf;
+	int slot;
+	size_t item_size;
+	struct btrfs_key newkey = {
+		.objectid = oldkey->objectid + frontpad,
+		.type = BTRFS_RAID_STRIPE_KEY,
+		.offset = newlen,
+	};
+
+	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	item_size = btrfs_item_size(leaf, slot);
+	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
+
+	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
+		struct btrfs_raid_stride *stride = &extent->strides[i];
+		u64 phys;
+
+		phys = btrfs_raid_stride_physical(leaf, stride);
+		btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad);
+	}
+
+	btrfs_set_item_key_safe(trans, path, &newkey);
+}
+
 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -36,23 +69,24 @@  int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
 	while (1) {
 		key.objectid = start;
 		key.type = BTRFS_RAID_STRIPE_KEY;
-		key.offset = length;
+		key.offset = 0;
 
 		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
 		if (ret < 0)
 			break;
-		if (ret > 0) {
-			ret = 0;
-			if (path->slots[0] == 0)
-				break;
+
+		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
 			path->slots[0]--;
-		}
 
 		leaf = path->nodes[0];
 		slot = path->slots[0];
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		found_start = key.objectid;
 		found_end = found_start + key.offset;
+		ret = 0;
+
+		if (key.type != BTRFS_RAID_STRIPE_KEY)
+			break;
 
 		/* That stripe ends before we start, we're done. */
 		if (found_end <= start)
@@ -61,7 +95,40 @@  int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
 		trace_btrfs_raid_extent_delete(fs_info, start, end,
 					       found_start, found_end);
 
-		ASSERT(found_start >= start && found_end <= end);
+		/*
+		 * The stripe extent starts before the range we want to delete:
+		 *
+		 * |--- RAID Stripe Extent ---|
+		 * |--- keep  ---|--- drop ---|
+		 *
+		 * This means we have to duplicate the tree item, truncate the
+		 * length to the new size and then re-insert the item.
+		 */
+		if (found_start < start) {
+			u64 diff = start - found_start;
+
+			btrfs_partially_delete_raid_extent(trans, path, &key,
+							   diff, 0);
+			break;
+		}
+
+		/*
+		 * The stripe extent ends after the range we want to delete:
+		 *
+		 * |--- RAID Stripe Extent ---|
+		 * |--- drop  ---|--- keep ---|
+		 *
+		 * This means we have to duplicate the tree item, truncate the
+		 * length to the new size and then re-insert the item.
+		 */
+		if (found_end > end) {
+			u64 diff = found_end - end;
+
+			btrfs_partially_delete_raid_extent(trans, path, &key,
+							   diff, diff);
+			break;
+		}
+
 		ret = btrfs_del_item(trans, stripe_root, path);
 		if (ret)
 			break;