diff mbox series

[1/2] qcow2: handle discard-no-unref in measure

Message ID 20240605090639.3402698-2-jean-louis@dupond.be (mailing list archive)
State New, archived
Headers show
Series [1/2] qcow2: handle discard-no-unref in measure | expand

Commit Message

Jean-Louis Dupond June 5, 2024, 9:06 a.m. UTC
When doing a measure on an image with a backing file and
discard-no-unref is enabled, the code should take this into account.

If for example you have a snapshot image with a base, and you do a
discard within the snapshot, it will be ZERO and ALLOCATED, but without
host offset.
Now if we commit this snapshot, and the clusters in the base image were
allocated, the clusters will only be set to ZERO, but the host offset
will not be cleared.
Therefor ZERO & ALLOCATED clusters in the top image need to check the
base to see if space will be freed or not, to have a correct measure
output.

Bug-Url: https://gitlab.com/qemu-project/qemu/-/issues/2369
Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
---
 block/qcow2.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

Comments

Jean-Louis Dupond June 5, 2024, 10:59 a.m. UTC | #1
On 5/06/2024 11:06, Jean-Louis Dupond wrote:
> When doing a measure on an image with a backing file and
> discard-no-unref is enabled, the code should take this into account.
>
> If for example you have a snapshot image with a base, and you do a
> discard within the snapshot, it will be ZERO and ALLOCATED, but without
> host offset.
> Now if we commit this snapshot, and the clusters in the base image were
> allocated, the clusters will only be set to ZERO, but the host offset
> will not be cleared.
> Therefor ZERO & ALLOCATED clusters in the top image need to check the
> base to see if space will be freed or not, to have a correct measure
> output.
>
> Bug-Url: https://gitlab.com/qemu-project/qemu/-/issues/2369
> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
> ---
>   block/qcow2.c | 36 +++++++++++++++++++++++++++++++++---
>   1 file changed, 33 insertions(+), 3 deletions(-)
>
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 956128b409..1ce7ebbab4 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -5163,9 +5163,16 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
>           } else {
>               int64_t offset;
>               int64_t pnum = 0;
> +            BlockDriverState *parent = bdrv_filter_or_cow_bs(in_bs);
> +            BDRVQcow2State *s = NULL;
> +
> +            if (parent) {
> +                s = parent->opaque;
> +            }
>   
>               for (offset = 0; offset < ssize; offset += pnum) {
>                   int ret;
> +                int retp = 0;
>   
>                   ret = bdrv_block_status_above(in_bs, NULL, offset,
>                                                 ssize - offset, &pnum, NULL,
> @@ -5176,10 +5183,33 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
>                       goto err;
>                   }
>   
> -                if (ret & BDRV_BLOCK_ZERO) {
> +                /* If we have a parent in the chain and the current block is zero but allocated,
> +                 * then we want to check the allocation state of the parent block.
> +                 * If it was allocated and now zero, we want
> +                 * to include it into the calculation, cause it will not free space when
> +                 * committing the top into base with discard-no-unref enabled.
> +                 */
> +                if (parent &&
> +                    ((ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) ==
> +                     (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) &&
> +                     s->discard_no_unref) {
> +                        int64_t pnum_parent = 0;
> +                        retp = bdrv_block_status_above(parent, NULL, offset,
> +                                              ssize - offset, &pnum_parent, NULL,
> +                                              NULL);
> +                        // Check if parent block has an offset
> +                        if (retp & BDRV_BLOCK_OFFSET_VALID) {
> +                            pnum = retp;
This should be `pnum = pnum_parent` of course :)
> +                        }
> +                }
> +                if (ret & BDRV_BLOCK_ZERO && !retp) {
>                       /* Skip zero regions (safe with no backing file) */
> -                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
> -                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
> +                } else if (((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
> +                            (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ||
> +                           (((ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) ==
> +                             (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) &&
> +                            s && s->discard_no_unref &&
> +                            retp & BDRV_BLOCK_OFFSET_VALID)) {
>                       /* Extend pnum to end of cluster for next iteration */
>                       pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
>   


This seems to work fine in my tests with the following commands:
  ./build/qemu-img create -f qcow2 /tmp/test.qcow2 128M
  ./build/qemu-io -c 'open /tmp/test.qcow2' -c 'write 0 8M' -c 'write 
56M 20M' -c 'write 10M 8M' -c 'write 24M 32M'
  ./build/qemu-img create -f qcow2 -b /tmp/test.qcow2 -F qcow2 
/tmp/test_snap.qcow2
  ./build/qemu-io -c 'open -o discard=unmap,discard-no-unref=on 
/tmp/test_snap.qcow2' -c 'write 16M 8M' -c 'discard 60M 20M' -c 'write 
84M 10M'
  ./build/qemu-img measure --output json -O qcow2 'json:{"file": 
{"driver": "file", "filename": "/tmp/test_snap.qcow2"}, "driver": 
"qcow2", "backing": {"driver": "qcow2", "file": {"driver": "file", 
"filename": "/tmp/test.qcow2"}, "backing": null}}'
  ./build/qemu-img measure --output json -O qcow2 'json:{"file": 
{"driver": "file", "filename": "/tmp/test_snap.qcow2"}, "driver": 
"qcow2", "discard":"unmap", "discard-no-unref":true, "backing": 
{"driver": "qcow2", "discard-no-unref":true, "file": {"driver": "file", 
"filename": "/tmp/test.qcow2"}, "backing": null}}'


But it does not seem to work when the base image has ZERO ALLOCATED 
clusters that overlap with the ZERO ALLOCATED clusters in the snapshot.
As its then seen as a single zero cluster by the bdrv_block_status_above 
function.
This happens for example when the base vm was initially running without 
discard-no-unref and enabled it only later.

Any idea's on how to handle that ?

Thanks
Jean-Louis
diff mbox series

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index 956128b409..1ce7ebbab4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5163,9 +5163,16 @@  static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
         } else {
             int64_t offset;
             int64_t pnum = 0;
+            BlockDriverState *parent = bdrv_filter_or_cow_bs(in_bs);
+            BDRVQcow2State *s = NULL;
+
+            if (parent) {
+                s = parent->opaque;
+            }
 
             for (offset = 0; offset < ssize; offset += pnum) {
                 int ret;
+                int retp = 0;
 
                 ret = bdrv_block_status_above(in_bs, NULL, offset,
                                               ssize - offset, &pnum, NULL,
@@ -5176,10 +5183,33 @@  static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
                     goto err;
                 }
 
-                if (ret & BDRV_BLOCK_ZERO) {
+                /* If we have a parent in the chain and the current block is zero but allocated,
+                 * then we want to check the allocation state of the parent block.
+                 * If it was allocated and now zero, we want
+                 * to include it into the calculation, cause it will not free space when
+                 * committing the top into base with discard-no-unref enabled.
+                 */
+                if (parent &&
+                    ((ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) ==
+                     (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) &&
+                     s->discard_no_unref) {
+                        int64_t pnum_parent = 0;
+                        retp = bdrv_block_status_above(parent, NULL, offset,
+                                              ssize - offset, &pnum_parent, NULL,
+                                              NULL);
+                        // Check if parent block has an offset
+                        if (retp & BDRV_BLOCK_OFFSET_VALID) {
+                            pnum = retp;
+                        }
+                }
+                if (ret & BDRV_BLOCK_ZERO && !retp) {
                     /* Skip zero regions (safe with no backing file) */
-                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
-                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
+                } else if (((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
+                            (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ||
+                           (((ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) ==
+                             (BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED)) &&
+                            s && s->discard_no_unref &&
+                            retp & BDRV_BLOCK_OFFSET_VALID)) {
                     /* Extend pnum to end of cluster for next iteration */
                     pnum = ROUND_UP(offset + pnum, cluster_size) - offset;