diff mbox series

[1/4] qemu-img: implement compare --stat

Message ID 20210929133427.44923-2-vsementsov@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series qemu-img compare --stat | expand

Commit Message

Vladimir Sementsov-Ogievskiy Sept. 29, 2021, 1:34 p.m. UTC
With new option qemu-img compare will not stop at first mismatch, but
instead calculate statistics: how many clusters with different data,
how many clusters with equal data, how many clusters were unallocated
but become data and so on.

We compare images chunk by chunk. Chunk size depends on what
block_status returns for both images. It may return less than cluster
(remember about qcow2 subclusters), it may return more than cluster (if
several consecutive clusters share same status). Finally images may
have different cluster sizes. This all leads to ambiguity in how to
finally compare the data.

What we can say for sure is that, when we compare two qcow2 images with
same cluster size, we should compare clusters with data separately.
Otherwise, if we for example compare 10 consecutive clusters of data
where only one byte differs we'll report 10 different clusters.
Expected result in this case is 1 different cluster and 9 equal ones.

So, to serve this case and just to have some defined rule let's do the
following:

1. Select some block-size for compare procedure. In this commit it must
   be specified by user, next commit will add some automatic logic and
   make --block-size optional.

2. Go chunk-by-chunk using block_status as we do now with only one
   differency:
   If block_status() returns DATA region that intersects block-size
   aligned boundary, crop this region at this boundary.

This way it's still possible to compare less than cluster and report
subcluster-level accuracy, but we newer compare more than one cluster
of data.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 docs/tools/qemu-img.rst |  18 +++-
 qemu-img.c              | 206 +++++++++++++++++++++++++++++++++++++---
 qemu-img-cmds.hx        |   4 +-
 3 files changed, 212 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index d58980aef8..21164253d4 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -159,6 +159,18 @@  Parameters to compare subcommand:
 
   Strict mode - fail on different image size or sector allocation
 
+.. option:: --stat
+
+  Instead of exit on first mismatch compare the whole images and print
+  statistics on amount of different pairs of clusters, based on their
+  block-status and are they equal or not.
+
+.. option:: --block-size BLOCK_SIZE
+
+  Block size for comparing with ``--stat``. This doesn't guarantee exact
+  size of comparing chunks, but that guarantee that data chunks being
+  compared will never cross aligned block-size boundary.
+
 Parameters to convert subcommand:
 
 .. program:: qemu-img-convert
@@ -378,7 +390,7 @@  Command description:
 
   The rate limit for the commit process is specified by ``-r``.
 
-.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2
+.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] [--stat --block-size BLOCK_SIZE] FILENAME1 FILENAME2
 
   Check if two images have the same content. You can compare images with
   different format or settings.
@@ -405,9 +417,9 @@  Command description:
   The following table sumarizes all exit codes of the compare subcommand:
 
   0
-    Images are identical (or requested help was printed)
+    Images are identical (or requested help was printed, or ``--stat`` was used)
   1
-    Images differ
+    Images differ (1 is never returned when ``--stat`` option specified)
   2
     Error on opening an image
   3
diff --git a/qemu-img.c b/qemu-img.c
index f036a1d428..79a0589167 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -83,6 +83,8 @@  enum {
     OPTION_BITMAPS = 275,
     OPTION_FORCE = 276,
     OPTION_SKIP_BROKEN = 277,
+    OPTION_STAT = 277,
+    OPTION_BLOCK_SIZE = 278,
 };
 
 typedef enum OutputFormat {
@@ -1304,6 +1306,107 @@  static int check_empty_sectors(BlockBackend *blk, int64_t offset,
     return 0;
 }
 
+#define IMG_CMP_STATUS_MASK (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO | \
+                             BDRV_BLOCK_ALLOCATED)
+#define IMG_CMP_STATUS_MAX (IMG_CMP_STATUS_MASK | BDRV_BLOCK_EOF)
+
+typedef struct ImgCmpStat {
+    /* stat: [ret: 0 is equal, 1 is not][status1][status2] -> n_bytes */
+    uint64_t stat[2][IMG_CMP_STATUS_MAX + 1][IMG_CMP_STATUS_MAX + 1];
+} ImgCmpStat;
+
+/*
+ * To denote chunks after EOF when compared files are of different size, use
+ * corresponding status = -1
+ */
+static void cmp_stat_account(ImgCmpStat *stat, bool differs,
+                             int status1, int status2, int64_t bytes)
+{
+    assert(status1 >= -1);
+    if (status1 == -1) {
+        /*
+         * Note BDRV_BLOCK_EOF: we abuse it here a bit. User is not interested
+         * in EOF flag in the last chunk of file (especially when we trim the
+         * chunk and EOF becomes incorrect), so BDRV_BLOCK_EOF is not in
+         * IMG_CMP_STATUS_MASK. We instead use BDRV_BLOCK_EOF to illustrate
+         * chunks after-end-of-file in shorter file.
+         */
+        status1 = BDRV_BLOCK_EOF | BDRV_BLOCK_ZERO;
+    } else {
+        status1 &= IMG_CMP_STATUS_MASK;
+    }
+
+    assert(status2 >= -1);
+    if (status2 == -1) {
+        status2 = BDRV_BLOCK_EOF | BDRV_BLOCK_ZERO;
+    } else {
+        status2 &= IMG_CMP_STATUS_MASK;
+    }
+
+    stat->stat[differs][status1][status2] += bytes;
+}
+
+static void cmp_stat_print_agenda(void)
+{
+    printf("Compare stats:\n"
+           "Agenda\n"
+           "D: DATA\n"
+           "Z: ZERO\n"
+           "A: ALLOCATED\n"
+           "E: after end of file\n\n");
+}
+
+static void cmp_stat_print_status(int status)
+{
+    printf("%s%s%s%s",
+           status & BDRV_BLOCK_DATA ? "D" : "_",
+           status & BDRV_BLOCK_ZERO ? "Z" : "_",
+           status & BDRV_BLOCK_ALLOCATED ? "A" : "_",
+           status & BDRV_BLOCK_EOF ? "E" : "_");
+}
+
+static void cmp_stat_print(ImgCmpStat *stat, int64_t total_bytes)
+{
+    int ret, status1, status2;
+
+    cmp_stat_print_agenda();
+
+    for (ret = 0; ret <= 1; ret++) {
+        uint64_t total_in_group = 0;
+
+        if (ret == 0) {
+            printf("Equal:\n");
+        } else {
+            printf("\nUnequal:\n");
+        }
+
+        for (status1 = 0; status1 <= IMG_CMP_STATUS_MAX; status1++) {
+            for (status2 = 0; status2 <= IMG_CMP_STATUS_MAX; status2++) {
+                uint64_t bytes = stat->stat[ret][status1][status2];
+                g_autofree char *bytes_str = NULL;
+
+                if (!bytes) {
+                    continue;
+                }
+
+                total_in_group += bytes;
+
+                cmp_stat_print_status(status1);
+                printf(" -> ");
+                cmp_stat_print_status(status2);
+
+                bytes_str = size_to_str(bytes);
+                printf(" %" PRIu64 " bytes (%s) %.1f%%\n", bytes, bytes_str,
+                       100.0 * bytes / total_bytes);
+            }
+        }
+
+        if (!total_in_group) {
+            printf("<nothing>\n");
+        }
+    }
+}
+
 /*
  * Compares two images. Exit codes:
  *
@@ -1320,6 +1423,8 @@  static int img_compare(int argc, char **argv)
     uint8_t *buf1 = NULL, *buf2 = NULL;
     int64_t pnum1, pnum2;
     int allocated1, allocated2;
+    int status1, status2;
+    int64_t block_end;
     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
     bool progress = false, quiet = false, strict = false;
     int flags;
@@ -1331,6 +1436,9 @@  static int img_compare(int argc, char **argv)
     uint64_t progress_base;
     bool image_opts = false;
     bool force_share = false;
+    ImgCmpStat stat = {0};
+    bool do_stat;
+    int64_t block_size = 0;
 
     cache = BDRV_DEFAULT_CACHE;
     for (;;) {
@@ -1339,6 +1447,8 @@  static int img_compare(int argc, char **argv)
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {"force-share", no_argument, 0, 'U'},
+            {"stat", no_argument, 0, OPTION_STAT},
+            {"block-size", required_argument, 0, OPTION_BLOCK_SIZE},
             {0, 0, 0, 0}
         };
         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
@@ -1395,6 +1505,15 @@  static int img_compare(int argc, char **argv)
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
+        case OPTION_STAT:
+            do_stat = true;
+            break;
+        case OPTION_BLOCK_SIZE:
+            block_size = cvtnum_full("block size", optarg, 1, INT64_MAX);
+            if (block_size < 0) {
+                exit(EXIT_SUCCESS);
+            }
+            break;
         }
     }
 
@@ -1410,6 +1529,20 @@  static int img_compare(int argc, char **argv)
     filename1 = argv[optind++];
     filename2 = argv[optind++];
 
+    if (!do_stat && block_size) {
+        error_report("--block-size can be used only together with --stat");
+        ret = 1;
+        goto out;
+    }
+
+    if (do_stat && !block_size) {
+        /* TODO: make block-size optional */
+        error_report("You must specify --block-size together with --stat");
+        ret = 1;
+        goto out;
+    }
+
+
     /* Initialize before goto out */
     qemu_progress_init(progress, 2.0);
 
@@ -1465,7 +1598,7 @@  static int img_compare(int argc, char **argv)
     }
 
     while (offset < total_size) {
-        int status1, status2;
+        block_end = QEMU_ALIGN_UP(offset + 1, block_size);
 
         status1 = bdrv_block_status_above(bs1, NULL, offset,
                                           total_size1 - offset, &pnum1, NULL,
@@ -1476,6 +1609,10 @@  static int img_compare(int argc, char **argv)
             goto out;
         }
         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
+        if (do_stat && (status1 & BDRV_BLOCK_DATA)) {
+            /* Don't compare cross-block data */
+            pnum1 = MIN(block_end, offset + pnum1) - offset;
+        }
 
         status2 = bdrv_block_status_above(bs2, NULL, offset,
                                           total_size2 - offset, &pnum2, NULL,
@@ -1486,11 +1623,15 @@  static int img_compare(int argc, char **argv)
             goto out;
         }
         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
+        if (do_stat && (status2 & BDRV_BLOCK_DATA)) {
+            /* Don't compare cross-block data */
+            pnum2 = MIN(block_end, offset + pnum2) - offset;
+        }
 
         assert(pnum1 && pnum2);
         chunk = MIN(pnum1, pnum2);
 
-        if (strict) {
+        if (strict && !do_stat) {
             if (status1 != status2) {
                 ret = 1;
                 qprintf(quiet, "Strict mode: Offset %" PRId64
@@ -1499,7 +1640,8 @@  static int img_compare(int argc, char **argv)
             }
         }
         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
-            /* nothing to do */
+            /* nothing to do, equal: */
+            ret = 0;
         } else if (allocated1 == allocated2) {
             if (allocated1) {
                 int64_t pnum;
@@ -1523,25 +1665,37 @@  static int img_compare(int argc, char **argv)
                 }
                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
                 if (ret || pnum != chunk) {
-                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
+                    qprintf(quiet || do_stat,
+                            "Content mismatch at offset %" PRId64 "!\n",
                             offset + (ret ? 0 : pnum));
                     ret = 1;
-                    goto out;
                 }
+            } else {
+                /* Consider unallocated areas equal. */
+                ret = 0;
             }
         } else {
             chunk = MIN(chunk, IO_BUF_SIZE);
             if (allocated1) {
                 ret = check_empty_sectors(blk1, offset, chunk,
-                                          filename1, buf1, quiet);
+                                          filename1, buf1, quiet || do_stat);
             } else {
                 ret = check_empty_sectors(blk2, offset, chunk,
-                                          filename2, buf1, quiet);
+                                          filename2, buf1, quiet || do_stat);
             }
-            if (ret) {
+            assert(ret == 0 || ret == 1 || ret == 4);
+            if (ret == 4) {
                 goto out;
             }
         }
+        assert(ret == 0 || ret == 1);
+
+        if (do_stat) {
+            cmp_stat_account(&stat, ret, status1, status2, chunk);
+        } else if (ret) {
+            goto out;
+        }
+
         offset += chunk;
         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
     }
@@ -1549,17 +1703,24 @@  static int img_compare(int argc, char **argv)
     if (total_size1 != total_size2) {
         BlockBackend *blk_over;
         const char *filename_over;
+        int *status_over;
 
         qprintf(quiet, "Warning: Image size mismatch!\n");
         if (total_size1 > total_size2) {
             blk_over = blk1;
             filename_over = filename1;
+            status_over = &status1;
+            status2 = -1; /* Denote after-EOF for cmp_stat_account() */
         } else {
             blk_over = blk2;
             filename_over = filename2;
+            status1 = -1; /* Denote after-EOF for cmp_stat_account() */
+            status_over = &status2;
         }
 
         while (offset < progress_base) {
+            block_end = QEMU_ALIGN_UP(offset + 1, block_size);
+
             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                           progress_base - offset, &chunk,
                                           NULL, NULL);
@@ -1570,20 +1731,43 @@  static int img_compare(int argc, char **argv)
                 goto out;
 
             }
+            if (do_stat && (ret & BDRV_BLOCK_DATA)) {
+                /* Don't compare cross-block data */
+                chunk = MIN(block_end, offset + chunk) - offset;
+            }
+            *status_over = ret;
+
             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                 chunk = MIN(chunk, IO_BUF_SIZE);
                 ret = check_empty_sectors(blk_over, offset, chunk,
-                                          filename_over, buf1, quiet);
-                if (ret) {
+                                          filename_over, buf1,
+                                          quiet || do_stat);
+                assert(ret == 0 || ret == 1 || ret == 4);
+                if (ret == 4) {
                     goto out;
                 }
+            } else {
+                ret = 0;
             }
+            assert(ret == 0 || ret == 1);
+
+            if (do_stat) {
+                cmp_stat_account(&stat, ret, status1, status2, chunk);
+            } else if (ret) {
+                goto out;
+            }
+
             offset += chunk;
             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
         }
     }
 
-    qprintf(quiet, "Images are identical.\n");
+    if (do_stat) {
+        cmp_stat_print(&stat, progress_base);
+    } else {
+        qprintf(quiet, "Images are identical.\n");
+    }
+
     ret = 0;
 
 out:
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 4c4d94ab22..0b2d63ca5f 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -40,9 +40,9 @@  SRST
 ERST
 
 DEF("compare", img_compare,
-    "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] filename1 filename2")
+    "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] [--stat --block-size BLOCK_SIZE] filename1 filename2")
 SRST
-.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] FILENAME1 FILENAME2
+.. option:: compare [--object OBJECTDEF] [--image-opts] [-f FMT] [-F FMT] [-T SRC_CACHE] [-p] [-q] [-s] [-U] [--stat --block-size BLOCK_SIZE] FILENAME1 FILENAME2
 ERST
 
 DEF("convert", img_convert,