diff mbox series

[1/4] fsck: create scaffolding for rev-index checks

Message ID 6bc3c56453ee2d0263210c233dbc946b5dbdcb4d.1681748502.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit 0d30feef3c55f63f8db1dc1e52071090d16dfaaf
Headers show
Series git fsck: check pack rev-index files | expand

Commit Message

Derrick Stolee April 17, 2023, 4:21 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

The 'fsck' builtin checks many of Git's on-disk data structures, but
does not currently validate the pack rev-index files (a .rev file to
pair with a .pack and .idx file).

Before doing a more-involved check process, create the scaffolding
within builtin/fsck.c to have a new error type and add that error type
when the API method verify_pack_revindex() returns an error. That method
does nothing currently, but we will add checks to it in later changes.

For now, check that 'git fsck' succeeds without any errors in the normal
case. Future checks will be paired with tests that corrupt the .rev file
appropriately.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 builtin/fsck.c           | 30 ++++++++++++++++++++++++++++++
 pack-revindex.c          | 11 +++++++++++
 pack-revindex.h          |  8 ++++++++
 t/t5325-reverse-index.sh | 14 ++++++++++++++
 4 files changed, 63 insertions(+)

Comments

Taylor Blau April 17, 2023, 10:20 p.m. UTC | #1
On Mon, Apr 17, 2023 at 04:21:38PM +0000, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
>
> The 'fsck' builtin checks many of Git's on-disk data structures, but
> does not currently validate the pack rev-index files (a .rev file to
> pair with a .pack and .idx file).
>
> Before doing a more-involved check process, create the scaffolding
> within builtin/fsck.c to have a new error type and add that error type
> when the API method verify_pack_revindex() returns an error. That method
> does nothing currently, but we will add checks to it in later changes.
>
> For now, check that 'git fsck' succeeds without any errors in the normal
> case. Future checks will be paired with tests that corrupt the .rev file
> appropriately.
>
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  builtin/fsck.c           | 30 ++++++++++++++++++++++++++++++
>  pack-revindex.c          | 11 +++++++++++
>  pack-revindex.h          |  8 ++++++++
>  t/t5325-reverse-index.sh | 14 ++++++++++++++
>  4 files changed, 63 insertions(+)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index 095b39d3980..2ab78129bde 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -24,6 +24,7 @@
>  #include "resolve-undo.h"
>  #include "run-command.h"
>  #include "worktree.h"
> +#include "pack-revindex.h"
>
>  #define REACHABLE 0x0001
>  #define SEEN      0x0002
> @@ -53,6 +54,7 @@ static int name_objects;
>  #define ERROR_REFS 010
>  #define ERROR_COMMIT_GRAPH 020
>  #define ERROR_MULTI_PACK_INDEX 040
> +#define ERROR_PACK_REV_INDEX 0100
>
>  static const char *describe_object(const struct object_id *oid)
>  {
> @@ -856,6 +858,32 @@ static int mark_packed_for_connectivity(const struct object_id *oid,
>  	return 0;
>  }
>
> +static int check_pack_rev_indexes(struct repository *r, int show_progress)
> +{
> +	struct progress *progress = NULL;
> +	uint32_t pack_count = 0;
> +	int res = 0;
> +
> +	if (show_progress) {
> +		for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next)

It's going to take me a while to get used to these declarations inside
of for-loops!

> +			pack_count++;
> +		progress = start_delayed_progress("Verifying reverse pack-indexes", pack_count);

I wonder if we want to count over the sum of objects in packs rather
than the number of packs themselves. My worry would be that a rather
large pack would make it appear as if nothing is happening when in
reality we're just churning through a lot of objects.

> +		pack_count = 0;
> +	}
> +
> +	for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next) {
> +		if (!load_pack_revindex(the_repository, p) &&

I was going to comment that I wasn't sure if `load_pack_revindex()` was
the right thing here, since we don't care about validating the
on-the-fly reverse indexes that we generate.

But I see in your 3/4 that you are comparing the values on disk to those
in memory, which is very nice.

> +		    verify_pack_revindex(p)) {

Inside of verify_pack_revindex(), it says that a negative number is
returned on error. Do we care about disambiguating >= 0 here? IOW,
should this be:

    if (!load_pack_revindex(the_repository, p) || verify_pack_revindex(p) < 0)

?

All looking good otherwise.

Thanks,
Taylor
diff mbox series

Patch

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 095b39d3980..2ab78129bde 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -24,6 +24,7 @@ 
 #include "resolve-undo.h"
 #include "run-command.h"
 #include "worktree.h"
+#include "pack-revindex.h"
 
 #define REACHABLE 0x0001
 #define SEEN      0x0002
@@ -53,6 +54,7 @@  static int name_objects;
 #define ERROR_REFS 010
 #define ERROR_COMMIT_GRAPH 020
 #define ERROR_MULTI_PACK_INDEX 040
+#define ERROR_PACK_REV_INDEX 0100
 
 static const char *describe_object(const struct object_id *oid)
 {
@@ -856,6 +858,32 @@  static int mark_packed_for_connectivity(const struct object_id *oid,
 	return 0;
 }
 
+static int check_pack_rev_indexes(struct repository *r, int show_progress)
+{
+	struct progress *progress = NULL;
+	uint32_t pack_count = 0;
+	int res = 0;
+
+	if (show_progress) {
+		for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next)
+			pack_count++;
+		progress = start_delayed_progress("Verifying reverse pack-indexes", pack_count);
+		pack_count = 0;
+	}
+
+	for (struct packed_git *p = get_all_packs(the_repository); p; p = p->next) {
+		if (!load_pack_revindex(the_repository, p) &&
+		    verify_pack_revindex(p)) {
+			error(_("invalid rev-index for pack '%s'"), p->pack_name);
+			res = ERROR_PACK_REV_INDEX;
+		}
+		display_progress(progress, ++pack_count);
+	}
+	stop_progress(&progress);
+
+	return res;
+}
+
 static char const * const fsck_usage[] = {
 	N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n"
 	   "         [--[no-]full] [--strict] [--verbose] [--lost-found]\n"
@@ -1019,6 +1047,8 @@  int cmd_fsck(int argc, const char **argv, const char *prefix)
 		free_worktrees(worktrees);
 	}
 
+	errors_found |= check_pack_rev_indexes(the_repository, show_progress);
+
 	check_connectivity();
 
 	if (the_repository->settings.core_commit_graph) {
diff --git a/pack-revindex.c b/pack-revindex.c
index 29f5358b256..c3f2aaa3fea 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -301,6 +301,17 @@  int load_pack_revindex(struct repository *r, struct packed_git *p)
 	return -1;
 }
 
+/*
+ * verify_pack_revindex verifies that the on-disk rev-index for the given
+ * pack-file is the same that would be created if written from scratch.
+ *
+ * A negative number is returned on error.
+ */
+int verify_pack_revindex(struct packed_git *p)
+{
+	return 0;
+}
+
 int load_midx_revindex(struct multi_pack_index *m)
 {
 	struct strbuf revindex_name = STRBUF_INIT;
diff --git a/pack-revindex.h b/pack-revindex.h
index 46e834064e1..c8861873b02 100644
--- a/pack-revindex.h
+++ b/pack-revindex.h
@@ -51,6 +51,14 @@  struct repository;
  */
 int load_pack_revindex(struct repository *r, struct packed_git *p);
 
+/*
+ * verify_pack_revindex verifies that the on-disk rev-index for the given
+ * pack-file is the same that would be created if written from scratch.
+ *
+ * A negative number is returned on error.
+ */
+int verify_pack_revindex(struct packed_git *p);
+
 /*
  * load_midx_revindex loads the '.rev' file corresponding to the given
  * multi-pack index by mmap-ing it and assigning pointers in the
diff --git a/t/t5325-reverse-index.sh b/t/t5325-reverse-index.sh
index 0548fce1aa6..206c412f50b 100755
--- a/t/t5325-reverse-index.sh
+++ b/t/t5325-reverse-index.sh
@@ -131,4 +131,18 @@  test_expect_success 'revindex in-memory vs on-disk' '
 		test_cmp on-disk in-core
 	)
 '
+
+test_expect_success 'fsck succeeds on good rev-index' '
+	test_when_finished rm -fr repo &&
+	git init repo &&
+	(
+		cd repo &&
+
+		test_commit commit &&
+		git -c pack.writeReverseIndex=true repack -ad &&
+		git fsck 2>err &&
+		test_must_be_empty err
+	)
+'
+
 test_done