diff mbox series

cache: add fake_lstat()

Message ID xmqqcyykig1l.fsf@gitster.g (mailing list archive)
State New, archived
Headers show
Series cache: add fake_lstat() | expand

Commit Message

Junio C Hamano Sept. 14, 2023, 9:46 p.m. UTC
At times, we may already know that a path represented by a
cache_entry ce has no changes via some out-of-line means, like
fsmonitor, and yet need the control to go through a codepath that
requires us to have "struct stat" obtained by lstat() on the path,
for various purposes (e.g. "ie_match_stat()" wants cached stat-info
is still current wrt "struct stat", "diff" wants to know st_mode).

The callers of lstat() on a tracked file, when its cache_entry knows
it is up-to-date, can instead call this helper to pretend that it
called lstat() by faking the "struct stat" information.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * Just setting the .st_mode member in check_removed() relies on
   the assumption that other members of "struct stat" are never
   looked at a bit too much.  We could use something like this to
   bypass calling lstat() and instead populate from the data we have
   in cache_entry.

 builtin/ls-files.c |  5 ++++-
 read-cache-ll.h    |  8 ++++++++
 read-cache.c       | 27 +++++++++++++++++++++++++++
 statinfo.c         | 27 +++++++++++++++++++++++++++
 statinfo.h         |  8 ++++++++
 5 files changed, 74 insertions(+), 1 deletion(-)

Comments

Junio C Hamano Sept. 14, 2023, 10 p.m. UTC | #1
And your diff-lib change may look like this on top.  This one
unfortunately is completely untested, as I do not use macOS.

Reviewing, testing, and improving it is highly appreciated.

----- >8 ---------- >8 ---------- >8 -----
Subject: diff-lib: fix check_removed() when fsmonitor is active

`git diff-index` may return incorrect deleted entries when fsmonitor
is used in a repository with git submodules. This can be observed on
Mac machines, but it can affect all other supported platforms too.

If fsmonitor is used, `stat *st` is left uninitialied if cache_entry
has CE_FSMONITOR_VALID bit set.  But, there are three call sites
that rely on stat afterwards, which can result in incorrect results.

We can fill members of "struct stat" that matters well enough using
the information we have in "struct cache_entry" that fsmonitor told
us is up-to-date to solve this.

Helped-by: Josip Sokcevic <sokcevic@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff-lib.c                   | 9 ++++++++-
 t/t7527-builtin-fsmonitor.sh | 5 +++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git c/diff-lib.c w/diff-lib.c
index d8aa777a73..7799747a97 100644
--- c/diff-lib.c
+++ w/diff-lib.c
@@ -38,8 +38,15 @@
  */
 static int check_removed(const struct index_state *istate, const struct cache_entry *ce, struct stat *st)
 {
+	int stat_err;
+
 	assert(is_fsmonitor_refreshed(istate));
-	if (!(ce->ce_flags & CE_FSMONITOR_VALID) && lstat(ce->name, st) < 0) {
+
+	if (!(ce->ce_flags & CE_FSMONITOR_VALID))
+		stat_err = lstat(ce->name, st);
+	else
+		stat_err = fake_lstat(ce, st);
+	if (stat_err < 0) {
 		if (!is_missing_file_error(errno))
 			return -1;
 		return 1;
diff --git c/t/t7527-builtin-fsmonitor.sh w/t/t7527-builtin-fsmonitor.sh
index 0c241d6c14..78503158fd 100755
--- c/t/t7527-builtin-fsmonitor.sh
+++ w/t/t7527-builtin-fsmonitor.sh
@@ -809,6 +809,11 @@ my_match_and_clean () {
 		status --porcelain=v2 >actual.without &&
 	test_cmp actual.with actual.without &&
 
+	git -C super --no-optional-locks diff-index --name-status HEAD >actual.with &&
+	git -C super --no-optional-locks -c core.fsmonitor=false \
+		diff-index --name-status HEAD >actual.without &&
+	test_cmp actual.with actual.without &&
+
 	git -C super/dir_1/dir_2/sub reset --hard &&
 	git -C super/dir_1/dir_2/sub clean -d -f
 }
Josip Sokcevic Sept. 15, 2023, 12:17 a.m. UTC | #2
On Thu, Sep 14, 2023 at 3:00 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> And your diff-lib change may look like this on top.  This one
> unfortunately is completely untested, as I do not use macOS.
>
> Reviewing, testing, and improving it is highly appreciated.

The patches look good to me, and all tests pass on macOS.
diff mbox series

Patch

diff --git c/builtin/ls-files.c w/builtin/ls-files.c
index a0229c3277..7031ffcb0a 100644
--- c/builtin/ls-files.c
+++ w/builtin/ls-files.c
@@ -450,7 +450,10 @@  static void show_files(struct repository *repo, struct dir_struct *dir)
 			continue;
 		if (ce_skip_worktree(ce))
 			continue;
-		stat_err = lstat(fullname.buf, &st);
+		if (!(ce->ce_flags & CE_FSMONITOR_VALID))
+			stat_err = lstat(fullname.buf, &st);
+		else
+			stat_err = fake_lstat(ce, &st);
 		if (stat_err && (errno != ENOENT && errno != ENOTDIR))
 			error_errno("cannot lstat '%s'", fullname.buf);
 		if (stat_err && show_deleted) {
diff --git c/read-cache-ll.h w/read-cache-ll.h
index 9a1a7edc5a..2a50a784f0 100644
--- c/read-cache-ll.h
+++ w/read-cache-ll.h
@@ -436,6 +436,14 @@  int match_stat_data_racy(const struct index_state *istate,
 
 void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, struct stat *st);
 
+/*
+ * Fill members of st by members of sd enough to convince match_stat()
+ * to consider that they match.  It should be usable as a replacement
+ * for lstat() for a tracked path that is known to be up-to-date via
+ * some out-of-line means (like fsmonitor).
+ */
+int fake_lstat(const struct cache_entry *ce, struct stat *st);
+
 #define REFRESH_REALLY                   (1 << 0) /* ignore_valid */
 #define REFRESH_UNMERGED                 (1 << 1) /* allow unmerged */
 #define REFRESH_QUIET                    (1 << 2) /* be quiet about it */
diff --git c/read-cache.c w/read-cache.c
index 080bd39713..eb750e2e49 100644
--- c/read-cache.c
+++ w/read-cache.c
@@ -197,6 +197,33 @@  void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, st
 	}
 }
 
+static unsigned int st_mode_from_ce(const struct cache_entry *ce)
+{
+	extern int trust_executable_bit, has_symlinks;
+
+	switch (ce->ce_mode & S_IFMT) {
+	case S_IFLNK:
+		return has_symlinks ? S_IFLNK : (S_IFREG | 0644);
+	case S_IFREG:
+		return (ce->ce_mode & (trust_executable_bit ? 0755 : 0644)) | S_IFREG;
+	case S_IFGITLINK:
+		return S_IFDIR | 0755;
+	case S_IFDIR:
+		return ce->ce_mode;
+	default:
+		BUG("unsupported ce_mode: %o", ce->ce_mode);
+	}
+}
+
+int fake_lstat(const struct cache_entry *ce, struct stat *st)
+{
+	fake_lstat_data(&ce->ce_stat_data, st);
+	st->st_mode = st_mode_from_ce(ce);
+
+	/* always succeed as lstat() replacement */
+	return 0;
+}
+
 static int ce_compare_data(struct index_state *istate,
 			   const struct cache_entry *ce,
 			   struct stat *st)
diff --git c/statinfo.c w/statinfo.c
index 17bb8966c3..45156109de 100644
--- c/statinfo.c
+++ w/statinfo.c
@@ -15,6 +15,33 @@  void fill_stat_data(struct stat_data *sd, struct stat *st)
 	sd->sd_size = st->st_size;
 }
 
+static void set_times(struct stat *st, const struct stat_data *sd)
+{
+	st->st_ctime = sd->sd_ctime.sec;
+	st->st_mtime = sd->sd_mtime.sec;
+#ifdef NO_NSEC
+	; /* nothing */
+#else
+#ifdef USE_ST_TIMESPEC
+	st->st_ctimespec.tv_nsec = sd->sd_ctime.nsec;
+	st->st_mtimespec.tv_nsec = sd->sd_mtime.nsec;
+#else
+	st->st_ctim.tv_nsec = sd->sd_ctime.nsec;
+	st->st_mtim.tv_nsec = sd->sd_mtime.nsec;
+#endif
+#endif
+}
+
+void fake_lstat_data(const struct stat_data *sd, struct stat *st)
+{
+	set_times(st, sd);
+	st->st_dev = sd->sd_dev;
+	st->st_ino = sd->sd_ino;
+	st->st_uid = sd->sd_uid;
+	st->st_gid = sd->sd_gid;
+	st->st_size = sd->sd_size;
+}
+
 int match_stat_data(const struct stat_data *sd, struct stat *st)
 {
 	int changed = 0;
diff --git c/statinfo.h w/statinfo.h
index 700f502ac0..5b21a30f90 100644
--- c/statinfo.h
+++ w/statinfo.h
@@ -46,6 +46,14 @@  struct stat_validity {
  */
 void fill_stat_data(struct stat_data *sd, struct stat *st);
 
+/*
+ * The inverse of the above.  When we know the cache_entry that
+ * contains sd is up-to-date, but still need to pretend we called
+ * lstat() to learn that fact, this function fills "st" enough to
+ * fool ie_match_stat().
+ */
+void fake_lstat_data(const struct stat_data *sd, struct stat *st);
+
 /*
  * Return 0 if st is consistent with a file not having been changed
  * since sd was filled.  If there are differences, return a