diff mbox series

[v2,5/5] patch-id: tighten code to detect the patch header

Message ID 20240730011738.4032377-6-gitster@pobox.com (mailing list archive)
State Accepted
Commit a6e9429f728d088999b815c25adbd2f2c115e051
Headers show
Series Tighten patch header parsing in patch-id | expand

Commit Message

Junio C Hamano July 30, 2024, 1:17 a.m. UTC
The get_one_patchid() function unconditionally takes a line that
matches the patch header (namely, a line that begins with a full
object name, possibly prefixed by "commit" or "From" plus a space)
as the beginning of a patch.  Even when it is *not* looking for one
(namely, when the previous call found the patch header and returned,
and then we are called again to skip the log message and process the
patch whose header was found by the previous invocation).

As a consequence, a line in the commit log message that begins with
one of these patterns can be mistaken to start another patch, with
current message entirely skipped (because we haven't even reached
the patch at all).

Allow the caller to tell us if it called us already and saw the
patch header (in which case we shouldn't be looking for another one,
until we see the "diff" part of the patch; instead we simply should
be skipping these lines as part of the commit log message), and skip
the header processing logic when that is the case.  In the helper
function, it also needs to flip this "are we looking for a header?"
bit, once it finished skipping the commit log message and started
processing the patches, as the patch header of the _next_ message is
the only clue in the input that the current patch is done.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/patch-id.c  | 49 +++++++++++++++++++++++++++++++++------------
 t/t4204-patch-id.sh | 17 ++++++++++++++++
 2 files changed, 53 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index a2fdc0505d..f2e8feb6d3 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -60,10 +60,17 @@  static int scan_hunk_header(const char *p, int *p_before, int *p_after)
 
 /*
  * flag bits to control get_one_patchid()'s behaviour.
+ *
+ * STABLE/VERBATIM are given from the command line option as
+ * --stable/--verbatim.  FIND_HEADER conveys the internal state
+ * maintained by the caller to allow the function to avoid mistaking
+ * lines of log message before seeing the "diff" part as the beginning
+ * of the next patch.
  */
 enum {
 	GOPID_STABLE = (1<<0),		/* --stable */
 	GOPID_VERBATIM = (1<<1),	/* --verbatim */
+	GOPID_FIND_HEADER = (1<<2),	/* stop at the beginning of patch message */
 };
 
 static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
@@ -71,6 +78,7 @@  static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
 {
 	int stable = flags & GOPID_STABLE;
 	int verbatim = flags & GOPID_VERBATIM;
+	int find_header = flags & GOPID_FIND_HEADER;
 	int patchlen = 0, found_next = 0;
 	int before = -1, after = -1;
 	int diff_is_binary = 0;
@@ -86,26 +94,39 @@  static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
 		int len;
 
 		/*
-		 * If we see a line that begins with "<object name>",
-		 * "commit <object name>" or "From <object name>", it is
-		 * the beginning of a patch.  Return to the caller, as
-		 * we are done with the one we have been processing.
+		 * The caller hasn't seen us find a patch header and
+		 * return to it, or we have started processing patch
+		 * and may encounter the beginning of the next patch.
 		 */
-		if (skip_prefix(line, "commit ", &p))
-			;
-		else if (skip_prefix(line, "From ", &p))
-			;
-		if (!get_oid_hex(p, next_oid)) {
-			if (verbatim)
-				the_hash_algo->update_fn(&ctx, line, strlen(line));
-			found_next = 1;
-			break;
+		if (find_header) {
+			/*
+			 * If we see a line that begins with "<object name>",
+			 * "commit <object name>" or "From <object name>", it is
+			 * the beginning of a patch.  Return to the caller, as
+			 * we are done with the one we have been processing.
+			 */
+			if (skip_prefix(line, "commit ", &p))
+				;
+			else if (skip_prefix(line, "From ", &p))
+				;
+			if (!get_oid_hex(p, next_oid)) {
+				if (verbatim)
+					the_hash_algo->update_fn(&ctx, line, strlen(line));
+				found_next = 1;
+				break;
+			}
 		}
 
 		/* Ignore commit comments */
 		if (!patchlen && !starts_with(line, "diff "))
 			continue;
 
+		/*
+		 * We are past the commit log message.  Prepare to
+		 * stop at the beginning of the next patch header.
+		 */
+		find_header = 1;
+
 		/* Parsing diff header?  */
 		if (before == -1) {
 			if (starts_with(line, "GIT binary patch") ||
@@ -201,11 +222,13 @@  static void generate_id_list(unsigned flags)
 	struct strbuf line_buf = STRBUF_INIT;
 
 	oidclr(&oid);
+	flags |= GOPID_FIND_HEADER;
 	while (!feof(stdin)) {
 		patchlen = get_one_patchid(&n, &result, &line_buf, flags);
 		if (patchlen)
 			flush_current_id(&oid, &result);
 		oidcpy(&oid, &n);
+		flags &= ~GOPID_FIND_HEADER;
 	}
 	strbuf_release(&line_buf);
 }
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index 1627fdda1b..b1d98d4110 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -137,6 +137,23 @@  test_expect_success 'patch-id computes the same for various formats' '
 	test_cmp actual expect
 '
 
+hash=$(git rev-parse same:)
+for cruft in "$hash" "commit $hash is bad" "From $hash status"
+do
+	test_expect_success "patch-id with <$cruft> in log message" '
+		git format-patch -1 --stdout same >patch-0 &&
+		git patch-id <patch-0 >expect &&
+
+		{
+			sed -e "/^$/q" patch-0 &&
+			printf "random message\n%s\n\n" "$cruft" &&
+			sed -e "1,/^$/d" patch-0
+		} >patch-cruft &&
+		git patch-id <patch-cruft >actual &&
+		test_cmp actual expect
+	'
+done
+
 test_expect_success 'whitespace is irrelevant in footer' '
 	get_patch_id main &&
 	git checkout same &&