diff mbox series

[v3] receive-pack.c: consolidate find header logic

Message ID pull.1125.v3.git.git.1640931460758.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v3] receive-pack.c: consolidate find header logic | expand

Commit Message

John Cai Dec. 31, 2021, 6:17 a.m. UTC
From: John Cai <johncai86@gmail.com>

There are two functions that have very similar logic for finding a header
value: find_commit_header and find_header. We can consolidate the logic
by using find_commit_header and replacing the logic in find_header.

Introduce a new function find_header_mem, which is equivalent to
find_commit_header except it takes a len parameter that determines how
many bytes to read. find_commit_header can then call find_header_mem
with strlen(msg) as the len.

This cleans up duplicate logic, as the logic for finding header values
is now all in one place.

Signed-off-by: John Cai <johncai86@gmail.com>
---
    Consolidate find_header logic into one function
    
    This addresses the NEEDSWORK comment in builtin/receive-pack.c:
    
     /**
       * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
       * after dropping "_commit" from its name and possibly moving it out
       * of commit.c
       **/
    
    
    There is some duplicated logic between find_header and
    find_commit_header that can be consolidated instead of having two places
    in the code that do essentially the same thing. For the sake of simpler
    and more DRY code, use find_commit_header and rename it to find_header
    since it is not limited to finding headers for only commits.
    
    Changes since v2:
    
     * s/find_header_max/find_header_mem
     * moved "len" argument right next to "msg" argument in find_header_mem
     * removed special condition in find_header_mem to check for line == 0

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1125%2Fjohn-cai%2Fjc%2Freplace-find-header-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1125/john-cai/jc/replace-find-header-v3
Pull-Request: https://github.com/git/git/pull/1125

Range-diff vs v2:

 1:  5e0d90d642b ! 1:  463df42e7e0 receive-pack.c: consolidate find header logic
     @@ builtin/receive-pack.c: static char *prepare_push_cert_nonce(const char *path, t
      -	}
      -	return NULL;
      +	size_t out_len;
     -+	const char *val = find_header_max(msg, key, len, &out_len);
     ++	const char *val = find_header_mem(msg, len, key, &out_len);
      +
      +	if (val == NULL)
      +		return NULL;
     @@ commit.c: struct commit_list **commit_list_append(struct commit *commit,
       }
       
      -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
     -+const char *find_header_max(const char *msg, const char *key,
     -+			size_t len,
     -+			size_t *out_len)
     ++const char *find_header_mem(const char *msg, size_t len,
     ++			const char *key, size_t *out_len)
       {
       	int key_len = strlen(key);
       	const char *line = msg;
       
      -	while (line) {
     -+	while (line && (len == 0 || line < msg + len)) {
     ++	while (line && line < msg + len) {
       		const char *eol = strchrnul(line, '\n');
       
       		if (line == eol)
     @@ commit.c: const char *find_commit_header(const char *msg, const char *key, size_
       
      +const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
      +{
     -+	return find_header_max(msg, key, 0, out_len);
     ++	return find_header_mem(msg, strlen(msg), key, out_len);
      +}
       /*
        * Inspect the given string and determine the true "end" of the log message, in
     @@ commit.h: void free_commit_extra_headers(struct commit_extra_header *extra);
        * Note that some headers (like mergetag) may be multi-line. It is the caller's
        * responsibility to parse further in this case!
        */
     -+const char *find_header_max(const char *msg, const char *key,
     -+			size_t len,
     ++const char *find_header_mem(const char *msg, size_t len,
     ++			const char *key,
      +			size_t *out_len);
      +
       const char *find_commit_header(const char *msg, const char *key,


 builtin/receive-pack.c | 33 ++++++++++-----------------------
 commit.c               |  9 +++++++--
 commit.h               |  5 +++++
 3 files changed, 22 insertions(+), 25 deletions(-)


base-commit: 55b058a8bbcc54bd93c733035c995abc7967e539

Comments

Junio C Hamano Jan. 4, 2022, 1:56 a.m. UTC | #1
"John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +	size_t out_len;
> +	const char *val = find_header_mem(msg, len, key, &out_len);
> +
> +	if (val == NULL)

Style:

	if (!val)

> +		return NULL;
> +
> +	if (next_line)
> +		*next_line = val + out_len + 1;
> +
> +	return xmemdupz(val, out_len);
>  }
>  
>  /*
> diff --git a/commit.c b/commit.c
> index a348f085b2b..8ac32a4d7b5 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -1631,12 +1631,13 @@ struct commit_list **commit_list_append(struct commit *commit,
>  	return &new_commit->next;
>  }
>  
> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
> +const char *find_header_mem(const char *msg, size_t len,
> +			const char *key, size_t *out_len)
>  {
>  	int key_len = strlen(key);
>  	const char *line = msg;
>  
> +	while (line && line < msg + len) {
>  		const char *eol = strchrnul(line, '\n');

Between line[0] and msg[len], there may not be a LF nor NUL at all,
and strchrnul() will scan beyond the range we were given (which is
msg[0]..msg[len]).

But that is something we share with the find_header() if I am not
mistaken, so I am OK to leave the code as posted and leave it
outside the scope of this series to clean it up to make it safer.

The reason why it is probably safe for the current set of callers
(and presumably any reasonable new callers we may add later) is that
they computed "len" by scanning the block of memory starting at (or
before) "msg" before calling us, and we know that the block of
memory is properly NUL-terminated.  find_header() is called by
check_nonce() and check_cert_push_options(), both of which tell
us to scan in a strbuf, which is designed to be scannable for NUL
safely by having an extra NUL at the end beyond the end.
John Cai Jan. 4, 2022, 3:12 p.m. UTC | #2
> On Jan 3, 2022, at 8:56 PM, Junio C Hamano <gitster@pobox.com> wrote:
> 
> "John Cai via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> +	size_t out_len;
>> +	const char *val = find_header_mem(msg, len, key, &out_len);
>> +
>> +	if (val == NULL)
> 
> Style:
> 
> 	if (!val)
> 
>> +		return NULL;
>> +
>> +	if (next_line)
>> +		*next_line = val + out_len + 1;
>> +
>> +	return xmemdupz(val, out_len);
>> }
>> 
>> /*
>> diff --git a/commit.c b/commit.c
>> index a348f085b2b..8ac32a4d7b5 100644
>> --- a/commit.c
>> +++ b/commit.c
>> @@ -1631,12 +1631,13 @@ struct commit_list **commit_list_append(struct commit *commit,
>> 	return &new_commit->next;
>> }
>> 
>> -const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
>> +const char *find_header_mem(const char *msg, size_t len,
>> +			const char *key, size_t *out_len)
>> {
>> 	int key_len = strlen(key);
>> 	const char *line = msg;
>> 
>> +	while (line && line < msg + len) {
>> 		const char *eol = strchrnul(line, '\n');
> 
> Between line[0] and msg[len], there may not be a LF nor NUL at all,
> and strchrnul() will scan beyond the range we were given (which is
> msg[0]..msg[len]).
> 
> But that is something we share with the find_header() if I am not
> mistaken, so I am OK to leave the code as posted and leave it
> outside the scope of this series to clean it up to make it safer.

Good catch. Thanks for pointing that out — I didn’t notice it. I’ve added a NEEDSWORK
block to this so we can address it in a later patch series.

> 
> The reason why it is probably safe for the current set of callers
> (and presumably any reasonable new callers we may add later) is that
> they computed "len" by scanning the block of memory starting at (or
> before) "msg" before calling us, and we know that the block of
> memory is properly NUL-terminated.  find_header() is called by
> check_nonce() and check_cert_push_options(), both of which tell
> us to scan in a strbuf, which is designed to be scannable for NUL
> safely by having an extra NUL at the end beyond the end.
>
diff mbox series

Patch

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 9f4a0b816cf..b661c51a538 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -581,32 +581,19 @@  static char *prepare_push_cert_nonce(const char *path, timestamp_t stamp)
 	return strbuf_detach(&buf, NULL);
 }
 
-/*
- * NEEDSWORK: reuse find_commit_header() from jk/commit-author-parsing
- * after dropping "_commit" from its name and possibly moving it out
- * of commit.c
- */
 static char *find_header(const char *msg, size_t len, const char *key,
 			 const char **next_line)
 {
-	int key_len = strlen(key);
-	const char *line = msg;
-
-	while (line && line < msg + len) {
-		const char *eol = strchrnul(line, '\n');
-
-		if ((msg + len <= eol) || line == eol)
-			return NULL;
-		if (line + key_len < eol &&
-		    !memcmp(line, key, key_len) && line[key_len] == ' ') {
-			int offset = key_len + 1;
-			if (next_line)
-				*next_line = *eol ? eol + 1 : eol;
-			return xmemdupz(line + offset, (eol - line) - offset);
-		}
-		line = *eol ? eol + 1 : NULL;
-	}
-	return NULL;
+	size_t out_len;
+	const char *val = find_header_mem(msg, len, key, &out_len);
+
+	if (val == NULL)
+		return NULL;
+
+	if (next_line)
+		*next_line = val + out_len + 1;
+
+	return xmemdupz(val, out_len);
 }
 
 /*
diff --git a/commit.c b/commit.c
index a348f085b2b..8ac32a4d7b5 100644
--- a/commit.c
+++ b/commit.c
@@ -1631,12 +1631,13 @@  struct commit_list **commit_list_append(struct commit *commit,
 	return &new_commit->next;
 }
 
-const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key, size_t *out_len)
 {
 	int key_len = strlen(key);
 	const char *line = msg;
 
-	while (line) {
+	while (line && line < msg + len) {
 		const char *eol = strchrnul(line, '\n');
 
 		if (line == eol)
@@ -1653,6 +1654,10 @@  const char *find_commit_header(const char *msg, const char *key, size_t *out_len
 	return NULL;
 }
 
+const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
+{
+	return find_header_mem(msg, strlen(msg), key, out_len);
+}
 /*
  * Inspect the given string and determine the true "end" of the log message, in
  * order to find where to put a new Signed-off-by trailer.  Ignored are
diff --git a/commit.h b/commit.h
index 3ea32766bcb..38cc5426615 100644
--- a/commit.h
+++ b/commit.h
@@ -290,12 +290,17 @@  void free_commit_extra_headers(struct commit_extra_header *extra);
 
 /*
  * Search the commit object contents given by "msg" for the header "key".
+ * Reads up to "len" bytes of "msg".
  * Returns a pointer to the start of the header contents, or NULL. The length
  * of the header, up to the first newline, is returned via out_len.
  *
  * Note that some headers (like mergetag) may be multi-line. It is the caller's
  * responsibility to parse further in this case!
  */
+const char *find_header_mem(const char *msg, size_t len,
+			const char *key,
+			size_t *out_len);
+
 const char *find_commit_header(const char *msg, const char *key,
 			       size_t *out_len);