diff mbox series

[1/3] unpack: replace xwrite() loop with write_in_full()

Message ID 20240302190348.3946569-2-gitster@pobox.com (mailing list archive)
State Accepted
Commit fa6c383309557b9d2942c47b75a895ca960ad9f5
Headers show
Series Auditing use of xwrite() | expand

Commit Message

Junio C Hamano March 2, 2024, 7:03 p.m. UTC
We have two packfile stream consumers, index-pack and
unpack-objects, that allow excess payload after the packfile stream
data. Their code to relay excess data hasn't changed significantly
since their original implementation that appeared in 67e5a5ec
(git-unpack-objects: re-write to read from stdin, 2005-06-28) and
9bee2478 (mimic unpack-objects when --stdin is used with index-pack,
2006-10-25).

These code blocks contain hand-rolled loops using xwrite(), written
before our write_in_full() helper existed. This helper now provides
the same functionality.

Replace these loops with write_in_full() for shorter, clearer
code. Update related variables accordingly.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/index-pack.c     | 17 +++--------------
 builtin/unpack-objects.c |  8 +-------
 2 files changed, 4 insertions(+), 21 deletions(-)

Comments

Patrick Steinhardt March 4, 2024, 6:58 a.m. UTC | #1
On Sat, Mar 02, 2024 at 11:03:46AM -0800, Junio C Hamano wrote:
> We have two packfile stream consumers, index-pack and
> unpack-objects, that allow excess payload after the packfile stream
> data. Their code to relay excess data hasn't changed significantly
> since their original implementation that appeared in 67e5a5ec
> (git-unpack-objects: re-write to read from stdin, 2005-06-28) and
> 9bee2478 (mimic unpack-objects when --stdin is used with index-pack,
> 2006-10-25).
> 
> These code blocks contain hand-rolled loops using xwrite(), written
> before our write_in_full() helper existed. This helper now provides
> the same functionality.
> 
> Replace these loops with write_in_full() for shorter, clearer
> code. Update related variables accordingly.
> 
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>  builtin/index-pack.c     | 17 +++--------------
>  builtin/unpack-objects.c |  8 +-------
>  2 files changed, 4 insertions(+), 21 deletions(-)
> 
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index a3a37bd215..856428fef9 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -1524,14 +1524,12 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
>  	struct strbuf pack_name = STRBUF_INIT;
>  	struct strbuf index_name = STRBUF_INIT;
>  	struct strbuf rev_index_name = STRBUF_INIT;
> -	int err;
>  
>  	if (!from_stdin) {
>  		close(input_fd);
>  	} else {
>  		fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
> -		err = close(output_fd);
> -		if (err)
> +		if (close(output_fd))
>  			die_errno(_("error while closing pack file"));
>  	}
>  
> @@ -1566,17 +1564,8 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
>  		write_or_die(1, buf.buf, buf.len);
>  		strbuf_release(&buf);
>  
> -		/*
> -		 * Let's just mimic git-unpack-objects here and write
> -		 * the last part of the input buffer to stdout.
> -		 */
> -		while (input_len) {
> -			err = xwrite(1, input_buffer + input_offset, input_len);
> -			if (err <= 0)
> -				break;
> -			input_len -= err;
> -			input_offset += err;
> -		}
> +		/* Write the last part of the buffer to stdout */
> +		write_in_full(1, input_buffer + input_offset, input_len);

With this change we stop updating `input_len` and `input_offset`, both
of which are global variables. Assuming that tests pass this must be
okay right now given that this is the final part of what we are writing.
But I wonder whether we shouldn't update those regardless just so that
these remain consistent?

>  	}
>  
>  	strbuf_release(&rev_index_name);
> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
> index e0a701f2b3..f1c85a00ae 100644
> --- a/builtin/unpack-objects.c
> +++ b/builtin/unpack-objects.c
> @@ -679,13 +679,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
>  	use(the_hash_algo->rawsz);
>  
>  	/* Write the last part of the buffer to stdout */
> -	while (len) {
> -		int ret = xwrite(1, buffer + offset, len);
> -		if (ret <= 0)
> -			break;
> -		len -= ret;
> -		offset += ret;
> -	}
> +	write_in_full(1, buffer + offset, len);

Same here.

Patrick

>  	/* All done */
>  	return has_errors;
> -- 
> 2.44.0-84-gb387623c12
> 
>
Junio C Hamano March 4, 2024, 7:29 a.m. UTC | #2
Patrick Steinhardt <ps@pks.im> writes:

>> -		while (input_len) {
>> -			err = xwrite(1, input_buffer + input_offset, input_len);
>> -			if (err <= 0)
>> -				break;
>> -			input_len -= err;
>> -			input_offset += err;
>> -		}
>> +		/* Write the last part of the buffer to stdout */
>> +		write_in_full(1, input_buffer + input_offset, input_len);
>
> With this change we stop updating `input_len` and `input_offset`, both
> of which are global variables. Assuming that tests pass this must be
> okay right now given that this is the final part of what we are writing.
> But I wonder whether we shouldn't update those regardless just so that
> these remain consistent?

It is probably good hygiene, even though it may not matter at all
for the correctness of the current code.

Thanks for your sharp eyes.

>> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
>> index e0a701f2b3..f1c85a00ae 100644
>> --- a/builtin/unpack-objects.c
>> +++ b/builtin/unpack-objects.c
>> @@ -679,13 +679,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
>>  	use(the_hash_algo->rawsz);
>>  
>>  	/* Write the last part of the buffer to stdout */
>> -	while (len) {
>> -		int ret = xwrite(1, buffer + offset, len);
>> -		if (ret <= 0)
>> -			break;
>> -		len -= ret;
>> -		offset += ret;
>> -	}
>> +	write_in_full(1, buffer + offset, len);
>
> Same here.
>
> Patrick
>
>>  	/* All done */
>>  	return has_errors;
>> -- 
>> 2.44.0-84-gb387623c12
>> 
>>
Junio C Hamano March 4, 2024, 4:43 p.m. UTC | #3
Junio C Hamano <gitster@pobox.com> writes:

> Patrick Steinhardt <ps@pks.im> writes:
>
>>> -		while (input_len) {
>>> -			err = xwrite(1, input_buffer + input_offset, input_len);
>>> -			if (err <= 0)
>>> -				break;
>>> -			input_len -= err;
>>> -			input_offset += err;
>>> -		}
>>> +		/* Write the last part of the buffer to stdout */
>>> +		write_in_full(1, input_buffer + input_offset, input_len);
>>
>> With this change we stop updating `input_len` and `input_offset`, both
>> of which are global variables. Assuming that tests pass this must be
>> okay right now given that this is the final part of what we are writing.
>> But I wonder whether we shouldn't update those regardless just so that
>> these remain consistent?
>
> It is probably good hygiene, even though it may not matter at all
> for the correctness of the current code.
>
> Thanks for your sharp eyes.

Actually, I changed my mind.  As you said, this is flushing the very
end of the data in the input_buffer[] and nobody will fill() the
input_buffer[] after the call to this function happens, so there is
no need to keep input_len and input_offset up to date.

>>> -	while (len) {
>>> ...
>>> -		len -= ret;
>>> -		offset += ret;
>>> -	}
>>> +	write_in_full(1, buffer + offset, len);
>>
>> Same here.

Ditto.  We are about to pass control back to the caller, which
will exit using the "has_errors" we return from here.

>>
>> Patrick
>>
>>>  	/* All done */
>>>  	return has_errors;
>>> -- 
>>> 2.44.0-84-gb387623c12
>>> 
>>>
diff mbox series

Patch

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index a3a37bd215..856428fef9 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1524,14 +1524,12 @@  static void final(const char *final_pack_name, const char *curr_pack_name,
 	struct strbuf pack_name = STRBUF_INIT;
 	struct strbuf index_name = STRBUF_INIT;
 	struct strbuf rev_index_name = STRBUF_INIT;
-	int err;
 
 	if (!from_stdin) {
 		close(input_fd);
 	} else {
 		fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
-		err = close(output_fd);
-		if (err)
+		if (close(output_fd))
 			die_errno(_("error while closing pack file"));
 	}
 
@@ -1566,17 +1564,8 @@  static void final(const char *final_pack_name, const char *curr_pack_name,
 		write_or_die(1, buf.buf, buf.len);
 		strbuf_release(&buf);
 
-		/*
-		 * Let's just mimic git-unpack-objects here and write
-		 * the last part of the input buffer to stdout.
-		 */
-		while (input_len) {
-			err = xwrite(1, input_buffer + input_offset, input_len);
-			if (err <= 0)
-				break;
-			input_len -= err;
-			input_offset += err;
-		}
+		/* Write the last part of the buffer to stdout */
+		write_in_full(1, input_buffer + input_offset, input_len);
 	}
 
 	strbuf_release(&rev_index_name);
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index e0a701f2b3..f1c85a00ae 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -679,13 +679,7 @@  int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
 	use(the_hash_algo->rawsz);
 
 	/* Write the last part of the buffer to stdout */
-	while (len) {
-		int ret = xwrite(1, buffer + offset, len);
-		if (ret <= 0)
-			break;
-		len -= ret;
-		offset += ret;
-	}
+	write_in_full(1, buffer + offset, len);
 
 	/* All done */
 	return has_errors;