diff mbox series

[v6,5/6] unpack-objects.c: add dry_run mode for get_data()

Message ID 20211217112629.12334-6-chiyutianyi@gmail.com (mailing list archive)
State Superseded
Headers show
Series unpack large blobs in stream | expand

Commit Message

Han Xin Dec. 17, 2021, 11:26 a.m. UTC
From: Han Xin <hanxin.hx@alibaba-inc.com>

In dry_run mode, "get_data()" is used to verify the inflation of data,
and the returned buffer will not be used at all and will be freed
immediately. Even in dry_run mode, it is dangerous to allocate a
full-size buffer for a large blob object. Therefore, only allocate a
low memory footprint when calling "get_data()" in dry_run mode.

Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
---
 builtin/unpack-objects.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

Comments

René Scharfe Dec. 17, 2021, 9:22 p.m. UTC | #1
Am 17.12.21 um 12:26 schrieb Han Xin:
> From: Han Xin <hanxin.hx@alibaba-inc.com>
>
> In dry_run mode, "get_data()" is used to verify the inflation of data,
> and the returned buffer will not be used at all and will be freed
> immediately. Even in dry_run mode, it is dangerous to allocate a
> full-size buffer for a large blob object. Therefore, only allocate a
> low memory footprint when calling "get_data()" in dry_run mode.

Clever.  Looks good to me.

For some reason I was expecting this patch to have some connection to
one of the earlier ones (perhaps because get_data() was mentioned),
but it is technically independent.

>
> Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
> ---
>  builtin/unpack-objects.c | 23 +++++++++++++++++------
>  1 file changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
> index 4a9466295b..c4a17bdb44 100644
> --- a/builtin/unpack-objects.c
> +++ b/builtin/unpack-objects.c
> @@ -96,15 +96,21 @@ static void use(int bytes)
>  	display_throughput(progress, consumed_bytes);
>  }
>
> -static void *get_data(unsigned long size)
> +static void *get_data(unsigned long size, int dry_run)
>  {
>  	git_zstream stream;
> -	void *buf = xmallocz(size);
> +	unsigned long bufsize;
> +	void *buf;
>
>  	memset(&stream, 0, sizeof(stream));
> +	if (dry_run && size > 8192)
> +		bufsize = 8192;
> +	else
> +		bufsize = size;
> +	buf = xmallocz(bufsize);
>
>  	stream.next_out = buf;
> -	stream.avail_out = size;
> +	stream.avail_out = bufsize;
>  	stream.next_in = fill(1);
>  	stream.avail_in = len;
>  	git_inflate_init(&stream);
> @@ -124,6 +130,11 @@ static void *get_data(unsigned long size)
>  		}
>  		stream.next_in = fill(1);
>  		stream.avail_in = len;
> +		if (dry_run) {
> +			/* reuse the buffer in dry_run mode */
> +			stream.next_out = buf;
> +			stream.avail_out = bufsize;
> +		}
>  	}
>  	git_inflate_end(&stream);
>  	return buf;
> @@ -323,7 +334,7 @@ static void added_object(unsigned nr, enum object_type type,
>  static void unpack_non_delta_entry(enum object_type type, unsigned long size,
>  				   unsigned nr)
>  {
> -	void *buf = get_data(size);
> +	void *buf = get_data(size, dry_run);
>
>  	if (!dry_run && buf)
>  		write_object(nr, type, buf, size);
> @@ -357,7 +368,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
>  	if (type == OBJ_REF_DELTA) {
>  		oidread(&base_oid, fill(the_hash_algo->rawsz));
>  		use(the_hash_algo->rawsz);
> -		delta_data = get_data(delta_size);
> +		delta_data = get_data(delta_size, dry_run);
>  		if (dry_run || !delta_data) {
>  			free(delta_data);
>  			return;
> @@ -396,7 +407,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
>  		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
>  			die("offset value out of bound for delta base object");
>
> -		delta_data = get_data(delta_size);
> +		delta_data = get_data(delta_size, dry_run);
>  		if (dry_run || !delta_data) {
>  			free(delta_data);
>  			return;
diff mbox series

Patch

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4a9466295b..c4a17bdb44 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -96,15 +96,21 @@  static void use(int bytes)
 	display_throughput(progress, consumed_bytes);
 }
 
-static void *get_data(unsigned long size)
+static void *get_data(unsigned long size, int dry_run)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize;
+	void *buf;
 
 	memset(&stream, 0, sizeof(stream));
+	if (dry_run && size > 8192)
+		bufsize = 8192;
+	else
+		bufsize = size;
+	buf = xmallocz(bufsize);
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -124,6 +130,11 @@  static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize;
+		}
 	}
 	git_inflate_end(&stream);
 	return buf;
@@ -323,7 +334,7 @@  static void added_object(unsigned nr, enum object_type type,
 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 				   unsigned nr)
 {
-	void *buf = get_data(size);
+	void *buf = get_data(size, dry_run);
 
 	if (!dry_run && buf)
 		write_object(nr, type, buf, size);
@@ -357,7 +368,7 @@  static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	if (type == OBJ_REF_DELTA) {
 		oidread(&base_oid, fill(the_hash_algo->rawsz));
 		use(the_hash_algo->rawsz);
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;
@@ -396,7 +407,7 @@  static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
 			die("offset value out of bound for delta base object");
 
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;