@@ -96,15 +96,26 @@ static void use(int bytes)
display_throughput(progress, consumed_bytes);
}
+/*
+ * Decompress zstream from stdin and return specific size of data.
+ * The caller is responsible to free the returned buffer.
+ *
+ * But for dry_run mode, "get_data()" is only used to check the
+ * integrity of data, and the returned buffer is not used at all.
+ * Therefore, in dry_run mode, "get_data()" will release the small
+ * allocated buffer which is reused to hold temporary zstream output
+ * and return NULL instead of returning garbage data.
+ */
static void *get_data(unsigned long size)
{
git_zstream stream;
- void *buf = xmallocz(size);
+ unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
+ void *buf = xmallocz(bufsize);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
- stream.avail_out = size;
+ stream.avail_out = bufsize;
stream.next_in = fill(1);
stream.avail_in = len;
git_inflate_init(&stream);
@@ -124,8 +135,15 @@ static void *get_data(unsigned long size)
}
stream.next_in = fill(1);
stream.avail_in = len;
+ if (dry_run) {
+ /* reuse the buffer in dry_run mode */
+ stream.next_out = buf;
+ stream.avail_out = bufsize;
+ }
}
git_inflate_end(&stream);
+ if (dry_run)
+ FREE_AND_NULL(buf);
return buf;
}
@@ -325,10 +343,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
{
void *buf = get_data(size);
- if (!dry_run && buf)
+ if (buf)
write_object(nr, type, buf, size);
- else
- free(buf);
}
static int resolve_against_held(unsigned nr, const struct object_id *base,
@@ -358,10 +374,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
oidread(&base_oid, fill(the_hash_algo->rawsz));
use(the_hash_algo->rawsz);
delta_data = get_data(delta_size);
- if (dry_run || !delta_data) {
- free(delta_data);
+ if (!delta_data)
return;
- }
if (has_object_file(&base_oid))
; /* Ok we have this one */
else if (resolve_against_held(nr, &base_oid,
@@ -397,10 +411,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
die("offset value out of bound for delta base object");
delta_data = get_data(delta_size);
- if (dry_run || !delta_data) {
- free(delta_data);
+ if (!delta_data)
return;
- }
lo = 0;
hi = nr;
while (lo < hi) {
new file mode 100755
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Copyright (c) 2022 Han Xin
+#
+
+test_description='git unpack-objects with large objects'
+
+. ./test-lib.sh
+
+prepare_dest () {
+ test_when_finished "rm -rf dest.git" &&
+ git init --bare dest.git
+}
+
+test_expect_success "create large objects (1.5 MB) and PACK" '
+ test-tool genrandom foo 1500000 >big-blob &&
+ test_commit --append foo big-blob &&
+ test-tool genrandom bar 1500000 >big-blob &&
+ test_commit --append bar big-blob &&
+ PACK=$(echo HEAD | git pack-objects --revs pack)
+'
+
+test_expect_success 'set memory limitation to 1MB' '
+ GIT_ALLOC_LIMIT=1m &&
+ export GIT_ALLOC_LIMIT
+'
+
+test_expect_success 'unpack-objects failed under memory limitation' '
+ prepare_dest &&
+ test_must_fail git -C dest.git unpack-objects <pack-$PACK.pack 2>err &&
+ grep "fatal: attempting to allocate" err
+'
+
+test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
+ prepare_dest &&
+ git -C dest.git unpack-objects -n <pack-$PACK.pack &&
+ test_stdout_line_count = 0 find dest.git/objects -type f &&
+ test_dir_is_empty dest.git/objects/pack
+'
+
+test_done