@@ -424,6 +424,17 @@ be delta compressed, but larger binary media files won't be.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.
+core.bigFileStreamingThreshold::
+	Files larger than this will be streamed out to a temporary
+	object file while being hashed, which will then be renamed
+	in-place to a loose object, particularly if the
+	`core.bigFileThreshold` setting dictates that they're always
+	written out as loose objects.
++
+Default is 128 MiB on all platforms.
++
+Common unit suffixes of 'k', 'm', or 'g' are supported.
+
core.excludesFile::
Specifies the pathname to the file that contains patterns to
describe paths that are not meant to be tracked, in addition
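
The two settings are easy to conflate: core.bigFileThreshold decides whether a
blob is written out loose instead of being deltified, while the new
core.bigFileStreamingThreshold decides whether it is written via a stream
instead of an in-core buffer. A minimal sketch of that decision, assuming the
big_file_streaming_threshold variable this series introduces (the helper name
is hypothetical; the real check sits inline in the unpack path further down):

    /*
     * Hypothetical helper for illustration only; the actual check is
     * inline in unpack_non_delta_entry() and applies to blobs only.
     */
    extern unsigned long big_file_streaming_threshold;

    enum write_path { WRITE_BUFFERED, WRITE_STREAMED };

    static enum write_path choose_write_path(unsigned long size)
    {
    	if (size > big_file_streaming_threshold)
    		return WRITE_STREAMED;	/* temp file, hash, then rename */
    	return WRITE_BUFFERED;		/* whole blob allocated in memory */
    }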
@@ -331,11 +331,82 @@ static void added_object(unsigned nr, enum object_type type,
}
}
+struct input_zstream_data {
+ git_zstream *zstream;
+ unsigned char buf[8192];
+ int status;
+};
+
+static const void *feed_input_zstream(const struct input_stream *in_stream,
+ unsigned long *readlen)
+{
+ struct input_zstream_data *data = in_stream->data;
+	git_zstream *zstream = data->zstream;
+	/* fill() tops up unpack-objects' input buffer; file-scope "len" holds its size */
+	void *in = fill(1);
+
+	if (!len || data->status == Z_STREAM_END) {
+ *readlen = 0;
+ return NULL;
+ }
+
+ zstream->next_out = data->buf;
+ zstream->avail_out = sizeof(data->buf);
+ zstream->next_in = in;
+ zstream->avail_in = len;
+
+	data->status = git_inflate(zstream, 0);
+	/* hand back to the input buffer only what git_inflate() consumed */
+	use(len - zstream->avail_in);
+ *readlen = sizeof(data->buf) - zstream->avail_out;
+
+ return data->buf;
+}
+
+static void write_stream_blob(unsigned nr, unsigned long size)
+{
+ git_zstream zstream;
+ struct input_zstream_data data;
+ struct input_stream in_stream = {
+ .read = feed_input_zstream,
+ .data = &data,
+ };
+
+ memset(&zstream, 0, sizeof(zstream));
+ memset(&data, 0, sizeof(data));
+ data.zstream = &zstream;
+ git_inflate_init(&zstream);
+
+	/*
+	 * With HASH_STREAM, write_object_file_flags() reads the contents
+	 * via in_stream rather than from an in-core buffer.
+	 */
+	if (write_object_file_flags(&in_stream, size,
+				    type_name(OBJ_BLOB),
+				    &obj_list[nr].oid,
+				    HASH_STREAM))
+		die(_("failed to write object in stream"));
+
+ if (zstream.total_out != size || data.status != Z_STREAM_END)
+ die(_("inflate returned %d"), data.status);
+ git_inflate_end(&zstream);
+
+	if (strict) {
+		struct blob *blob = lookup_blob(the_repository, &obj_list[nr].oid);
+
+		if (blob)
+			blob->object.flags |= FLAG_WRITTEN;
+		else
+			die(_("invalid blob object from stream"));
+	}
+ obj_list[nr].obj = NULL;
+}
+
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
unsigned nr)
{
- void *buf = get_data(size, dry_run);
+ void *buf;
+
+	/* Write a large blob in a stream without allocating the full buffer. */
+ if (!dry_run && type == OBJ_BLOB && size > big_file_streaming_threshold) {
+ write_stream_blob(nr, size);
+ return;
+ }
+ buf = get_data(size, dry_run);
if (!dry_run && buf)
write_object(nr, type, buf, size);
	else
		free(buf);
}
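
feed_input_zstream() above is the producer half of the input_stream interface
this series adds; the consumer half, inside the object-writing code, calls
read() repeatedly until it returns NULL. A sketch of that calling convention,
assuming only the struct shape visible in this hunk (the loop itself is
illustrative, not the actual object-file.c consumer):

    struct input_stream {
    	const void *(*read)(const struct input_stream *, unsigned long *readlen);
    	void *data;
    };

    static void drain_input_stream(struct input_stream *in)
    {
    	unsigned long readlen;
    	const void *chunk;

    	/* NULL together with *readlen == 0 marks the end of the stream */
    	while ((chunk = in->read(in, &readlen)) != NULL) {
    		/* hash and write readlen bytes from chunk here */
    	}
    }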
@@ -975,6 +975,7 @@ extern size_t packed_git_window_size;
extern size_t packed_git_limit;
extern size_t delta_base_cache_limit;
extern unsigned long big_file_threshold;
+extern unsigned long big_file_streaming_threshold;
extern unsigned long pack_size_limit_cfg;
/*
@@ -1408,6 +1408,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
return 0;
}
+ if (!strcmp(var, "core.bigfilestreamingthreshold")) {
+ big_file_streaming_threshold = git_config_ulong(var, value);
+ return 0;
+ }
+
if (!strcmp(var, "core.packedgitlimit")) {
packed_git_limit = git_config_ulong(var, value);
		return 0;
	}
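
git_config_ulong() is also what implements the 'k', 'm', and 'g' suffixes the
documentation mentions: the numeric prefix is scaled by the unit before being
stored, so a value of 64m becomes 67108864 bytes. A simplified, self-contained
sketch of that scaling (Git's real parser additionally rejects trailing
garbage and guards against overflow):

    #include <stdlib.h>

    /* Simplified unit-suffix scaling; not Git's actual implementation. */
    static unsigned long parse_size_with_suffix(const char *value)
    {
    	char *end;
    	unsigned long v = strtoul(value, &end, 10);

    	switch (*end) {
    	case 'k': case 'K': return v << 10;	/* KiB */
    	case 'm': case 'M': return v << 20;	/* MiB */
    	case 'g': case 'G': return v << 30;	/* GiB */
    	default:            return v;		/* plain bytes */
    	}
    }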
@@ -47,6 +47,7 @@ size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
unsigned long big_file_threshold = 512 * 1024 * 1024;
+unsigned long big_file_streaming_threshold = 128 * 1024 * 1024;
int pager_use_color = 1;
const char *editor_program;
const char *askpass_program;
new file mode 100755
@@ -0,0 +1,87 @@
+#!/bin/sh
+#
+# Copyright (c) 2021 Han Xin
+#
+
+test_description='Test unpack-objects when receiving packs'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+prepare_dest () {
+ test_when_finished "rm -rf dest.git" &&
+ git init --bare dest.git &&
+ git -C dest.git config core.bigFileStreamingThreshold $1 &&
+ git -C dest.git config core.bigFileThreshold $1
+}
+
+test_expect_success "setup repo with big blobs (1.5 MB)" '
+ test-tool genrandom foo 1500000 >big-blob &&
+ test_commit --append foo big-blob &&
+ test-tool genrandom bar 1500000 >big-blob &&
+ test_commit --append bar big-blob &&
+ (
+ cd .git &&
+ find objects/?? -type f | sort
+ ) >expect &&
+ PACK=$(echo main | git pack-objects --revs test)
+'
+
+test_expect_success 'setup env: GIT_ALLOC_LIMIT to 1MB' '
+ GIT_ALLOC_LIMIT=1m &&
+ export GIT_ALLOC_LIMIT
+'
+
+test_expect_success 'fail to unpack-objects: cannot allocate' '
+ prepare_dest 2m &&
+ test_must_fail git -C dest.git unpack-objects <test-$PACK.pack 2>err &&
+ grep "fatal: attempting to allocate" err &&
+ (
+ cd dest.git &&
+ find objects/?? -type f | sort
+ ) >actual &&
+ test_file_not_empty actual &&
+ ! test_cmp expect actual
+'
+
+test_expect_success 'unpack big object in stream' '
+ prepare_dest 1m &&
+ git -C dest.git unpack-objects <test-$PACK.pack &&
+ git -C dest.git fsck &&
+ (
+ cd dest.git &&
+ find objects/?? -type f | sort
+ ) >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'unpack big object in stream with existing oids' '
+ prepare_dest 1m &&
+ git -C dest.git index-pack --stdin <test-$PACK.pack &&
+ (
+ cd dest.git &&
+ find objects/?? -type f | sort
+ ) >actual &&
+ test_must_be_empty actual &&
+ git -C dest.git unpack-objects <test-$PACK.pack &&
+ git -C dest.git fsck &&
+ (
+ cd dest.git &&
+ find objects/?? -type f | sort
+ ) >actual &&
+ test_must_be_empty actual
+'
+
+test_expect_success 'unpack-objects dry-run' '
+ prepare_dest 1m &&
+ git -C dest.git unpack-objects -n <test-$PACK.pack &&
+ (
+ cd dest.git &&
+ find objects/ -type f
+ ) >actual &&
+ test_must_be_empty actual
+'
+
+test_done