Message ID | 20211210103435.83656-1-chiyutianyi@gmail.com (mailing list archive) |
---|---|
Headers | show |
Series | unpack large blobs in stream | expand |
From: Han Xin <hanxin.hx@alibaba-inc.com>
Changes since v5:
* Refactor write_loose_object() for reuse in the stream version, as suggested by
Ævar Arnfjörð Bjarmason [1].
* Add a new testcase into t5590-unpack-non-delta-objects to cover the case of
unpacking existing objects.
* Fix code formatting in unpack-objects.c, as suggested by
Ævar Arnfjörð Bjarmason [2].
1. https://lore.kernel.org/git/211213.86bl1l9bfz.gmgdl@evledraar.gmail.com/
2. https://lore.kernel.org/git/211213.867dc8ansq.gmgdl@evledraar.gmail.com/
Han Xin (6):
object-file.c: release strbuf in write_loose_object()
object-file.c: refactor object header generation into a function
object-file.c: refactor write_loose_object() to reuse in stream
version
object-file.c: make "write_object_file_flags()" to support read in
stream
unpack-objects.c: add dry_run mode for get_data()
unpack-objects: unpack_non_delta_entry() read data in a stream
Documentation/config/core.txt | 11 ++
builtin/unpack-objects.c | 94 ++++++++++++-
cache.h | 2 +
config.c | 5 +
environment.c | 1 +
object-file.c | 207 +++++++++++++++++++++++-----
object-store.h | 5 +
t/t5590-unpack-non-delta-objects.sh | 87 ++++++++++++
8 files changed, 370 insertions(+), 42 deletions(-)
create mode 100755 t/t5590-unpack-non-delta-objects.sh
Range-diff against v5:
1: f3595e68cc < -: ---------- object-file: refactor write_loose_object() to support read from stream
2: c25fdd1fe5 < -: ---------- object-file.c: handle undetermined oid in write_loose_object()
3: ed226f2f9f < -: ---------- object-file.c: read stream in a loop in write_loose_object()
-: ---------- > 1: 59d35dac5f object-file.c: release strbuf in write_loose_object()
-: ---------- > 2: 2174a6cbad object-file.c: refactor object header generation into a function
-: ---------- > 3: 8a704ecc59 object-file.c: refactor write_loose_object() to reuse in stream version
-: ---------- > 4: 96f05632a2 object-file.c: make "write_object_file_flags()" to support read in stream
4: 2f91e540f6 ! 5: 1acbb6e849 unpack-objects.c: add dry_run mode for get_data()
@@ builtin/unpack-objects.c: static void use(int bytes)
{
git_zstream stream;
- void *buf = xmallocz(size);
-+ unsigned long bufsize = dry_run ? 8192 : size;
-+ void *buf = xmallocz(bufsize);
++ unsigned long bufsize;
++ void *buf;
memset(&stream, 0, sizeof(stream));
++ if (dry_run && size > 8192)
++ bufsize = 8192;
++ else
++ bufsize = size;
++ buf = xmallocz(bufsize);
stream.next_out = buf;
- stream.avail_out = size;
5: 7698938eac < -: ---------- object-file.c: make "write_object_file_flags()" to support "HASH_STREAM"
6: 92d69cb84a ! 6: 476aaba527 unpack-objects: unpack_non_delta_entry() read data in a stream
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
+ int status;
+};
+
-+static const void *feed_input_zstream(struct input_stream *in_stream, unsigned long *readlen)
++static const void *feed_input_zstream(const struct input_stream *in_stream,
++ unsigned long *readlen)
+{
+ struct input_zstream_data *data = in_stream->data;
+ git_zstream *zstream = data->zstream;
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
+ .read = feed_input_zstream,
+ .data = &data,
+ };
-+ int ret;
+
+ memset(&zstream, 0, sizeof(zstream));
+ memset(&data, 0, sizeof(data));
+ data.zstream = &zstream;
+ git_inflate_init(&zstream);
+
-+ if ((ret = write_object_file_flags(&in_stream, size, type_name(OBJ_BLOB) ,&obj_list[nr].oid, HASH_STREAM)))
-+ die(_("failed to write object in stream %d"), ret);
++ if (write_object_file_flags(&in_stream, size,
++ type_name(OBJ_BLOB),
++ &obj_list[nr].oid,
++ HASH_STREAM))
++ die(_("failed to write object in stream"));
+
+ if (zstream.total_out != size || data.status != Z_STREAM_END)
+ die(_("inflate returned %d"), data.status);
+ git_inflate_end(&zstream);
+
-+ if (strict && !dry_run) {
++ if (strict) {
+ struct blob *blob = lookup_blob(the_repository, &obj_list[nr].oid);
+ if (blob)
+ blob->object.flags |= FLAG_WRITTEN;
+ else
-+ die("invalid blob object from stream");
++ die(_("invalid blob object from stream"));
+ }
+ obj_list[nr].obj = NULL;
+}
@@ t/t5590-unpack-non-delta-objects.sh (new)
+prepare_dest () {
+ test_when_finished "rm -rf dest.git" &&
+ git init --bare dest.git &&
-+ git -C dest.git config core.bigFileStreamingThreshold $1
++ git -C dest.git config core.bigFileStreamingThreshold $1 &&
+ git -C dest.git config core.bigFileThreshold $1
+}
+
@@ t/t5590-unpack-non-delta-objects.sh (new)
+ test_cmp expect actual
+'
+
++test_expect_success 'unpack big object in stream with existing oids' '
++ prepare_dest 1m &&
++ git -C dest.git index-pack --stdin <test-$PACK.pack &&
++ (
++ cd dest.git &&
++ find objects/?? -type f | sort
++ ) >actual &&
++ test_must_be_empty actual &&
++ git -C dest.git unpack-objects <test-$PACK.pack &&
++ git -C dest.git fsck &&
++ (
++ cd dest.git &&
++ find objects/?? -type f | sort
++ ) >actual &&
++ test_must_be_empty actual
++'
++
+test_expect_success 'unpack-objects dry-run' '
+ prepare_dest 1m &&
+ git -C dest.git unpack-objects -n <test-$PACK.pack &&