[v4,0/7] merge-ort: implement support for packing objects together

Message ID: cover.1697736516.git.me@ttaylorr.com

Message

Taylor Blau Oct. 19, 2023, 5:28 p.m. UTC
(Rebased onto the current tip of 'master', which is 813d9a9188 (The
nineteenth batch, 2023-10-18) at the time of writing).

This series implements support for a new merge-tree option,
`--write-pack`, which causes any newly-written objects to be packed
together instead of being stored individually as loose.
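
For illustration only (the branch names are made up), an invocation
would look something like:

    $ git merge-tree --write-pack side-a side-b

and any new trees or blobs created while computing the merge would end
up in a single new packfile rather than as loose objects.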

I intentionally broke this off from the existing thread, since I
accidentally rerolled mine and Jonathan Tan's Bloom v2 series into it,
causing some confusion.

This is a new round that is significantly simplified thanks to
another very helpful suggestion[1] from Junio. By factoring out a common
"deflate object to pack" that takes an abstract bulk_checkin_source as a
parameter, all of the earlier refactorings can be dropped since we
retain only a single caller instead of multiple.
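
To make the shape of that abstraction concrete, here is a rough sketch
(not the patch itself); apart from `SOURCE_INCORE`,
`bulk_checkin_source_seek_to()`, and the `buf`/`path` fields, which all
appear in the range-diff below, the names here are illustrative:

    /* Sketch only; in bulk-checkin.c these come via git-compat-util.h. */
    #include <sys/types.h>
    #include <unistd.h>

    enum bulk_checkin_source_type {
            SOURCE_FILE,   /* stream from a seekable file descriptor */
            SOURCE_INCORE, /* stream from an in-memory buffer */
    };

    struct bulk_checkin_source {
            enum bulk_checkin_source_type type;

            /* used when type == SOURCE_FILE */
            int fd;

            /* used when type == SOURCE_INCORE */
            const void *buf;
            size_t off;    /* next byte of 'buf' to stream */

            size_t size;
            const char *path;
    };

    /*
     * Reposition the source, dispatching on its type, so that the
     * deflate loop can rewind and retry when an object would push the
     * current pack past pack.packSizeLimit.
     */
    static off_t bulk_checkin_source_seek_to(struct bulk_checkin_source *source,
                                             off_t offset)
    {
            switch (source->type) {
            case SOURCE_FILE:
                    return lseek(source->fd, offset, SEEK_SET);
            case SOURCE_INCORE:
                    if (offset < 0 || (size_t)offset > source->size)
                            return (off_t)-1;
                    source->off = offset;
                    return offset;
            default:
                    return (off_t)-1;
            }
    }

Both `stream_obj_to_pack()` and the single deflate routine take a
pointer to one of these instead of a bare file descriptor, which is
what lets the file-backed and in-core callers share the same code path.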

This resulted in a rather satisfying range-diff (included below, as
usual), and a similarly satisfying inter-diff:

    $ git diff --stat tb/ort-bulk-checkin.v3..
     bulk-checkin.c | 203 ++++++++++++++++---------------------------------
     1 file changed, 64 insertions(+), 139 deletions(-)

Beyond that, the changes since last time can be viewed in the range-diff
below. Thanks in advance for any review!

[1]: https://lore.kernel.org/git/xmqq34y7plj4.fsf@gitster.g/

Taylor Blau (7):
  bulk-checkin: extract abstract `bulk_checkin_source`
  bulk-checkin: generify `stream_blob_to_pack()` for arbitrary types
  bulk-checkin: refactor deflate routine to accept a
    `bulk_checkin_source`
  bulk-checkin: implement `SOURCE_INCORE` mode for `bulk_checkin_source`
  bulk-checkin: introduce `index_blob_bulk_checkin_incore()`
  bulk-checkin: introduce `index_tree_bulk_checkin_incore()`
  builtin/merge-tree.c: implement support for `--write-pack`

 Documentation/git-merge-tree.txt |   4 +
 builtin/merge-tree.c             |   5 +
 bulk-checkin.c                   | 161 ++++++++++++++++++++++++++-----
 bulk-checkin.h                   |   8 ++
 merge-ort.c                      |  42 ++++++--
 merge-recursive.h                |   1 +
 t/t4301-merge-tree-write-tree.sh |  93 ++++++++++++++++++
 7 files changed, 280 insertions(+), 34 deletions(-)

Range-diff against v3:
 1:  2dffa45183 <  -:  ---------- bulk-checkin: factor out `format_object_header_hash()`
 2:  7a10dc794a <  -:  ---------- bulk-checkin: factor out `prepare_checkpoint()`
 3:  20c32d2178 <  -:  ---------- bulk-checkin: factor out `truncate_checkpoint()`
 4:  893051d0b7 <  -:  ---------- bulk-checkin: factor out `finalize_checkpoint()`
 5:  da52ec8380 !  1:  97bb6e9f59 bulk-checkin: extract abstract `bulk_checkin_source`
    @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *sta
      			if (*already_hashed_to < offset) {
      				size_t hsize = offset - *already_hashed_to;
     @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    - 	git_hash_ctx ctx;
    + 	unsigned header_len;
      	struct hashfile_checkpoint checkpoint = {0};
      	struct pack_idx_entry *idx = NULL;
     +	struct bulk_checkin_source source = {
    @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
      	seekback = lseek(fd, 0, SEEK_CUR);
      	if (seekback == (off_t) -1)
     @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    - 	while (1) {
    - 		prepare_checkpoint(state, &checkpoint, idx, flags);
    + 			crc32_begin(state->f);
    + 		}
      		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
     -					 fd, size, path, flags))
     +					 &source, flags))
      			break;
    - 		truncate_checkpoint(state, &checkpoint, idx);
    + 		/*
    + 		 * Writing this object to the current pack will make
    +@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    + 		hashfile_truncate(state->f, &checkpoint);
    + 		state->offset = checkpoint.offset;
    + 		flush_bulk_checkin_packfile(state);
     -		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
     +		if (bulk_checkin_source_seek_to(&source, seekback) == (off_t)-1)
      			return error("cannot seek back");
      	}
    - 	finalize_checkpoint(state, &ctx, &checkpoint, idx, result_oid);
    + 	the_hash_algo->final_oid_fn(result_oid, &ctx);
 7:  04ec74e357 !  2:  9d633df339 bulk-checkin: generify `stream_blob_to_pack()` for arbitrary types
    @@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *sta
      	s.avail_out = sizeof(obuf) - hdrlen;
      
     @@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    - 
    - 	while (1) {
    - 		prepare_checkpoint(state, &checkpoint, idx, flags);
    + 			idx->offset = state->offset;
    + 			crc32_begin(state->f);
    + 		}
     -		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
     -					 &source, flags))
     +		if (!stream_obj_to_pack(state, &ctx, &already_hashed_to,
     +					&source, OBJ_BLOB, flags))
      			break;
    - 		truncate_checkpoint(state, &checkpoint, idx);
    - 		if (bulk_checkin_source_seek_to(&source, seekback) == (off_t)-1)
    + 		/*
    + 		 * Writing this object to the current pack will make
 -:  ---------- >  3:  d5bbd7810e bulk-checkin: refactor deflate routine to accept a `bulk_checkin_source`
 6:  4e9bac5bc1 =  4:  e427fe6ad3 bulk-checkin: implement `SOURCE_INCORE` mode for `bulk_checkin_source`
 8:  8667b76365 !  5:  48095afe80 bulk-checkin: introduce `index_blob_bulk_checkin_incore()`
    @@ Commit message
         objects individually as loose.
     
         Similar to the existing `index_blob_bulk_checkin()` function, the
    -    entrypoint delegates to `deflate_blob_to_pack_incore()`, which is
    -    responsible for formatting the pack header and then deflating the
    -    contents into the pack. The latter is accomplished by calling
    -    deflate_obj_contents_to_pack_incore(), which takes advantage of the
    -    earlier refactorings and is responsible for writing the object to the
    -    pack and handling any overage from pack.packSizeLimit.
    -
    -    The bulk of the new functionality is implemented in the function
    -    `stream_obj_to_pack()`, which can handle streaming objects from memory
    -    to the bulk-checkin pack as a result of the earlier refactoring.
    +    entrypoint delegates to `deflate_obj_to_pack_incore()`. That function in
    +    turn delegates to deflate_obj_to_pack(), which is responsible for
    +    formatting the pack header and then deflating the contents into the
    +    pack.
     
         Consistent with the rest of the bulk-checkin mechanism, there are no
         direct tests here. In future commits when we expose this new
    @@ Commit message
         Signed-off-by: Taylor Blau <me@ttaylorr.com>
     
      ## bulk-checkin.c ##
    -@@ bulk-checkin.c: static void finalize_checkpoint(struct bulk_checkin_packfile *state,
    - 	}
    +@@ bulk-checkin.c: static int deflate_obj_to_pack(struct bulk_checkin_packfile *state,
    + 	return 0;
      }
      
    -+static int deflate_obj_contents_to_pack_incore(struct bulk_checkin_packfile *state,
    -+					       git_hash_ctx *ctx,
    -+					       struct hashfile_checkpoint *checkpoint,
    -+					       struct object_id *result_oid,
    -+					       const void *buf, size_t size,
    -+					       enum object_type type,
    -+					       const char *path, unsigned flags)
    ++static int deflate_obj_to_pack_incore(struct bulk_checkin_packfile *state,
    ++				       struct object_id *result_oid,
    ++				       const void *buf, size_t size,
    ++				       const char *path, enum object_type type,
    ++				       unsigned flags)
     +{
    -+	struct pack_idx_entry *idx = NULL;
    -+	off_t already_hashed_to = 0;
     +	struct bulk_checkin_source source = {
     +		.type = SOURCE_INCORE,
     +		.buf = buf,
    @@ bulk-checkin.c: static void finalize_checkpoint(struct bulk_checkin_packfile *st
     +		.path = path,
     +	};
     +
    -+	/* Note: idx is non-NULL when we are writing */
    -+	if (flags & HASH_WRITE_OBJECT)
    -+		CALLOC_ARRAY(idx, 1);
    -+
    -+	while (1) {
    -+		prepare_checkpoint(state, checkpoint, idx, flags);
    -+
    -+		if (!stream_obj_to_pack(state, ctx, &already_hashed_to, &source,
    -+					type, flags))
    -+			break;
    -+		truncate_checkpoint(state, checkpoint, idx);
    -+		bulk_checkin_source_seek_to(&source, 0);
    -+	}
    -+
    -+	finalize_checkpoint(state, ctx, checkpoint, idx, result_oid);
    -+
    -+	return 0;
    -+}
    -+
    -+static int deflate_blob_to_pack_incore(struct bulk_checkin_packfile *state,
    -+				       struct object_id *result_oid,
    -+				       const void *buf, size_t size,
    -+				       const char *path, unsigned flags)
    -+{
    -+	git_hash_ctx ctx;
    -+	struct hashfile_checkpoint checkpoint = {0};
    -+
    -+	format_object_header_hash(the_hash_algo, &ctx, &checkpoint, OBJ_BLOB,
    -+				  size);
    -+
    -+	return deflate_obj_contents_to_pack_incore(state, &ctx, &checkpoint,
    -+						   result_oid, buf, size,
    -+						   OBJ_BLOB, path, flags);
    ++	return deflate_obj_to_pack(state, result_oid, &source, type, 0, flags);
     +}
     +
      static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    @@ bulk-checkin.c: int index_blob_bulk_checkin(struct object_id *oid,
     +				   const void *buf, size_t size,
     +				   const char *path, unsigned flags)
     +{
    -+	int status = deflate_blob_to_pack_incore(&bulk_checkin_packfile, oid,
    -+						 buf, size, path, flags);
    ++	int status = deflate_obj_to_pack_incore(&bulk_checkin_packfile, oid,
    ++						buf, size, path, OBJ_BLOB,
    ++						flags);
     +	if (!odb_transaction_nesting)
     +		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
     +	return status;
 9:  cba043ef14 !  6:  60568f9281 bulk-checkin: introduce `index_tree_bulk_checkin_incore()`
    @@ Commit message
         machinery will have enough to keep track of the converted object's hash
         in order to update the compatibility mapping.
     
    -    Within `deflate_tree_to_pack_incore()`, the changes should be limited
    -    to something like:
    +    Within some thin wrapper around `deflate_obj_to_pack_incore()` (perhaps
    +    `deflate_tree_to_pack_incore()`), the changes should be limited to
    +    something like:
     
             struct strbuf converted = STRBUF_INIT;
             if (the_repository->compat_hash_algo) {
    @@ Commit message
         Signed-off-by: Taylor Blau <me@ttaylorr.com>
     
      ## bulk-checkin.c ##
    -@@ bulk-checkin.c: static int deflate_blob_to_pack_incore(struct bulk_checkin_packfile *state,
    - 						   OBJ_BLOB, path, flags);
    - }
    - 
    -+static int deflate_tree_to_pack_incore(struct bulk_checkin_packfile *state,
    -+				       struct object_id *result_oid,
    -+				       const void *buf, size_t size,
    -+				       const char *path, unsigned flags)
    -+{
    -+	git_hash_ctx ctx;
    -+	struct hashfile_checkpoint checkpoint = {0};
    -+
    -+	format_object_header_hash(the_hash_algo, &ctx, &checkpoint, OBJ_TREE,
    -+				  size);
    -+
    -+	return deflate_obj_contents_to_pack_incore(state, &ctx, &checkpoint,
    -+						   result_oid, buf, size,
    -+						   OBJ_TREE, path, flags);
    -+}
    -+
    - static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
    - 				struct object_id *result_oid,
    - 				int fd, size_t size,
     @@ bulk-checkin.c: int index_blob_bulk_checkin_incore(struct object_id *oid,
      	return status;
      }
    @@ bulk-checkin.c: int index_blob_bulk_checkin_incore(struct object_id *oid,
     +				   const void *buf, size_t size,
     +				   const char *path, unsigned flags)
     +{
    -+	int status = deflate_tree_to_pack_incore(&bulk_checkin_packfile, oid,
    -+						 buf, size, path, flags);
    ++	int status = deflate_obj_to_pack_incore(&bulk_checkin_packfile, oid,
    ++						buf, size, path, OBJ_TREE,
    ++						flags);
     +	if (!odb_transaction_nesting)
     +		flush_bulk_checkin_packfile(&bulk_checkin_packfile);
     +	return status;
10:  ae70508037 =  7:  b9be9df122 builtin/merge-tree.c: implement support for `--write-pack`

Comments

Junio C Hamano Oct. 19, 2023, 9:47 p.m. UTC | #1
Taylor Blau <me@ttaylorr.com> writes:

> By factoring out a common
> "deflate object to pack" that takes an abstract bulk_checkin_source as a
> parameter, all of the earlier refactorings can be dropped since we
> retain only a single caller instead of multiple.

Ah, that is how we managed to lose the preparatory split into new
helper functions.  The names given to these functions that represent
steps of checkpointing were well thought out, and they served their
documentation purposes very well, but if they each have only a single
caller, then that is fine ;-).

I _think_ I carefully followed the code/data flow of the new code to
see that we do identical things when streaming from a file to a pack
(in other words, this does not introduce a regression), and nothing
objectionable stood out.  The order of presentation was a good way
to let readers do the same, and the actual "now we have everything
working as we want, let's teach the machinery to also read from an
in-core buffer" step being very small gives us extra confidence in
the result.  Very nicely done.
Jeff King Oct. 20, 2023, 7:29 a.m. UTC | #2
On Thu, Oct 19, 2023 at 01:28:38PM -0400, Taylor Blau wrote:

> This is a new round that is significantly simplified thanks to
> another very helpful suggestion[1] from Junio. By factoring out a common
> "deflate object to pack" that takes an abstract bulk_checkin_source as a
> parameter, all of the earlier refactorings can be dropped since we
> retain only a single caller instead of multiple.

FWIW, I gave this a read-over and did not see anything wrong (on the
contrary, I think the new abstractions make it quite easy to follow).

Two thoughts that occurred to me while reading it (but neither
actionable for your series):

  - Having not worked with the bulk-checkin code much before, I thought
    it only put in one blob per pack, and thus you'd have to teach it to
    handle multiple objects, too. But fortunately I was wrong, and it
    handles this case already. (I mention this mainly because we had
    talked about it off-list a few weeks ago, and some of my confusion
    may have seeped into my comments then).

  - I did briefly wonder if we need this SOURCE abstraction at all. The
    file source assumes we can seek() and read(), so it must be a
    regular file. We could probably just mmap() it and treat it as
    in-core, too (this is what index_fd() and index_path() do under the
    hood!). But it would probably be a disservice to any platforms that
    do not support mmap, as the fallback is to read into a full buffer
    (and the whole point of the bulk checkin code is to avoid that). A
    rough sketch of the mmap() idea follows.
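
    Just to make that thought concrete, a rough sketch (not something
    I'm proposing; the function name is made up, the
    index_blob_bulk_checkin_incore() call follows the parameter list
    visible in patch 5, and like the rest of bulk-checkin.c this
    assumes git-compat-util.h for fstat()/mmap()):

        static int index_blob_via_mmap(struct object_id *oid, int fd,
                                       const char *path, unsigned flags)
        {
                struct stat st;
                void *buf;
                int status;

                if (fstat(fd, &st) < 0)
                        return error_errno("cannot stat '%s'", path);

                /* map the whole file and treat it as an in-core buffer */
                buf = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
                status = index_blob_bulk_checkin_incore(oid, buf, st.st_size,
                                                        path, flags);
                munmap(buf, st.st_size);
                return status;
        }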

-Peff
Junio C Hamano Oct. 20, 2023, 4:53 p.m. UTC | #3
Jeff King <peff@peff.net> writes:

>   - Having not worked with the bulk-checkin code much before, I thought
>     it only put in one blob per pack, and thus you'd have to teach it to
>     handle multiple objects, too. But fortunately I was wrong, and it
>     handles this case already. (I mention this mainly because we had
>     talked about it off-list a few weeks ago, and some of my confusion
>     may have seeped into my comments then).

You probably had it mixed up with a real limitation, i.e., "unlike
fast-import, which considers whether the immediately preceding object
could be a good delta base, this code does not even attempt any
deltification" ;-)

>   - I did briefly wonder if we need this SOURCE abstraction at all. The
>     file source assumes we can seek() and read(), so it must be a
>     regular file. We could probably just mmap() it and treat it as
>     in-core, too (this is what index_fd() and index_path() do under the
>     hood!).

Ahhhh, I've never thought of that one.  I was actually hoping that
we could add an abstraction layer that can be reused elsewhere and
everywhere (not limited to the bulk-checkin codepath for its source
material), which might help folks who want to refactor Git enough to
allow them to plug in $CORP-specific goodies like a virtual
filesystem layer ;-).

>     But it would probably be a disservice to any platforms that
>     do not support mmap, as the fallback is to read into a full buffer
>     (and the whole point of the bulk checkin code is to avoid that).

Yes.  The result was a pleasant read.

Thanks, both.
Patrick Steinhardt Oct. 23, 2023, 9:19 a.m. UTC | #4
On Thu, Oct 19, 2023 at 01:28:38PM -0400, Taylor Blau wrote:
> (Rebased onto the current tip of 'master', which is 813d9a9188 (The
> nineteenth batch, 2023-10-18) at the time of writing).
> 
> This series implements support for a new merge-tree option,
> `--write-pack`, which causes any newly-written objects to be packed
> together instead of being stored individually as loose.
> 
> I intentionally broke this off from the existing thread, since I
> accidentally rerolled mine and Jonathan Tan's Bloom v2 series into it,
> causing some confusion.
> 
> This is a new round that is significantly simplified thanks to
> another very helpful suggestion[1] from Junio. By factoring out a common
> "deflate object to pack" that takes an abstract bulk_checkin_source as a
> parameter, all of the earlier refactorings can be dropped since we
> retain only a single caller instead of multiple.
> 
> This resulted in a rather satisfying range-diff (included below, as
> usual), and a similarly satisfying inter-diff:
> 
>     $ git diff --stat tb/ort-bulk-checkin.v3..
>      bulk-checkin.c | 203 ++++++++++++++++---------------------------------
>      1 file changed, 64 insertions(+), 139 deletions(-)
> 
> Beyond that, the changes since last time can be viewed in the range-diff
> below. Thanks in advance for any review!
> 
> [1]: https://lore.kernel.org/git/xmqq34y7plj4.fsf@gitster.g/

A single question regarding an `assert()` from my side. Other than that
the series looks good to me, thanks.

Patrick

> [... remainder of the quoted cover letter and range-diff snipped ...]