diff mbox series

[v7,2/5] object-file API: add a format_object_header() function

Message ID 20211221115201.12120-3-chiyutianyi@gmail.com (mailing list archive)
State Superseded
Headers show
Series unpack large blobs in stream | expand

Commit Message

Han Xin Dec. 21, 2021, 11:51 a.m. UTC
From: Ævar Arnfjörð Bjarmason <avarab@gmail.com>

Add a convenience function to wrap the xsnprintf() command that
generates loose object headers. This code was copy/pasted in various
parts of the codebase, let's define it in one place and re-use it from
there.

All except one caller of it had a valid "enum object_type" for us,
it's only write_object_file_prepare() which might need to deal with
"git hash-object --literally" and a potential garbage type. Let's have
the primary API use an "enum object_type", and define an *_extended()
function that can take an arbitrary "const char *" for the type.

See [1] for the discussion that prompted this patch, i.e. new code in
object-file.c that wanted to copy/paste the xsnprintf() invocation.

1. https://lore.kernel.org/git/211213.86bl1l9bfz.gmgdl@evledraar.gmail.com/

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
---
 builtin/index-pack.c |  3 +--
 bulk-checkin.c       |  4 ++--
 cache.h              | 21 +++++++++++++++++++++
 http-push.c          |  2 +-
 object-file.c        | 14 +++++++++++---
 5 files changed, 36 insertions(+), 8 deletions(-)

Comments

René Scharfe Dec. 21, 2021, 2:30 p.m. UTC | #1
Am 21.12.21 um 12:51 schrieb Han Xin:
> From: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>
> Add a convenience function to wrap the xsnprintf() command that
> generates loose object headers. This code was copy/pasted in various
> parts of the codebase, let's define it in one place and re-use it from
> there.
>
> All except one caller of it had a valid "enum object_type" for us,
> it's only write_object_file_prepare() which might need to deal with
> "git hash-object --literally" and a potential garbage type. Let's have
> the primary API use an "enum object_type", and define an *_extended()
> function that can take an arbitrary "const char *" for the type.
>
> See [1] for the discussion that prompted this patch, i.e. new code in
> object-file.c that wanted to copy/paste the xsnprintf() invocation.
>
> 1. https://lore.kernel.org/git/211213.86bl1l9bfz.gmgdl@evledraar.gmail.com/
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
> ---
>  builtin/index-pack.c |  3 +--
>  bulk-checkin.c       |  4 ++--
>  cache.h              | 21 +++++++++++++++++++++
>  http-push.c          |  2 +-
>  object-file.c        | 14 +++++++++++---
>  5 files changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index c23d01de7d..4a765ddae6 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -449,8 +449,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size,
>  	int hdrlen;
>
>  	if (!is_delta_type(type)) {
> -		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX,
> -				   type_name(type),(uintmax_t)size) + 1;
> +		hdrlen = format_object_header(hdr, sizeof(hdr), type, (uintmax_t)size);
                                                                      ^^^^^^^^^^^
This explicit cast is unnecessary.  It was needed with xsnprintf(), but
that implementation detail is handled inside the new helper function.

(format_object_header() takes a size_t; even if unsigned long would be
wider than that on some weird architecture, casting the size to
uintmax_t will not avoid the implicit truncation happening during the
function call.)

>  		the_hash_algo->init_fn(&c);
>  		the_hash_algo->update_fn(&c, hdr, hdrlen);
>  	} else
> diff --git a/bulk-checkin.c b/bulk-checkin.c
> index 8785b2ac80..1733a1de4f 100644
> --- a/bulk-checkin.c
> +++ b/bulk-checkin.c
> @@ -220,8 +220,8 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
>  	if (seekback == (off_t) -1)
>  		return error("cannot find the current offset");
>
> -	header_len = xsnprintf((char *)obuf, sizeof(obuf), "%s %" PRIuMAX,
> -			       type_name(type), (uintmax_t)size) + 1;
> +	header_len = format_object_header((char *)obuf, sizeof(obuf),
> +					 type, (uintmax_t)size);
                                               ^^^^^^^^^^^
Same here, just that size is already of type size_t, so a cast makes
even less sense.

>  	the_hash_algo->init_fn(&ctx);
>  	the_hash_algo->update_fn(&ctx, obuf, header_len);
>
> diff --git a/cache.h b/cache.h
> index cfba463aa9..64071a8d80 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1310,6 +1310,27 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
>  						    unsigned long bufsiz,
>  						    struct strbuf *hdrbuf);
>
> +/**
> + * format_object_header() is a thin wrapper around s xsnprintf() that
> + * writes the initial "<type> <obj-len>" part of the loose object
> + * header. It returns the size that snprintf() returns + 1.
> + *
> + * The format_object_header_extended() function allows for writing a
> + * type_name that's not one of the "enum object_type" types. This is
> + * used for "git hash-object --literally". Pass in a OBJ_NONE as the
> + * type, and a non-NULL "type_str" to do that.
> + *
> + * format_object_header() is a convenience wrapper for
> + * format_object_header_extended().
> + */
> +int format_object_header_extended(char *str, size_t size, enum object_type type,
> +				 const char *type_str, size_t objsize);
> +static inline int format_object_header(char *str, size_t size,
> +				      enum object_type type, size_t objsize)
> +{
> +	return format_object_header_extended(str, size, type, NULL, objsize);
> +}
> +
>  /**
>   * parse_loose_header() parses the starting "<type> <len>\0" of an
>   * object. If it doesn't follow that format -1 is returned. To check
> diff --git a/http-push.c b/http-push.c
> index 3309aaf004..f55e316ff4 100644
> --- a/http-push.c
> +++ b/http-push.c
> @@ -363,7 +363,7 @@ static void start_put(struct transfer_request *request)
>  	git_zstream stream;
>
>  	unpacked = read_object_file(&request->obj->oid, &type, &len);
> -	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(type), (uintmax_t)len) + 1;
> +	hdrlen = format_object_header(hdr, sizeof(hdr), type, (uintmax_t)len);
                                                              ^^^^^^^^^^^
Same here; len is of type unsigned long.

>
>  	/* Set it up */
>  	git_deflate_init(&stream, zlib_compression_level);
> diff --git a/object-file.c b/object-file.c
> index eb1426f98c..6bba4766f9 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1006,6 +1006,14 @@ void *xmmap(void *start, size_t length,
>  	return ret;
>  }
>
> +int format_object_header_extended(char *str, size_t size, enum object_type type,
> +				 const char *typestr, size_t objsize)
> +{
> +	const char *s = type == OBJ_NONE ? typestr : type_name(type);
> +
> +	return xsnprintf(str, size, "%s %"PRIuMAX, s, (uintmax_t)objsize) + 1;
                                                      ^^^^^^^^^^^
This cast is necessary to match PRIuMAX.  And that is used because the z
modifier (as in e.g. printf("%zu", sizeof(size_t));) was only added in
C99 and not all platforms may have it.  (Perhaps this cautious approach
is worth revisiting separately, now that some time has passed, but this
patch series should still use PRIuMAX, as it does.)

> +}
> +
>  /*
>   * With an in-core object data in "map", rehash it to make sure the
>   * object name actually matches "oid" to detect object corruption.
> @@ -1034,7 +1042,7 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
>  		return -1;
>
>  	/* Generate the header */
> -	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(obj_type), (uintmax_t)size) + 1;
> +	hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
>
>  	/* Sha1.. */
>  	r->hash_algo->init_fn(&c);
> @@ -1734,7 +1742,7 @@ static void write_object_file_prepare(const struct git_hash_algo *algo,
>  	git_hash_ctx c;
>
>  	/* Generate the header */
> -	*hdrlen = xsnprintf(hdr, *hdrlen, "%s %"PRIuMAX , type, (uintmax_t)len)+1;
> +	*hdrlen = format_object_header_extended(hdr, *hdrlen, OBJ_NONE, type, len);
>
>  	/* Sha1.. */
>  	algo->init_fn(&c);
> @@ -2006,7 +2014,7 @@ int force_object_loose(const struct object_id *oid, time_t mtime)
>  	buf = read_object(the_repository, oid, &type, &len);
>  	if (!buf)
>  		return error(_("cannot read object for %s"), oid_to_hex(oid));
> -	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(type), (uintmax_t)len) + 1;
> +	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
>  	ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0);
>  	free(buf);
>

No explicit cast in these three cases -- good.  They all pass an
unsigned long as last parameter btw.

René
Jiang Xin Dec. 31, 2021, 3:12 a.m. UTC | #2
On Wed, Dec 22, 2021 at 2:56 AM Han Xin <chiyutianyi@gmail.com> wrote:
>
> From: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>
> Add a convenience function to wrap the xsnprintf() command that
> generates loose object headers. This code was copy/pasted in various
> parts of the codebase, let's define it in one place and re-use it from
> there.
>
> All except one caller of it had a valid "enum object_type" for us,
> it's only write_object_file_prepare() which might need to deal with
> "git hash-object --literally" and a potential garbage type. Let's have
> the primary API use an "enum object_type", and define an *_extended()
> function that can take an arbitrary "const char *" for the type.
>
> See [1] for the discussion that prompted this patch, i.e. new code in
> object-file.c that wanted to copy/paste the xsnprintf() invocation.
>
> 1. https://lore.kernel.org/git/211213.86bl1l9bfz.gmgdl@evledraar.gmail.com/
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
> ---
>  builtin/index-pack.c |  3 +--
>  bulk-checkin.c       |  4 ++--
>  cache.h              | 21 +++++++++++++++++++++
>  http-push.c          |  2 +-
>  object-file.c        | 14 +++++++++++---
>  5 files changed, 36 insertions(+), 8 deletions(-)

After a offline review with Han Xin, we feel it's better to move this
fixup commit to the end of this series, and this commit will also fix
an additional "xsnprintf()" we introduced in this series.

--
Jiang Xin
Ævar Arnfjörð Bjarmason Feb. 1, 2022, 2:28 p.m. UTC | #3
On Tue, Dec 21 2021, René Scharfe wrote:

> Am 21.12.21 um 12:51 schrieb Han Xin:
>> From: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> [...]
>>  		the_hash_algo->init_fn(&c);
>>  		the_hash_algo->update_fn(&c, hdr, hdrlen);
>>  	} else
>> diff --git a/bulk-checkin.c b/bulk-checkin.c
>> index 8785b2ac80..1733a1de4f 100644
>> --- a/bulk-checkin.c
>> +++ b/bulk-checkin.c
>> @@ -220,8 +220,8 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
>>  	if (seekback == (off_t) -1)
>>  		return error("cannot find the current offset");
>>
>> -	header_len = xsnprintf((char *)obuf, sizeof(obuf), "%s %" PRIuMAX,
>> -			       type_name(type), (uintmax_t)size) + 1;
>> +	header_len = format_object_header((char *)obuf, sizeof(obuf),
>> +					 type, (uintmax_t)size);
>                                                ^^^^^^^^^^^
> Same here, just that size is already of type size_t, so a cast makes
> even less sense.

Thanks, this and the below is something I made sure to include in a
re-roll I'm about to send (to do these cleanups in object-file.c
separately from Han Xin's series).

>> +int format_object_header_extended(char *str, size_t size, enum object_type type,
>> +				 const char *typestr, size_t objsize)
>> +{
>> +	const char *s = type == OBJ_NONE ? typestr : type_name(type);
>> +
>> +	return xsnprintf(str, size, "%s %"PRIuMAX, s, (uintmax_t)objsize) + 1;
>                                                       ^^^^^^^^^^^
> This cast is necessary to match PRIuMAX.  And that is used because the z
> modifier (as in e.g. printf("%zu", sizeof(size_t));) was only added in
> C99 and not all platforms may have it.  (Perhaps this cautious approach
> is worth revisiting separately, now that some time has passed, but this
> patch series should still use PRIuMAX, as it does.)

I tried to use %z recently and found that the CI breaks on Windows, but
this was a few months ago. But I think the status of that particular C99
feature is that we can't use it freely, unfortunately. I may be wrong
about that, I haven't looked it any detail beyond running those CI
errors.
diff mbox series

Patch

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index c23d01de7d..4a765ddae6 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -449,8 +449,7 @@  static void *unpack_entry_data(off_t offset, unsigned long size,
 	int hdrlen;
 
 	if (!is_delta_type(type)) {
-		hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX,
-				   type_name(type),(uintmax_t)size) + 1;
+		hdrlen = format_object_header(hdr, sizeof(hdr), type, (uintmax_t)size);
 		the_hash_algo->init_fn(&c);
 		the_hash_algo->update_fn(&c, hdr, hdrlen);
 	} else
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 8785b2ac80..1733a1de4f 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -220,8 +220,8 @@  static int deflate_to_pack(struct bulk_checkin_state *state,
 	if (seekback == (off_t) -1)
 		return error("cannot find the current offset");
 
-	header_len = xsnprintf((char *)obuf, sizeof(obuf), "%s %" PRIuMAX,
-			       type_name(type), (uintmax_t)size) + 1;
+	header_len = format_object_header((char *)obuf, sizeof(obuf),
+					 type, (uintmax_t)size);
 	the_hash_algo->init_fn(&ctx);
 	the_hash_algo->update_fn(&ctx, obuf, header_len);
 
diff --git a/cache.h b/cache.h
index cfba463aa9..64071a8d80 100644
--- a/cache.h
+++ b/cache.h
@@ -1310,6 +1310,27 @@  enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 						    unsigned long bufsiz,
 						    struct strbuf *hdrbuf);
 
+/**
+ * format_object_header() is a thin wrapper around s xsnprintf() that
+ * writes the initial "<type> <obj-len>" part of the loose object
+ * header. It returns the size that snprintf() returns + 1.
+ *
+ * The format_object_header_extended() function allows for writing a
+ * type_name that's not one of the "enum object_type" types. This is
+ * used for "git hash-object --literally". Pass in a OBJ_NONE as the
+ * type, and a non-NULL "type_str" to do that.
+ *
+ * format_object_header() is a convenience wrapper for
+ * format_object_header_extended().
+ */
+int format_object_header_extended(char *str, size_t size, enum object_type type,
+				 const char *type_str, size_t objsize);
+static inline int format_object_header(char *str, size_t size,
+				      enum object_type type, size_t objsize)
+{
+	return format_object_header_extended(str, size, type, NULL, objsize);
+}
+
 /**
  * parse_loose_header() parses the starting "<type> <len>\0" of an
  * object. If it doesn't follow that format -1 is returned. To check
diff --git a/http-push.c b/http-push.c
index 3309aaf004..f55e316ff4 100644
--- a/http-push.c
+++ b/http-push.c
@@ -363,7 +363,7 @@  static void start_put(struct transfer_request *request)
 	git_zstream stream;
 
 	unpacked = read_object_file(&request->obj->oid, &type, &len);
-	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(type), (uintmax_t)len) + 1;
+	hdrlen = format_object_header(hdr, sizeof(hdr), type, (uintmax_t)len);
 
 	/* Set it up */
 	git_deflate_init(&stream, zlib_compression_level);
diff --git a/object-file.c b/object-file.c
index eb1426f98c..6bba4766f9 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1006,6 +1006,14 @@  void *xmmap(void *start, size_t length,
 	return ret;
 }
 
+int format_object_header_extended(char *str, size_t size, enum object_type type,
+				 const char *typestr, size_t objsize)
+{
+	const char *s = type == OBJ_NONE ? typestr : type_name(type);
+
+	return xsnprintf(str, size, "%s %"PRIuMAX, s, (uintmax_t)objsize) + 1;
+}
+
 /*
  * With an in-core object data in "map", rehash it to make sure the
  * object name actually matches "oid" to detect object corruption.
@@ -1034,7 +1042,7 @@  int check_object_signature(struct repository *r, const struct object_id *oid,
 		return -1;
 
 	/* Generate the header */
-	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(obj_type), (uintmax_t)size) + 1;
+	hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
 
 	/* Sha1.. */
 	r->hash_algo->init_fn(&c);
@@ -1734,7 +1742,7 @@  static void write_object_file_prepare(const struct git_hash_algo *algo,
 	git_hash_ctx c;
 
 	/* Generate the header */
-	*hdrlen = xsnprintf(hdr, *hdrlen, "%s %"PRIuMAX , type, (uintmax_t)len)+1;
+	*hdrlen = format_object_header_extended(hdr, *hdrlen, OBJ_NONE, type, len);
 
 	/* Sha1.. */
 	algo->init_fn(&c);
@@ -2006,7 +2014,7 @@  int force_object_loose(const struct object_id *oid, time_t mtime)
 	buf = read_object(the_repository, oid, &type, &len);
 	if (!buf)
 		return error(_("cannot read object for %s"), oid_to_hex(oid));
-	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(type), (uintmax_t)len) + 1;
+	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
 	ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0);
 	free(buf);