diff mbox series

[v3,3/3] object-info: add option for retrieving object info

Message ID 20220328191112.3092139-4-calvinwan@google.com (mailing list archive)
State New, archived
Headers show
Series object-info: add option for retrieving object info | expand

Commit Message

Calvin Wan March 28, 2022, 7:11 p.m. UTC
Sometimes it is useful to get information about an object without
having to download it completely. The server logic has already been
implemented as “a2ba162cda (object-info: support for retrieving object
info, 2021-04-20)”. This patch implements the client option for
it. Currently, only 'size' is supported, however future patches can
implement additional options.

Add ‘--object-info’ option to fetch. This option allows the client to
make an object-info command request to a server that supports protocol
v2. If the server is v2, but does not allow for the object-info
command request, fetch the objects as if it were a standard fetch
(however without changing any refs). Therefore, hook `fetch
object-info` into transport_fetch_refs() to easily fallback if needed.

A previous patch added the `transfer.advertiseObjectInfo` config
option, of which this patch utilizes to test the fallback.

---
 Documentation/fetch-options.txt |   5 ++
 Documentation/git-fetch.txt     |   1 +
 builtin/fetch.c                 |  36 ++++++++-
 fetch-pack.c                    |  42 +++++++++-
 fetch-pack.h                    |   9 +++
 t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
 transport-helper.c              |   8 +-
 transport-internal.h            |   1 +
 transport.c                     |  75 ++++++++++++++++-
 transport.h                     |   9 +++
 10 files changed, 315 insertions(+), 9 deletions(-)
 create mode 100755 t/t5583-fetch-object-info.sh

Comments

Junio C Hamano March 29, 2022, 7:57 p.m. UTC | #1
Calvin Wan <calvinwan@google.com> writes:

> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };
>  
>  #endif

We need something like this squashed in in order to pass "make hdr-check".

 transport-internal.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/transport-internal.h b/transport-internal.h
index 04fa015011..1416c9197b 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -5,6 +5,7 @@ struct ref;
 struct transport;
 struct strvec;
 struct transport_ls_refs_options;
+struct oid_array;
 
 struct transport_vtable {
 	/**
Junio C Hamano March 29, 2022, 10:54 p.m. UTC | #2
Calvin Wan <calvinwan@google.com> writes:

> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.

Sorry, but I do not see anything common between "git fetch" that
fetches history and associated objects and "retrieving only object
info".  Other than "the way I happened to have chosen to implement,
what is used to implement fetch has the most commonality".

If we were to add more "server offloading", say "run blame on the
server side", are we going to piggy back over fetch-pack, too?

It is not necessarily bad way to experiment, to reuse the code to
establish connection, check if the other side is capable to serve
us, throw args at them and then retrieve the result, but exposing
that implemention detail makes a HORRIBLE UX to force users to say
"git fetch --blame='master Makefile' origin".

Shouldn't we be making the part that we truly need to reuse into a
separate API out of fetch-pack and either (1) allow new commands be
easily written reusing the code to create "git remote-object-info"
and "git remote-blame", or (2) come up with a single "do things on
remote" command with various subcommands, i.e. "git over-there
object-info" and "git over-there blame"?

> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---

Missing sign-off.

> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]

This is a start of slippery slope of making "fetch" a kitchen sink
that does not have much to do with "git fetch".  Let's not go this
route.

> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>  
>  	packet_trace_identity("fetch");
>  
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>  
> -	if (all) {
> +	if (object_info_options.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)

Yuck.  Let's read this again, realize how little commonality we have
in processing args and barf.  That's all because we tried to
piggy-back on "fetch" because the patch wanted to reuse the parts
that are not shown in the patch text.  The right way to do so would
be to factor that "part that do not appear in the patch, because
they are common" out into callable from new code.  That way, we do
not have to contaminate this if/else conditional, both arms of which
is still about running "fetch" to retrieve history and connected
objects, and has nothing to do with "fetch object info".  The way
this patch changes this part of the code is an unnecessary risk to
break the normal "git fetch", and it does not have to be.

I like the idea to give client side a way to ask server to do random
things.  I do not think I want to see unrelated things piggy-backing
on existing client programs only because that is an expedient way to
implement it.
Taylor Blau March 29, 2022, 11:19 p.m. UTC | #3
Hi Calvin,

Junio took a deeper look below, but here are a few small code-hygiene
issues that I noticed while taking a quicker look through this patch.

On Mon, Mar 28, 2022 at 07:11:12PM +0000, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
>
> Add ‘--object-info’ option to fetch. This option allows the client to

The single-quotes here look like smart quotes. I don't think we forbid
these explicitly within commit messages, but using the standard '
(ASCII 0x27) character is more common.

> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
>
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index f903683189..f1ca95c32d 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -299,3 +299,8 @@ endif::git-pull[]
>  -6::
>  --ipv6::
>  	Use IPv6 addresses only, ignoring IPv4 addresses.
> +
> +--object-info=[<arguments>]::
> +	Fetches only the relevant information passed in arguments from a
> +	specific remote and set of object ids. Object 'size' is currently
> +	the only supported argument.
> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
>
>
>  DESCRIPTION
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 4d12c2fd4d..0b21bebfca 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
>  	NULL
>  };
>
> @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  static int fetch_write_commit_graph = -1;
>  static int stdin_refspecs = 0;
>  static int negotiate_only;
> +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
>
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> +		 N_("command request arguments")),
>  	OPT_END()
>  };
>
> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>
>  	packet_trace_identity("fetch");
>
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>
> -	if (all) {
> +	if (object_info_options.nr > 0) {

Small nit; since object_info_options.nr should never be negative, we
should instead write "if (object_info_options)".

> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)
> @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  		}
>  	}
>
> -	if (negotiate_only) {
> +	if (object_info_options.nr > 0) {
> +		if (!remote)
> +			die(_("must supply remote when using --object-info"));
> +		gtransport = prepare_transport(remote, 1);
> +		if (gtransport->smart_options) {
> +			gtransport->smart_options->object_info = 1;
> +			gtransport->smart_options->object_info_oids = &object_info_oids;
> +			gtransport->smart_options->object_info_options = &object_info_options;
> +		}
> +		if (server_options.nr)
> +			gtransport->server_options = &server_options;
> +		result = transport_fetch_refs(gtransport, NULL);
> +	} else if (negotiate_only) {
>  		struct oidset acked_commits = OIDSET_INIT;
>  		struct oidset_iter iter;
>  		const struct object_id *oid;
> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;

I don't think this would ever be practically exploit-able, since we'd
run past the argument limit long before we hit INT_MAX, but this should
be a size_t instead of an int to match the type of args->oid->nr.

> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}

Small nit, the braces around this for-loop are not required.

>  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  			      struct fetch_pack_args *args,
>  			      const struct ref *wants, struct oidset *common,
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +

Ditto here.

>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;
> @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
>  		}
>  		args->connectivity_checked = 1;
>  	}
> -
> -	update_shallow(args, sought, nr_sought, &si);
> +
> +	if (args->object_info) {
> +		struct ref *ref_cpy_reader = ref_cpy;
> +		unsigned long size = 0;
> +		while (ref_cpy_reader) {
> +			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> +			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);

Both expressions like "&(x)" can be written instead as "&x" without the
outer parenthesis.

Likewise, we do not use %li, that line should instead read:

    printf("%s %"PRIuMAX"\n", oid_to_hex(&ref_cpy_reader->old_oid), (uintmax_t)size);

Thanks,
Taylor
Junio C Hamano March 30, 2022, 12:49 a.m. UTC | #4
Junio C Hamano <gitster@pobox.com> writes:

> Shouldn't we be making the part that we truly need to reuse into a
> separate API out of fetch-pack and either (1) allow new commands be
> easily written reusing the code to create "git remote-object-info"
> and "git remote-blame", or (2) come up with a single "do things on
> remote" command with various subcommands, i.e. "git over-there
> object-info" and "git over-there blame"?

For completeness, we could also make the "git archive" the gold
standard.  If we want "git blame" that is mostly executed on the
remote site, we could teach "git blame --remote <there>", which may
be where a user, who does not know nor care about how things are
implemented, would expect to find the feature.  Is "object-info" in
principle like executing "cat-file --batch-check" remotely?

I think that "Object 'size' is currently the only one the server
knows to emit", combined with the lack of any client side support so
far, means that it is not too late to rework the object-info thing
and possibly reuse more from "cat-file --batch-check".

If we can create a system where running things locally and remotely
are comparable in feature, that would be great, but I do not think
that is a realistic goal to aim for.

Thoughts?
John Cai March 30, 2022, 10:06 p.m. UTC | #5
On 28 Mar 2022, at 15:11, Calvin Wan wrote:

> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
>
> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
>
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
>
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index f903683189..f1ca95c32d 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -299,3 +299,8 @@ endif::git-pull[]
>  -6::
>  --ipv6::
>  	Use IPv6 addresses only, ignoring IPv4 addresses.
> +
> +--object-info=[<arguments>]::
> +	Fetches only the relevant information passed in arguments from a
> +	specific remote and set of object ids. Object 'size' is currently
> +	the only supported argument.
> diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> index 550c16ca61..a13d2ba7ad 100644
> --- a/Documentation/git-fetch.txt
> +++ b/Documentation/git-fetch.txt
> @@ -13,6 +13,7 @@ SYNOPSIS
>  'git fetch' [<options>] <group>
>  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
>  'git fetch' --all [<options>]
> +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
>
>
>  DESCRIPTION
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 4d12c2fd4d..0b21bebfca 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -29,6 +29,9 @@
>  #include "commit-graph.h"
>  #include "shallow.h"
>  #include "worktree.h"
> +#include "protocol.h"
> +#include "pkt-line.h"
> +#include "connect.h"
>
>  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>
> @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
>  	N_("git fetch [<options>] <group>"),
>  	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
>  	N_("git fetch --all [<options>]"),
> +	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
>  	NULL
>  };
>
> @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  static int fetch_write_commit_graph = -1;
>  static int stdin_refspecs = 0;
>  static int negotiate_only;
> +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
>
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
>  		 N_("write the commit-graph after fetching")),
>  	OPT_BOOL(0, "stdin", &stdin_refspecs,
>  		 N_("accept refspecs from stdin")),
> +	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> +		 N_("command request arguments")),
>  	OPT_END()
>  };
>
> @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct oid_array object_info_oids = OID_ARRAY_INIT;
>
>  	packet_trace_identity("fetch");
>
> @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	if (dry_run)
>  		write_fetch_head = 0;
>
> -	if (all) {
> +	if (object_info_options.nr > 0) {
> +		if (argc == 0 || argc == 1) {
> +			die(_("must supply remote and object ids when using --object-info"));
> +		} else {
> +			struct object_id oid;
> +			remote = remote_get(argv[0]);
> +			for (i = 1; i < argc; i++) {
> +				if (get_oid(argv[i], &oid))
> +					return error(_("malformed object id '%s'"), argv[i]);
> +				oid_array_append(&object_info_oids, &oid);
> +			}
> +		}
> +	} else if (all) {
>  		if (argc == 1)
>  			die(_("fetch --all does not take a repository argument"));
>  		else if (argc > 1)
> @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  		}
>  	}
>
> -	if (negotiate_only) {
> +	if (object_info_options.nr > 0) {
> +		if (!remote)
> +			die(_("must supply remote when using --object-info"));
> +		gtransport = prepare_transport(remote, 1);
> +		if (gtransport->smart_options) {
> +			gtransport->smart_options->object_info = 1;
> +			gtransport->smart_options->object_info_oids = &object_info_oids;
> +			gtransport->smart_options->object_info_options = &object_info_options;
> +		}
> +		if (server_options.nr)
> +			gtransport->server_options = &server_options;
> +		result = transport_fetch_refs(gtransport, NULL);
> +	} else if (negotiate_only) {
>  		struct oidset acked_commits = OIDSET_INIT;
>  		struct oidset_iter iter;
>  		const struct object_id *oid;
> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");
> +
> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}
> +
> +	packet_buf_flush(&req_buf);
> +	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +		die_errno(_("unable to write request to remote"));
> +
> +	strbuf_release(&req_buf);
> +}
> +
>  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
>  			      struct fetch_pack_args *args,
>  			      const struct ref *wants, struct oidset *common,
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;
> @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
>  		}
>  		args->connectivity_checked = 1;
>  	}
> -
> -	update_shallow(args, sought, nr_sought, &si);
> +	
> +	if (args->object_info) {
> +		struct ref *ref_cpy_reader = ref_cpy;
> +		unsigned long size = 0;
> +		while (ref_cpy_reader) {
> +			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> +			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
> +			ref_cpy_reader = ref_cpy_reader->next;
> +		}
> +	}
> +	else {
> +		update_shallow(args, sought, nr_sought, &si);
> +	}
>  cleanup:
>  	clear_shallow_info(&si);
>  	oid_array_clear(&shallows_scratch);
> diff --git a/fetch-pack.h b/fetch-pack.h
> index 7f94a2a583..23e69bb218 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -42,6 +42,7 @@ struct fetch_pack_args {
>  	unsigned update_shallow:1;
>  	unsigned reject_shallow_remote:1;
>  	unsigned deepen:1;
> +	unsigned object_info:1;
>
>  	/*
>  	 * Indicate that the remote of this request is a promisor remote. The
> @@ -68,6 +69,12 @@ struct fetch_pack_args {
>  	unsigned connectivity_checked:1;
>  };
>
> +struct object_info_args {
> +	const struct string_list *object_info_options;
> +	const struct string_list *server_options;
> +	const struct oid_array *oids;
> +};
> +
>  /*
>   * sought represents remote references that should be updated from.
>   * On return, the names that were found on the remote will have been
> @@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
>   */
>  int report_unmatched_refs(struct ref **sought, int nr_sought);
>
> +void send_object_info_request(int fd_out, struct object_info_args *args);
> +
>  #endif
> diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> new file mode 100755
> index 0000000000..e5db6d953c
> --- /dev/null
> +++ b/t/t5583-fetch-object-info.sh
> @@ -0,0 +1,138 @@
> +#!/bin/sh
> +
> +test_description='test git fetch object-info version 2'
> +
> +TEST_NO_CREATE_REPO=1
> +
> +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> +
> +. ./test-lib.sh
> +
> +# Test fetch object-info with 'git://' transport
> +
> +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> +start_git_daemon --export-all --enable=receive-pack
> +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> +
> +
> +test_expect_success 'create repo to be served by git-daemon' '
> +	git init "$daemon_parent" &&
> +	test_commit -C "$daemon_parent" message1 a.txt
> +'
> +
> +test_expect_success 'fetch object-info with git:// using protocol v2' '
> +	(
> +		cd "$daemon_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +stop_git_daemon
> +
> +# Test protocol v2 with 'http://' transport
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'create repo to be served by http:// transport' '
> +	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> +	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
> +	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
> +'
> +
> +test_expect_success 'fetch object-info with http:// using protocol v2' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_expect_success 'fetch object-info for multiple objects' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id1=$(git rev-parse message1:a.txt) &&
> +		object_id2=$(git rev-parse message2:b.txt) &&
> +		length1=$(wc -c <a.txt) &&
> +		length2=$(wc -c <b.txt) &&
> +
> +		printf "%s %d\n" "$object_id1" "$length1" >expect &&
> +		printf "%s %d\n" "$object_id2" "$length2" >>expect &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git config objectinfo.advertise false &&
> +		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)

I noticed this test doesn't confirm that standard fetch was actually used. Would
it be useful to check for the existence of the object to prove it got fetched?

Also, is seems the way this test is set up, it's fetching from itself.
This is logically fine in most cases that client == server, but it was a little
confusing at first because it looked like the invocation of git config was on
the client rather than the server, until I realized they are the same.

Since the server is being used as the client, we wouldn't be able to confirm that
the object got fetched. I realize for most of these tests, it doesn't really matter
to conflate client with server. I think this is the one exception.

But, as a larger question for the list, I'm wondering if it's best practice to separate client from
server in tests, or if it's an accepted style to just use one repository in cases
when it doesn't really matter?

Thanks
John

> +'
> +
> +test_expect_success 'fetch object-info fails on server with legacy protocol' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> +		test_i18ngrep "object-info requires protocol v2" err
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fails on malformed OID' '
> +	(
> +		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> +		malformed_object_id="this_id_is_not_valid" &&
> +		test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
> +		test_i18ngrep "malformed object id '$malformed_object_id'" err
> +	)
> +'
> +
> +test_expect_success 'fetch object-info fails on missing OID' '
> +	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
> +	test_commit -C missing_oid_repo message3 c.txt &&
> +	(
> +		cd missing_oid_repo &&
> +		object_id=$(git rev-parse message3:c.txt) &&
> +		test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> +		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
> +	)
> +'
> +
> +# Test fetch object-info with 'file://' transport
> +
> +test_expect_success 'create repo to be served by file:// transport' '
> +	git init server &&
> +	test_commit -C server message1 a.txt &&
> +	git -C server config protocol.version 2
> +'
> +
> +test_expect_success 'fetch object-info with file:// using protocol v2' '
> +	(
> +		cd server &&
> +		object_id=$(git rev-parse message1:a.txt) &&
> +		length=$(wc -c <a.txt) &&
> +
> +		printf "%s %d\n" "$object_id" "$length" >expect &&
> +		git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
> +		test_cmp expect actual
> +	)
> +'
> +
> +test_done
> diff --git a/transport-helper.c b/transport-helper.c
> index a0297b0986..64c175e904 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
>
>  	/*
>  	 * If we reach here, then the server, the client, and/or the transport
> -	 * helper does not support protocol v2. --negotiate-only requires
> -	 * protocol v2.
> +	 * helper does not support protocol v2. --negotiate-only and --object-info
> +	 * require protocol v2.
>  	 */
>  	if (data->transport_options.acked_commits) {
>  		warning(_("--negotiate-only requires protocol v2"));
>  		return -1;
>  	}
> +	if (transport->smart_options->object_info) {
> +		warning(_("--object-info requires protocol v2"));
> +		return -1;
> +	}
>
>  	if (!data->get_refs_list_called)
>  		get_refs_list_using_list(transport, 0);
> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };
>
>  #endif
> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;
> +
> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}
> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}
> +
> +static int append_oid_to_ref(const struct object_id *oid, void *data)
> +{
> +	struct ref *ref = data;
> +	struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
> +	temp_ref->old_oid = *oid;
> +	temp_ref->exact_oid = 1;
> +	ref->next = temp_ref;
> +	ref = ref->next;
> +	return 0;
> +}
> +
>  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
>  					struct transport_ls_refs_options *options)
>  {
> @@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	struct ref *refs = NULL;
>  	struct fetch_pack_args args;
>  	struct ref *refs_tmp = NULL;
> +	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
>
>  	memset(&args, 0, sizeof(args));
>  	args.uploadpack = data->options.uploadpack;
> @@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	args.server_options = transport->server_options;
>  	args.negotiation_tips = data->options.negotiation_tips;
>  	args.reject_shallow_remote = transport->smart_options->reject_shallow;
> +	args.object_info = transport->smart_options->object_info;
>
> -	if (!data->got_remote_heads) {
> +	if (transport->smart_options && transport->smart_options->object_info) {
> +		struct ref *temp_ref = wanted_refs;
> +		if (fetch_object_info(transport)) {
> +			goto cleanup;
> +		}
> +		oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
> +		wanted_refs = wanted_refs->next;
> +		transport->remote_refs = wanted_refs;
> +	} else if (!data->got_remote_heads) {
>  		int i;
>  		int must_list_refs = 0;
>  		for (i = 0; i < nr_heads; i++) {
> @@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	else if (data->version <= protocol_v1)
>  		die_if_server_options(transport);
>
> -	if (data->options.acked_commits) {
> +	if (data->options.acked_commits && !transport->smart_options->object_info) {
>  		if (data->version < protocol_v2) {
>  			warning(_("--negotiate-only requires protocol v2"));
>  			ret = -1;
> diff --git a/transport.h b/transport.h
> index a0bc6a1e9e..3bba16f86e 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -30,6 +30,12 @@ struct git_transport_options {
>  	 */
>  	unsigned connectivity_checked:1;
>
> +	/*
> +	 * Transport will attempt to pull only object-info. Fallbacks
> +	 * to pulling entire object if object-info is not supported
> +	 */
> +	unsigned object_info : 1;
> +
>  	int depth;
>  	const char *deepen_since;
>  	const struct string_list *deepen_not;
> @@ -53,6 +59,9 @@ struct git_transport_options {
>  	 * common commits to this oidset instead of fetching any packfiles.
>  	 */
>  	struct oidset *acked_commits;
> +
> +	struct oid_array *object_info_oids;
> +	struct string_list *object_info_options;
>  };
>
>  enum transport_family {
> -- 
> 2.33.0.664.g0785eb7698
Jonathan Tan March 30, 2022, 10:07 p.m. UTC | #6
Calvin Wan <calvinwan@google.com> writes:
> @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	if (args->depth > 0 || args->deepen_since || args->deepen_not)
>  		args->deepen = 1;
>  
> +	if (args->object_info) {
> +		state = FETCH_SEND_REQUEST;
> +	}
> +
>  	while (state != FETCH_DONE) {
>  		switch (state) {
>  		case FETCH_CHECK_LOCAL:
> @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->object_info && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;

I haven't looked into this deeply, but one thing to consider is to do
this in a separate function (instead of reusing do_fetch_pack_v2())
because the object-info request and response has almost nothing in
common with fetch.

> diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh

[skip tests]

Also test when: the user gives an invalid --object-info (e.g.
"--object-info=foo") and when the user gives two parameters, one valid
and one invalid. (Both should fail.)

> diff --git a/transport-internal.h b/transport-internal.h
> index c4ca0b733a..04fa015011 100644
> --- a/transport-internal.h
> +++ b/transport-internal.h
> @@ -59,6 +59,7 @@ struct transport_vtable {
>  	 * use. disconnect() releases these resources.
>  	 **/
>  	int (*disconnect)(struct transport *connection);
> +	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
>  };

Do we need another entry in the vtable, or can we do without? (As it is,
I don't think we need it, as evidenced by the fact that this patch did
not add any entries to the transport_vtable instances.)

> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>  
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;

I think we can avoid code duplication in this function too. So from
here (the connect_setup line below)...

> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));
> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}

...up to here can be a function (except that we need to take out
send_object_info_request() and maybe parameterize the server_supports_v2
call).

From here...

> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}

...until here is object-info-specific, yes.

And this...

> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}

is common code once again.

Also instead of returning 0 from this function if the server doesn't
support object-info, it's probably clearer to have one function that
checks what the server supports, and then have an "if" block which
either does the object-info part or the fetch part depending on what the
server says.
Josh Steadmon March 30, 2022, 10:12 p.m. UTC | #7
On 2022.03.28 19:11, Calvin Wan wrote:
> Sometimes it is useful to get information about an object without
> having to download it completely. The server logic has already been
> implemented as “a2ba162cda (object-info: support for retrieving object
> info, 2021-04-20)”. This patch implements the client option for
> it. Currently, only 'size' is supported, however future patches can
> implement additional options.
> 
> Add ‘--object-info’ option to fetch. This option allows the client to
> make an object-info command request to a server that supports protocol
> v2. If the server is v2, but does not allow for the object-info
> command request, fetch the objects as if it were a standard fetch
> (however without changing any refs). Therefore, hook `fetch
> object-info` into transport_fetch_refs() to easily fallback if needed.
> 
> A previous patch added the `transfer.advertiseObjectInfo` config
> option, of which this patch utilizes to test the fallback.
> 
> ---
>  Documentation/fetch-options.txt |   5 ++
>  Documentation/git-fetch.txt     |   1 +
>  builtin/fetch.c                 |  36 ++++++++-
>  fetch-pack.c                    |  42 +++++++++-
>  fetch-pack.h                    |   9 +++
>  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
>  transport-helper.c              |   8 +-
>  transport-internal.h            |   1 +
>  transport.c                     |  75 ++++++++++++++++-
>  transport.h                     |   9 +++
>  10 files changed, 315 insertions(+), 9 deletions(-)
>  create mode 100755 t/t5583-fetch-object-info.sh

> diff --git a/fetch-pack.c b/fetch-pack.c
> index b709a61baf..36e3d1c5d0 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
>  	packet_buf_delim(req_buf);
>  }
>  
> +void send_object_info_request(int fd_out, struct object_info_args *args)
> +{
> +	struct strbuf req_buf = STRBUF_INIT;
> +	int i;
> +
> +	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> +
> +	if (string_list_has_string(args->object_info_options, "size"))
> +		packet_buf_write(&req_buf, "size");

Do we need a newline after "size" here? It's not clear to me that it's
necessary; Documentation/technical/protocol-v2.txt just says
"command-specific-args are packet line framed arguments defined by each
individual command", but the oid arguments below have a newline, so we
should be consistent one way or the other.

It looks like the server-side implementation wants just a bare "size"
string (no newline), but I suspect that either is OK because the
packet_reader is probably using PACKET_READ_CHOMP_NEWLINE.


> +	for (i = 0; i < args->oids->nr; i++) {
> +		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> +	}
> +
> +	packet_buf_flush(&req_buf);
> +	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +		die_errno(_("unable to write request to remote"));
> +
> +	strbuf_release(&req_buf);
> +}
> +

> diff --git a/transport.c b/transport.c
> index 70e9840a90..65a1b1fdb3 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
>  	return refs;
>  }
>  
> +/*
> + * Fetches object info if server supports object-info command
> + * Make sure to fallback to normal fetch if this fails
> + */
> +static int fetch_object_info(struct transport *transport)
> +{
> +	struct git_transport_data *data = transport->data;
> +	struct object_info_args args;
> +	struct packet_reader reader;
> +
> +	memset(&args, 0, sizeof(args));
> +	args.server_options = transport->server_options;
> +	args.object_info_options = transport->smart_options->object_info_options;
> +	args.oids = transport->smart_options->object_info_oids;
> +
> +	connect_setup(transport, 0);
> +	packet_reader_init(&reader, data->fd[0], NULL, 0,
> +			PACKET_READ_CHOMP_NEWLINE |
> +			PACKET_READ_DIE_ON_ERR_PACKET);
> +	data->version = discover_version(&reader);
> +
> +	transport->hash_algo = reader.hash_algo;
> +
> +	switch (data->version) {
> +	case protocol_v2:
> +		if (!server_supports_v2("object-info", 0))
> +			return 0;
> +		send_object_info_request(data->fd[1], &args);
> +		break;
> +	case protocol_v1:
> +	case protocol_v0:
> +		die(_("wrong protocol version. expected v2"));

The comment at the top of this function says that callers should be
prepared to fallback to normal fetch if this function fails. The only
way it can currently fail is if we are using protocol V2 but the server
doesn't support object-info. Rather than die() if we're on protocol V1
or V0, can we also return a failure here and let callers fallback to
fetch?

> +	case protocol_unknown_version:
> +		BUG("unknown protocol version");
> +	}
> +
> +	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> +		die(_("error reading object info header"));
> +	}
> +	if (strcmp(reader.line, "size")) {
> +		die(_("expected 'size', received '%s'"), reader.line);
> +	}
> +	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> +		printf("%s\n", reader.line);
> +	}
> +	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> +
> +	return 1;
> +}
> +
Calvin Wan March 30, 2022, 10:31 p.m. UTC | #8
Overall, I agree that this option does not belong in fetch. I wouldn't
say I chose to put it there for the sake of implementation
convenience, but because "it does what fetch is supposed to do minus
the object itself". This logic no longer makes any sense to me, and I
regret not seeing the problems with it as I was coming up with
workarounds.

> I think that "Object 'size' is currently the only one the server
> knows to emit", combined with the lack of any client side support so
> far, means that it is not too late to rework the object-info thing
> and possibly reuse more from "cat-file --batch-check".

I talked this over with Jonathan Tan and we came to the conclusion
that this is the next option I should look into, rather than
separating it out into its own command.

Thank you for the review Junio! I appreciate how straightforward all
of your comments are.

On Tue, Mar 29, 2022 at 5:49 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Shouldn't we be making the part that we truly need to reuse into a
> > separate API out of fetch-pack and either (1) allow new commands be
> > easily written reusing the code to create "git remote-object-info"
> > and "git remote-blame", or (2) come up with a single "do things on
> > remote" command with various subcommands, i.e. "git over-there
> > object-info" and "git over-there blame"?
>
> For completeness, we could also make the "git archive" the gold
> standard.  If we want "git blame" that is mostly executed on the
> remote site, we could teach "git blame --remote <there>", which may
> be where a user, who does not know nor care about how things are
> implemented, would expect to find the feature.  Is "object-info" in
> principle like executing "cat-file --batch-check" remotely?
>
> I think that "Object 'size' is currently the only one the server
> knows to emit", combined with the lack of any client side support so
> far, means that it is not too late to rework the object-info thing
> and possibly reuse more from "cat-file --batch-check".
>
> If we can create a system where running things locally and remotely
> are comparable in feature, that would be great, but I do not think
> that is a realistic goal to aim for.
>
> Thoughts?
>
>
>
Jonathan Tan March 30, 2022, 10:43 p.m. UTC | #9
Junio C Hamano <gitster@pobox.com> writes:
> Calvin Wan <calvinwan@google.com> writes:
> 
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> 
> Sorry, but I do not see anything common between "git fetch" that
> fetches history and associated objects and "retrieving only object
> info".  Other than "the way I happened to have chosen to implement,
> what is used to implement fetch has the most commonality".

Calvin and I discussed details of implementation before he wrote these
patches, and my thinking at that time was that to get the relevant
information, we're prepared to fetch the objects themselves from the
remote if need be, but prefer to fetch only metadata if possible, so
putting it in "fetch" seemed reasonable to me. But I agree that it's
probably better to put it somewhere else.
Calvin Wan March 30, 2022, 10:46 p.m. UTC | #10
> but the oid arguments below have a newline, so we
> should be consistent one way or the other.

To match what the other packet functions do, I will get rid of the new
line for the oid loop.

> Rather than die() if we're on protocol V1
> or V0, can we also return a failure here and let callers fallback to
> fetch?

Sounds reasonable. Will look into it.

On Wed, Mar 30, 2022 at 3:12 PM Josh Steadmon <steadmon@google.com> wrote:
>
> On 2022.03.28 19:11, Calvin Wan wrote:
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
>
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
>
> Do we need a newline after "size" here? It's not clear to me that it's
> necessary; Documentation/technical/protocol-v2.txt just says
> "command-specific-args are packet line framed arguments defined by each
> individual command", but the oid arguments below have a newline, so we
> should be consistent one way or the other.
>
> It looks like the server-side implementation wants just a bare "size"
> string (no newline), but I suspect that either is OK because the
> packet_reader is probably using PACKET_READ_CHOMP_NEWLINE.
>
>
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
> > +
> > +     packet_buf_flush(&req_buf);
> > +     if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +             die_errno(_("unable to write request to remote"));
> > +
> > +     strbuf_release(&req_buf);
> > +}
> > +
>
> > diff --git a/transport.c b/transport.c
> > index 70e9840a90..65a1b1fdb3 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
> >       return refs;
> >  }
> >
> > +/*
> > + * Fetches object info if server supports object-info command
> > + * Make sure to fallback to normal fetch if this fails
> > + */
> > +static int fetch_object_info(struct transport *transport)
> > +{
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.object_info_options = transport->smart_options->object_info_options;
> > +     args.oids = transport->smart_options->object_info_oids;
> > +
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     transport->hash_algo = reader.hash_algo;
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             if (!server_supports_v2("object-info", 0))
> > +                     return 0;
> > +             send_object_info_request(data->fd[1], &args);
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
>
> The comment at the top of this function says that callers should be
> prepared to fallback to normal fetch if this function fails. The only
> way it can currently fail is if we are using protocol V2 but the server
> doesn't support object-info. Rather than die() if we're on protocol V1
> or V0, can we also return a failure here and let callers fallback to
> fetch?
>
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +             die(_("error reading object info header"));
> > +     }
> > +     if (strcmp(reader.line, "size")) {
> > +             die(_("expected 'size', received '%s'"), reader.line);
> > +     }
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             printf("%s\n", reader.line);
> > +     }
> > +     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +
> > +     return 1;
> > +}
> > +
Calvin Wan March 30, 2022, 10:47 p.m. UTC | #11
Thank you for the code hygiene tips, Taylor! All of these sounds like
good ideas and will follow through

On Tue, Mar 29, 2022 at 4:19 PM Taylor Blau <me@ttaylorr.com> wrote:
>
> Hi Calvin,
>
> Junio took a deeper look below, but here are a few small code-hygiene
> issues that I noticed while taking a quicker look through this patch.
>
> On Mon, Mar 28, 2022 at 07:11:12PM +0000, Calvin Wan wrote:
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
>
> The single-quotes here look like smart quotes. I don't think we forbid
> these explicitly within commit messages, but using the standard '
> (ASCII 0x27) character is more common.
>
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> > index f903683189..f1ca95c32d 100644
> > --- a/Documentation/fetch-options.txt
> > +++ b/Documentation/fetch-options.txt
> > @@ -299,3 +299,8 @@ endif::git-pull[]
> >  -6::
> >  --ipv6::
> >       Use IPv6 addresses only, ignoring IPv4 addresses.
> > +
> > +--object-info=[<arguments>]::
> > +     Fetches only the relevant information passed in arguments from a
> > +     specific remote and set of object ids. Object 'size' is currently
> > +     the only supported argument.
> > diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> > index 550c16ca61..a13d2ba7ad 100644
> > --- a/Documentation/git-fetch.txt
> > +++ b/Documentation/git-fetch.txt
> > @@ -13,6 +13,7 @@ SYNOPSIS
> >  'git fetch' [<options>] <group>
> >  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
> >  'git fetch' --all [<options>]
> > +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
> >
> >
> >  DESCRIPTION
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 4d12c2fd4d..0b21bebfca 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
> >       NULL
> >  };
> >
> > @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
> >  static int fetch_write_commit_graph = -1;
> >  static int stdin_refspecs = 0;
> >  static int negotiate_only;
> > +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> >
> >  static int git_fetch_config(const char *k, const char *v, void *cb)
> >  {
> > @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> > +              N_("command request arguments")),
> >       OPT_END()
> >  };
> >
> > @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array object_info_oids = OID_ARRAY_INIT;
> >
> >       packet_trace_identity("fetch");
> >
> > @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > -     if (all) {
> > +     if (object_info_options.nr > 0) {
>
> Small nit; since object_info_options.nr should never be negative, we
> should instead write "if (object_info_options)".
>
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info"));
> > +             } else {
> > +                     struct object_id oid;
> > +                     remote = remote_get(argv[0]);
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object id '%s'"), argv[i]);
> > +                             oid_array_append(&object_info_oids, &oid);
> > +                     }
> > +             }
> > +     } else if (all) {
> >               if (argc == 1)
> >                       die(_("fetch --all does not take a repository argument"));
> >               else if (argc > 1)
> > @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >               }
> >       }
> >
> > -     if (negotiate_only) {
> > +     if (object_info_options.nr > 0) {
> > +             if (!remote)
> > +                     die(_("must supply remote when using --object-info"));
> > +             gtransport = prepare_transport(remote, 1);
> > +             if (gtransport->smart_options) {
> > +                     gtransport->smart_options->object_info = 1;
> > +                     gtransport->smart_options->object_info_oids = &object_info_oids;
> > +                     gtransport->smart_options->object_info_options = &object_info_options;
> > +             }
> > +             if (server_options.nr)
> > +                     gtransport->server_options = &server_options;
> > +             result = transport_fetch_refs(gtransport, NULL);
> > +     } else if (negotiate_only) {
> >               struct oidset acked_commits = OIDSET_INIT;
> >               struct oidset_iter iter;
> >               const struct object_id *oid;
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
>
> I don't think this would ever be practically exploit-able, since we'd
> run past the argument limit long before we hit INT_MAX, but this should
> be a size_t instead of an int to match the type of args->oid->nr.
>
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
> > +
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
>
> Small nit, the braces around this for-loop are not required.
>
> >  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
> >                             struct fetch_pack_args *args,
> >                             const struct ref *wants, struct oidset *common,
> > @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >       if (args->depth > 0 || args->deepen_since || args->deepen_not)
> >               args->deepen = 1;
> >
> > +     if (args->object_info) {
> > +             state = FETCH_SEND_REQUEST;
> > +     }
> > +
>
> Ditto here.
>
> >       while (state != FETCH_DONE) {
> >               switch (state) {
> >               case FETCH_CHECK_LOCAL:
> > @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >                       /* Filter 'ref' by 'sought' and those that aren't local */
> >                       mark_complete_and_common_ref(negotiator, args, &ref);
> >                       filter_refs(args, &ref, sought, nr_sought);
> > -                     if (everything_local(args, &ref))
> > +                     if (!args->object_info && everything_local(args, &ref))
> >                               state = FETCH_DONE;
> >                       else
> >                               state = FETCH_SEND_REQUEST;
> > @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
> >               }
> >               args->connectivity_checked = 1;
> >       }
> > -
> > -     update_shallow(args, sought, nr_sought, &si);
> > +
> > +     if (args->object_info) {
> > +             struct ref *ref_cpy_reader = ref_cpy;
> > +             unsigned long size = 0;
> > +             while (ref_cpy_reader) {
> > +                     oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> > +                     printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
>
> Both expressions like "&(x)" can be written instead as "&x" without the
> outer parenthesis.
>
> Likewise, we do not use %li, that line should instead read:
>
>     printf("%s %"PRIuMAX"\n", oid_to_hex(&ref_cpy_reader->old_oid), (uintmax_t)size);
>
> Thanks,
> Taylor
Junio C Hamano March 30, 2022, 11:42 p.m. UTC | #12
Jonathan Tan <jonathantanmy@google.com> writes:

> Calvin and I discussed details of implementation before he wrote these
> patches, and my thinking at that time was that to get the relevant
> information, we're prepared to fetch the objects themselves from the
> remote if need be, but prefer to fetch only metadata if possible, so
> putting it in "fetch" seemed reasonable to me. But I agree that it's
> probably better to put it somewhere else.

Yeah, I am sympathetic that every "learning something from remote"
looks like a "fetch".  After all, a remote blame may be "fetching
the result of running blame".  But it may not be a good way to
decide what the feature should be called.

Thanks.
Calvin Wan March 31, 2022, 7:56 p.m. UTC | #13
(Just a small nit before I respond. If you could reply with only the
code you're referring to, that would be much appreciated).

> Would it be useful to check for the existence of the object to prove it got fetched?
The object is not saved into the filesystem after fetching for only
object-info so checking for its existence wouldn't prove anything.

> Since the server is being used as the client, we wouldn't be able to confirm that
> the object got fetched
I agree that this test case can be written better to confirm that the
object was fetched through the standard channel. I think I could use
GIT_TRACE_PACKET=1 with the output piped to a log file and then
grepping that log file for packet information resembling a standard
fetch to confirm. (See t5702-protocol-v2.sh as a reference)

> But, as a larger question for the list, I'm wondering if it's best practice to separate client from
> server in tests, or if it's an accepted style to just use one repository in cases
> when it doesn't really matter?
I am as well!

On Wed, Mar 30, 2022 at 3:06 PM John Cai <johncai86@gmail.com> wrote:
>
>
>
> On 28 Mar 2022, at 15:11, Calvin Wan wrote:
>
> > Sometimes it is useful to get information about an object without
> > having to download it completely. The server logic has already been
> > implemented as “a2ba162cda (object-info: support for retrieving object
> > info, 2021-04-20)”. This patch implements the client option for
> > it. Currently, only 'size' is supported, however future patches can
> > implement additional options.
> >
> > Add ‘--object-info’ option to fetch. This option allows the client to
> > make an object-info command request to a server that supports protocol
> > v2. If the server is v2, but does not allow for the object-info
> > command request, fetch the objects as if it were a standard fetch
> > (however without changing any refs). Therefore, hook `fetch
> > object-info` into transport_fetch_refs() to easily fallback if needed.
> >
> > A previous patch added the `transfer.advertiseObjectInfo` config
> > option, of which this patch utilizes to test the fallback.
> >
> > ---
> >  Documentation/fetch-options.txt |   5 ++
> >  Documentation/git-fetch.txt     |   1 +
> >  builtin/fetch.c                 |  36 ++++++++-
> >  fetch-pack.c                    |  42 +++++++++-
> >  fetch-pack.h                    |   9 +++
> >  t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
> >  transport-helper.c              |   8 +-
> >  transport-internal.h            |   1 +
> >  transport.c                     |  75 ++++++++++++++++-
> >  transport.h                     |   9 +++
> >  10 files changed, 315 insertions(+), 9 deletions(-)
> >  create mode 100755 t/t5583-fetch-object-info.sh
> >
> > diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> > index f903683189..f1ca95c32d 100644
> > --- a/Documentation/fetch-options.txt
> > +++ b/Documentation/fetch-options.txt
> > @@ -299,3 +299,8 @@ endif::git-pull[]
> >  -6::
> >  --ipv6::
> >       Use IPv6 addresses only, ignoring IPv4 addresses.
> > +
> > +--object-info=[<arguments>]::
> > +     Fetches only the relevant information passed in arguments from a
> > +     specific remote and set of object ids. Object 'size' is currently
> > +     the only supported argument.
> > diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
> > index 550c16ca61..a13d2ba7ad 100644
> > --- a/Documentation/git-fetch.txt
> > +++ b/Documentation/git-fetch.txt
> > @@ -13,6 +13,7 @@ SYNOPSIS
> >  'git fetch' [<options>] <group>
> >  'git fetch' --multiple [<options>] [(<repository> | <group>)...]
> >  'git fetch' --all [<options>]
> > +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
> >
> >
> >  DESCRIPTION
> > diff --git a/builtin/fetch.c b/builtin/fetch.c
> > index 4d12c2fd4d..0b21bebfca 100644
> > --- a/builtin/fetch.c
> > +++ b/builtin/fetch.c
> > @@ -29,6 +29,9 @@
> >  #include "commit-graph.h"
> >  #include "shallow.h"
> >  #include "worktree.h"
> > +#include "protocol.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> >
> >  #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
> >
> > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
> >       N_("git fetch [<options>] <group>"),
> >       N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
> >       N_("git fetch --all [<options>]"),
> > +     N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
> >       NULL
> >  };
> >
> > @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
> >  static int fetch_write_commit_graph = -1;
> >  static int stdin_refspecs = 0;
> >  static int negotiate_only;
> > +static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> >
> >  static int git_fetch_config(const char *k, const char *v, void *cb)
> >  {
> > @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
> >                N_("write the commit-graph after fetching")),
> >       OPT_BOOL(0, "stdin", &stdin_refspecs,
> >                N_("accept refspecs from stdin")),
> > +     OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
> > +              N_("command request arguments")),
> >       OPT_END()
> >  };
> >
> > @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       struct remote *remote = NULL;
> >       int result = 0;
> >       int prune_tags_ok = 1;
> > +     struct oid_array object_info_oids = OID_ARRAY_INIT;
> >
> >       packet_trace_identity("fetch");
> >
> > @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >       if (dry_run)
> >               write_fetch_head = 0;
> >
> > -     if (all) {
> > +     if (object_info_options.nr > 0) {
> > +             if (argc == 0 || argc == 1) {
> > +                     die(_("must supply remote and object ids when using --object-info"));
> > +             } else {
> > +                     struct object_id oid;
> > +                     remote = remote_get(argv[0]);
> > +                     for (i = 1; i < argc; i++) {
> > +                             if (get_oid(argv[i], &oid))
> > +                                     return error(_("malformed object id '%s'"), argv[i]);
> > +                             oid_array_append(&object_info_oids, &oid);
> > +                     }
> > +             }
> > +     } else if (all) {
> >               if (argc == 1)
> >                       die(_("fetch --all does not take a repository argument"));
> >               else if (argc > 1)
> > @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
> >               }
> >       }
> >
> > -     if (negotiate_only) {
> > +     if (object_info_options.nr > 0) {
> > +             if (!remote)
> > +                     die(_("must supply remote when using --object-info"));
> > +             gtransport = prepare_transport(remote, 1);
> > +             if (gtransport->smart_options) {
> > +                     gtransport->smart_options->object_info = 1;
> > +                     gtransport->smart_options->object_info_oids = &object_info_oids;
> > +                     gtransport->smart_options->object_info_options = &object_info_options;
> > +             }
> > +             if (server_options.nr)
> > +                     gtransport->server_options = &server_options;
> > +             result = transport_fetch_refs(gtransport, NULL);
> > +     } else if (negotiate_only) {
> >               struct oidset acked_commits = OIDSET_INIT;
> >               struct oidset_iter iter;
> >               const struct object_id *oid;
> > diff --git a/fetch-pack.c b/fetch-pack.c
> > index b709a61baf..36e3d1c5d0 100644
> > --- a/fetch-pack.c
> > +++ b/fetch-pack.c
> > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
> >       packet_buf_delim(req_buf);
> >  }
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args)
> > +{
> > +     struct strbuf req_buf = STRBUF_INIT;
> > +     int i;
> > +
> > +     write_command_and_capabilities(&req_buf, args->server_options, "object-info");
> > +
> > +     if (string_list_has_string(args->object_info_options, "size"))
> > +             packet_buf_write(&req_buf, "size");
> > +
> > +     for (i = 0; i < args->oids->nr; i++) {
> > +             packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
> > +     }
> > +
> > +     packet_buf_flush(&req_buf);
> > +     if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +             die_errno(_("unable to write request to remote"));
> > +
> > +     strbuf_release(&req_buf);
> > +}
> > +
> >  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
> >                             struct fetch_pack_args *args,
> >                             const struct ref *wants, struct oidset *common,
> > @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >       if (args->depth > 0 || args->deepen_since || args->deepen_not)
> >               args->deepen = 1;
> >
> > +     if (args->object_info) {
> > +             state = FETCH_SEND_REQUEST;
> > +     }
> > +
> >       while (state != FETCH_DONE) {
> >               switch (state) {
> >               case FETCH_CHECK_LOCAL:
> > @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
> >                       /* Filter 'ref' by 'sought' and those that aren't local */
> >                       mark_complete_and_common_ref(negotiator, args, &ref);
> >                       filter_refs(args, &ref, sought, nr_sought);
> > -                     if (everything_local(args, &ref))
> > +                     if (!args->object_info && everything_local(args, &ref))
> >                               state = FETCH_DONE;
> >                       else
> >                               state = FETCH_SEND_REQUEST;
> > @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
> >               }
> >               args->connectivity_checked = 1;
> >       }
> > -
> > -     update_shallow(args, sought, nr_sought, &si);
> > +
> > +     if (args->object_info) {
> > +             struct ref *ref_cpy_reader = ref_cpy;
> > +             unsigned long size = 0;
> > +             while (ref_cpy_reader) {
> > +                     oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
> > +                     printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
> > +                     ref_cpy_reader = ref_cpy_reader->next;
> > +             }
> > +     }
> > +     else {
> > +             update_shallow(args, sought, nr_sought, &si);
> > +     }
> >  cleanup:
> >       clear_shallow_info(&si);
> >       oid_array_clear(&shallows_scratch);
> > diff --git a/fetch-pack.h b/fetch-pack.h
> > index 7f94a2a583..23e69bb218 100644
> > --- a/fetch-pack.h
> > +++ b/fetch-pack.h
> > @@ -42,6 +42,7 @@ struct fetch_pack_args {
> >       unsigned update_shallow:1;
> >       unsigned reject_shallow_remote:1;
> >       unsigned deepen:1;
> > +     unsigned object_info:1;
> >
> >       /*
> >        * Indicate that the remote of this request is a promisor remote. The
> > @@ -68,6 +69,12 @@ struct fetch_pack_args {
> >       unsigned connectivity_checked:1;
> >  };
> >
> > +struct object_info_args {
> > +     const struct string_list *object_info_options;
> > +     const struct string_list *server_options;
> > +     const struct oid_array *oids;
> > +};
> > +
> >  /*
> >   * sought represents remote references that should be updated from.
> >   * On return, the names that were found on the remote will have been
> > @@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
> >   */
> >  int report_unmatched_refs(struct ref **sought, int nr_sought);
> >
> > +void send_object_info_request(int fd_out, struct object_info_args *args);
> > +
> >  #endif
> > diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
> > new file mode 100755
> > index 0000000000..e5db6d953c
> > --- /dev/null
> > +++ b/t/t5583-fetch-object-info.sh
> > @@ -0,0 +1,138 @@
> > +#!/bin/sh
> > +
> > +test_description='test git fetch object-info version 2'
> > +
> > +TEST_NO_CREATE_REPO=1
> > +
> > +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
> > +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
> > +
> > +. ./test-lib.sh
> > +
> > +# Test fetch object-info with 'git://' transport
> > +
> > +. "$TEST_DIRECTORY"/lib-git-daemon.sh
> > +start_git_daemon --export-all --enable=receive-pack
> > +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
> > +
> > +
> > +test_expect_success 'create repo to be served by git-daemon' '
> > +     git init "$daemon_parent" &&
> > +     test_commit -C "$daemon_parent" message1 a.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with git:// using protocol v2' '
> > +     (
> > +             cd "$daemon_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +stop_git_daemon
> > +
> > +# Test protocol v2 with 'http://' transport
> > +
> > +. "$TEST_DIRECTORY"/lib-httpd.sh
> > +start_httpd
> > +
> > +test_expect_success 'create repo to be served by http:// transport' '
> > +     git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +     git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
> > +     test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
> > +     test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
> > +'
> > +
> > +test_expect_success 'fetch object-info with http:// using protocol v2' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info for multiple objects' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id1=$(git rev-parse message1:a.txt) &&
> > +             object_id2=$(git rev-parse message2:b.txt) &&
> > +             length1=$(wc -c <a.txt) &&
> > +             length2=$(wc -c <b.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id1" "$length1" >expect &&
> > +             printf "%s %d\n" "$object_id2" "$length2" >>expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git config objectinfo.advertise false &&
> > +             git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
>
> I noticed this test doesn't confirm that standard fetch was actually used. Would
> it be useful to check for the existence of the object to prove it got fetched?
>
> Also, is seems the way this test is set up, it's fetching from itself.
> This is logically fine in most cases that client == server, but it was a little
> confusing at first because it looked like the invocation of git config was on
> the client rather than the server, until I realized they are the same.
>
> Since the server is being used as the client, we wouldn't be able to confirm that
> the object got fetched. I realize for most of these tests, it doesn't really matter
> to conflate client with server. I think this is the one exception.
>
> But, as a larger question for the list, I'm wondering if it's best practice to separate client from
> server in tests, or if it's an accepted style to just use one repository in cases
> when it doesn't really matter?
>
> Thanks
> John
>
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on server with legacy protocol' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> > +             test_i18ngrep "object-info requires protocol v2" err
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on malformed OID' '
> > +     (
> > +             cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
> > +             malformed_object_id="this_id_is_not_valid" &&
> > +             test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
> > +             test_i18ngrep "malformed object id '$malformed_object_id'" err
> > +     )
> > +'
> > +
> > +test_expect_success 'fetch object-info fails on missing OID' '
> > +     git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
> > +     test_commit -C missing_oid_repo message3 c.txt &&
> > +     (
> > +             cd missing_oid_repo &&
> > +             object_id=$(git rev-parse message3:c.txt) &&
> > +             test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
> > +             test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
> > +     )
> > +'
> > +
> > +# Test fetch object-info with 'file://' transport
> > +
> > +test_expect_success 'create repo to be served by file:// transport' '
> > +     git init server &&
> > +     test_commit -C server message1 a.txt &&
> > +     git -C server config protocol.version 2
> > +'
> > +
> > +test_expect_success 'fetch object-info with file:// using protocol v2' '
> > +     (
> > +             cd server &&
> > +             object_id=$(git rev-parse message1:a.txt) &&
> > +             length=$(wc -c <a.txt) &&
> > +
> > +             printf "%s %d\n" "$object_id" "$length" >expect &&
> > +             git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
> > +             test_cmp expect actual
> > +     )
> > +'
> > +
> > +test_done
> > diff --git a/transport-helper.c b/transport-helper.c
> > index a0297b0986..64c175e904 100644
> > --- a/transport-helper.c
> > +++ b/transport-helper.c
> > @@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
> >
> >       /*
> >        * If we reach here, then the server, the client, and/or the transport
> > -      * helper does not support protocol v2. --negotiate-only requires
> > -      * protocol v2.
> > +      * helper does not support protocol v2. --negotiate-only and --object-info
> > +      * require protocol v2.
> >        */
> >       if (data->transport_options.acked_commits) {
> >               warning(_("--negotiate-only requires protocol v2"));
> >               return -1;
> >       }
> > +     if (transport->smart_options->object_info) {
> > +             warning(_("--object-info requires protocol v2"));
> > +             return -1;
> > +     }
> >
> >       if (!data->get_refs_list_called)
> >               get_refs_list_using_list(transport, 0);
> > diff --git a/transport-internal.h b/transport-internal.h
> > index c4ca0b733a..04fa015011 100644
> > --- a/transport-internal.h
> > +++ b/transport-internal.h
> > @@ -59,6 +59,7 @@ struct transport_vtable {
> >        * use. disconnect() releases these resources.
> >        **/
> >       int (*disconnect)(struct transport *connection);
> > +     int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
> >  };
> >
> >  #endif
> > diff --git a/transport.c b/transport.c
> > index 70e9840a90..65a1b1fdb3 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
> >       return refs;
> >  }
> >
> > +/*
> > + * Fetches object info if server supports object-info command
> > + * Make sure to fallback to normal fetch if this fails
> > + */
> > +static int fetch_object_info(struct transport *transport)
> > +{
> > +     struct git_transport_data *data = transport->data;
> > +     struct object_info_args args;
> > +     struct packet_reader reader;
> > +
> > +     memset(&args, 0, sizeof(args));
> > +     args.server_options = transport->server_options;
> > +     args.object_info_options = transport->smart_options->object_info_options;
> > +     args.oids = transport->smart_options->object_info_oids;
> > +
> > +     connect_setup(transport, 0);
> > +     packet_reader_init(&reader, data->fd[0], NULL, 0,
> > +                     PACKET_READ_CHOMP_NEWLINE |
> > +                     PACKET_READ_DIE_ON_ERR_PACKET);
> > +     data->version = discover_version(&reader);
> > +
> > +     transport->hash_algo = reader.hash_algo;
> > +
> > +     switch (data->version) {
> > +     case protocol_v2:
> > +             if (!server_supports_v2("object-info", 0))
> > +                     return 0;
> > +             send_object_info_request(data->fd[1], &args);
> > +             break;
> > +     case protocol_v1:
> > +     case protocol_v0:
> > +             die(_("wrong protocol version. expected v2"));
> > +     case protocol_unknown_version:
> > +             BUG("unknown protocol version");
> > +     }
> > +
> > +     if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
> > +             die(_("error reading object info header"));
> > +     }
> > +     if (strcmp(reader.line, "size")) {
> > +             die(_("expected 'size', received '%s'"), reader.line);
> > +     }
> > +     while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
> > +             printf("%s\n", reader.line);
> > +     }
> > +     check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
> > +
> > +     return 1;
> > +}
> > +
> > +static int append_oid_to_ref(const struct object_id *oid, void *data)
> > +{
> > +     struct ref *ref = data;
> > +     struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
> > +     temp_ref->old_oid = *oid;
> > +     temp_ref->exact_oid = 1;
> > +     ref->next = temp_ref;
> > +     ref = ref->next;
> > +     return 0;
> > +}
> > +
> >  static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
> >                                       struct transport_ls_refs_options *options)
> >  {
> > @@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       struct ref *refs = NULL;
> >       struct fetch_pack_args args;
> >       struct ref *refs_tmp = NULL;
> > +     struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
> >
> >       memset(&args, 0, sizeof(args));
> >       args.uploadpack = data->options.uploadpack;
> > @@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       args.server_options = transport->server_options;
> >       args.negotiation_tips = data->options.negotiation_tips;
> >       args.reject_shallow_remote = transport->smart_options->reject_shallow;
> > +     args.object_info = transport->smart_options->object_info;
> >
> > -     if (!data->got_remote_heads) {
> > +     if (transport->smart_options && transport->smart_options->object_info) {
> > +             struct ref *temp_ref = wanted_refs;
> > +             if (fetch_object_info(transport)) {
> > +                     goto cleanup;
> > +             }
> > +             oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
> > +             wanted_refs = wanted_refs->next;
> > +             transport->remote_refs = wanted_refs;
> > +     } else if (!data->got_remote_heads) {
> >               int i;
> >               int must_list_refs = 0;
> >               for (i = 0; i < nr_heads; i++) {
> > @@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >       else if (data->version <= protocol_v1)
> >               die_if_server_options(transport);
> >
> > -     if (data->options.acked_commits) {
> > +     if (data->options.acked_commits && !transport->smart_options->object_info) {
> >               if (data->version < protocol_v2) {
> >                       warning(_("--negotiate-only requires protocol v2"));
> >                       ret = -1;
> > diff --git a/transport.h b/transport.h
> > index a0bc6a1e9e..3bba16f86e 100644
> > --- a/transport.h
> > +++ b/transport.h
> > @@ -30,6 +30,12 @@ struct git_transport_options {
> >        */
> >       unsigned connectivity_checked:1;
> >
> > +     /*
> > +      * Transport will attempt to pull only object-info. Fallbacks
> > +      * to pulling entire object if object-info is not supported
> > +      */
> > +     unsigned object_info : 1;
> > +
> >       int depth;
> >       const char *deepen_since;
> >       const struct string_list *deepen_not;
> > @@ -53,6 +59,9 @@ struct git_transport_options {
> >        * common commits to this oidset instead of fetching any packfiles.
> >        */
> >       struct oidset *acked_commits;
> > +
> > +     struct oid_array *object_info_oids;
> > +     struct string_list *object_info_options;
> >  };
> >
> >  enum transport_family {
> > --
> > 2.33.0.664.g0785eb7698
Junio C Hamano April 1, 2022, 4:16 p.m. UTC | #14
Calvin Wan <calvinwan@google.com> writes:

>> But, as a larger question for the list, I'm wondering if it's
>> best practice to separate client from server in tests, or if it's
>> an accepted style to just use one repository in cases when it
>> doesn't really matter?
> I am as well!

I do not speak for "the list", but many fetch-push tests were
written to use the same repository, and many early submodule tests
imitated them to use the initial test repository and add it as its
submodule, out of pure laziness (and I am probably the most to be
blamed)---these scenarios are not very useful but when the transfer
itself is not the focus of the tests it was a handy way.  Of course
it is not encouraged, and those tests that do care about seeing
objects getting transferred do make sure that they are as
independent as the real world settings that we want the feature
being tested to be working in.
diff mbox series

Patch

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index f903683189..f1ca95c32d 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -299,3 +299,8 @@  endif::git-pull[]
 -6::
 --ipv6::
 	Use IPv6 addresses only, ignoring IPv4 addresses.
+
+--object-info=[<arguments>]::
+	Fetches only the relevant information passed in arguments from a
+	specific remote and set of object ids. Object 'size' is currently
+	the only supported argument.
diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
index 550c16ca61..a13d2ba7ad 100644
--- a/Documentation/git-fetch.txt
+++ b/Documentation/git-fetch.txt
@@ -13,6 +13,7 @@  SYNOPSIS
 'git fetch' [<options>] <group>
 'git fetch' --multiple [<options>] [(<repository> | <group>)...]
 'git fetch' --all [<options>]
+'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
 
 
 DESCRIPTION
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4d12c2fd4d..0b21bebfca 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -29,6 +29,9 @@ 
 #include "commit-graph.h"
 #include "shallow.h"
 #include "worktree.h"
+#include "protocol.h"
+#include "pkt-line.h"
+#include "connect.h"
 
 #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
 
@@ -37,6 +40,7 @@  static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] <group>"),
 	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
 	N_("git fetch --all [<options>]"),
+	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
 	NULL
 };
 
@@ -86,6 +90,7 @@  static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
 static int stdin_refspecs = 0;
 static int negotiate_only;
+static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -221,6 +226,8 @@  static struct option builtin_fetch_options[] = {
 		 N_("write the commit-graph after fetching")),
 	OPT_BOOL(0, "stdin", &stdin_refspecs,
 		 N_("accept refspecs from stdin")),
+	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
+		 N_("command request arguments")),
 	OPT_END()
 };
 
@@ -2087,6 +2094,7 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct oid_array object_info_oids = OID_ARRAY_INIT;
 
 	packet_trace_identity("fetch");
 
@@ -2168,7 +2176,19 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 	if (dry_run)
 		write_fetch_head = 0;
 
-	if (all) {
+	if (object_info_options.nr > 0) {
+		if (argc == 0 || argc == 1) {
+			die(_("must supply remote and object ids when using --object-info"));
+		} else {
+			struct object_id oid;
+			remote = remote_get(argv[0]);
+			for (i = 1; i < argc; i++) {
+				if (get_oid(argv[i], &oid))
+					return error(_("malformed object id '%s'"), argv[i]);
+				oid_array_append(&object_info_oids, &oid);
+			}
+		}
+	} else if (all) {
 		if (argc == 1)
 			die(_("fetch --all does not take a repository argument"));
 		else if (argc > 1)
@@ -2199,7 +2219,19 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	if (negotiate_only) {
+	if (object_info_options.nr > 0) {
+		if (!remote)
+			die(_("must supply remote when using --object-info"));
+		gtransport = prepare_transport(remote, 1);
+		if (gtransport->smart_options) {
+			gtransport->smart_options->object_info = 1;
+			gtransport->smart_options->object_info_oids = &object_info_oids;
+			gtransport->smart_options->object_info_options = &object_info_options;
+		}
+		if (server_options.nr)
+			gtransport->server_options = &server_options;
+		result = transport_fetch_refs(gtransport, NULL);
+	} else if (negotiate_only) {
 		struct oidset acked_commits = OIDSET_INIT;
 		struct oidset_iter iter;
 		const struct object_id *oid;
diff --git a/fetch-pack.c b/fetch-pack.c
index b709a61baf..36e3d1c5d0 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1269,6 +1269,27 @@  static void write_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	int i;
+
+	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
+
+	if (string_list_has_string(args->object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	for (i = 0; i < args->oids->nr; i++) {
+		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
@@ -1604,6 +1625,10 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info) {
+		state = FETCH_SEND_REQUEST;
+	}
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
@@ -1613,7 +1638,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (everything_local(args, &ref))
+			if (!args->object_info && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
@@ -1999,8 +2024,19 @@  struct ref *fetch_pack(struct fetch_pack_args *args,
 		}
 		args->connectivity_checked = 1;
 	}
-
-	update_shallow(args, sought, nr_sought, &si);
+	
+	if (args->object_info) {
+		struct ref *ref_cpy_reader = ref_cpy;
+		unsigned long size = 0;
+		while (ref_cpy_reader) {
+			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
+			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
+			ref_cpy_reader = ref_cpy_reader->next;
+		}
+	}
+	else {
+		update_shallow(args, sought, nr_sought, &si);
+	}
 cleanup:
 	clear_shallow_info(&si);
 	oid_array_clear(&shallows_scratch);
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a583..23e69bb218 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -42,6 +42,7 @@  struct fetch_pack_args {
 	unsigned update_shallow:1;
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
@@ -68,6 +69,12 @@  struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *object_info_options;
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -101,4 +108,6 @@  void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
new file mode 100755
index 0000000000..e5db6d953c
--- /dev/null
+++ b/t/t5583-fetch-object-info.sh
@@ -0,0 +1,138 @@ 
+#!/bin/sh
+
+test_description='test git fetch object-info version 2'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+# Test fetch object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	test_commit -C "$daemon_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with git:// using protocol v2' '
+	(
+		cd "$daemon_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+stop_git_daemon
+
+# Test protocol v2 with 'http://' transport
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
+'
+
+test_expect_success 'fetch object-info with http:// using protocol v2' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info for multiple objects' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id1=$(git rev-parse message1:a.txt) &&
+		object_id2=$(git rev-parse message2:b.txt) &&
+		length1=$(wc -c <a.txt) &&
+		length2=$(wc -c <b.txt) &&
+
+		printf "%s %d\n" "$object_id1" "$length1" >expect &&
+		printf "%s %d\n" "$object_id2" "$length2" >>expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git config objectinfo.advertise false &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fails on server with legacy protocol' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on malformed OID' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+		test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on missing OID' '
+	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+	test_commit -C missing_oid_repo message3 c.txt &&
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message3:c.txt) &&
+		test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
+	)
+'
+
+# Test fetch object-info with 'file://' transport
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	test_commit -C server message1 a.txt &&
+	git -C server config protocol.version 2
+'
+
+test_expect_success 'fetch object-info with file:// using protocol v2' '
+	(
+		cd server &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986..64c175e904 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -686,13 +686,17 @@  static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and --object-info
+	 * require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
 		return -1;
 	}
+	if (transport->smart_options->object_info) {
+		warning(_("--object-info requires protocol v2"));
+		return -1;
+	}
 
 	if (!data->get_refs_list_called)
 		get_refs_list_using_list(transport, 0);
diff --git a/transport-internal.h b/transport-internal.h
index c4ca0b733a..04fa015011 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -59,6 +59,7 @@  struct transport_vtable {
 	 * use. disconnect() releases these resources.
 	 **/
 	int (*disconnect)(struct transport *connection);
+	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
 };
 
 #endif
diff --git a/transport.c b/transport.c
index 70e9840a90..65a1b1fdb3 100644
--- a/transport.c
+++ b/transport.c
@@ -350,6 +350,67 @@  static struct ref *handshake(struct transport *transport, int for_push,
 	return refs;
 }
 
+/*
+ * Fetches object info if server supports object-info command
+ * Make sure to fallback to normal fetch if this fails
+ */
+static int fetch_object_info(struct transport *transport)
+{
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.object_info_options = transport->smart_options->object_info_options;
+	args.oids = transport->smart_options->object_info_oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	transport->hash_algo = reader.hash_algo;
+
+	switch (data->version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info", 0))
+			return 0;
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+		die(_("error reading object info header"));
+	}
+	if (strcmp(reader.line, "size")) {
+		die(_("expected 'size', received '%s'"), reader.line);
+	}
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		printf("%s\n", reader.line);
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+
+	return 1;
+}
+
+static int append_oid_to_ref(const struct object_id *oid, void *data)
+{
+	struct ref *ref = data;
+	struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+	temp_ref->old_oid = *oid;
+	temp_ref->exact_oid = 1;
+	ref->next = temp_ref;
+	ref = ref->next;
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
 					struct transport_ls_refs_options *options)
 {
@@ -364,6 +425,7 @@  static int fetch_refs_via_pack(struct transport *transport,
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
+	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -388,8 +450,17 @@  static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
+	args.object_info = transport->smart_options->object_info;
 
-	if (!data->got_remote_heads) {
+	if (transport->smart_options && transport->smart_options->object_info) {
+		struct ref *temp_ref = wanted_refs;
+		if (fetch_object_info(transport)) {
+			goto cleanup;
+		}
+		oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
+		wanted_refs = wanted_refs->next;
+		transport->remote_refs = wanted_refs;
+	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
@@ -406,7 +477,7 @@  static int fetch_refs_via_pack(struct transport *transport,
 	else if (data->version <= protocol_v1)
 		die_if_server_options(transport);
 
-	if (data->options.acked_commits) {
+	if (data->options.acked_commits && !transport->smart_options->object_info) {
 		if (data->version < protocol_v2) {
 			warning(_("--negotiate-only requires protocol v2"));
 			ret = -1;
diff --git a/transport.h b/transport.h
index a0bc6a1e9e..3bba16f86e 100644
--- a/transport.h
+++ b/transport.h
@@ -30,6 +30,12 @@  struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to pull only object-info. Fallbacks
+	 * to pulling entire object if object-info is not supported
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -53,6 +59,9 @@  struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {