diff mbox series

[v8,5/6] transport: add client support for object-info

Message ID 20241223232523.76236-6-eric.peijian@gmail.com (mailing list archive)
State New
Headers show
Series add remote-object-info to batch-command | expand

Commit Message

Eric Ju Dec. 23, 2024, 11:25 p.m. UTC
From: Calvin Wan <calvinwan@google.com>

Sometimes, it is beneficial to retrieve information about an object
without downloading it entirely. The server-side logic for this
functionality was implemented in commit "a2ba162cda (object-info:
support for retrieving object info, 2021-04-20)."

This commit introduces client functions to interact with the server.

Currently, the client supports requesting a list of object IDs with
the ‘size’ feature from a v2 server. If the server does not advertise
this feature (i.e., transfer.advertiseobjectinfo is set to false),
the client will return an error and exit.

Helped-by: Jonathan Tan <jonathantanmy@google.com>
Helped-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Calvin Wan <calvinwan@google.com>
Signed-off-by: Eric Ju  <eric.peijian@gmail.com>
---
 Makefile            |  1 +
 fetch-object-info.c | 92 +++++++++++++++++++++++++++++++++++++++++++++
 fetch-object-info.h | 18 +++++++++
 fetch-pack.c        |  3 ++
 fetch-pack.h        |  2 +
 transport-helper.c  | 11 +++++-
 transport.c         | 28 +++++++++++++-
 transport.h         | 11 ++++++
 8 files changed, 163 insertions(+), 3 deletions(-)
 create mode 100644 fetch-object-info.c
 create mode 100644 fetch-object-info.h

Comments

Calvin Wan Jan. 7, 2025, 6:31 p.m. UTC | #1
Thanks for picking up this series btw!

On Mon, Dec 23, 2024 at 3:25 PM Eric Ju <eric.peijian@gmail.com> wrote:
>
> From: Calvin Wan <calvinwan@google.com>
>
> Sometimes, it is beneficial to retrieve information about an object
> without downloading it entirely. The server-side logic for this
> functionality was implemented in commit "a2ba162cda (object-info:
> support for retrieving object info, 2021-04-20)."
>
> This commit introduces client functions to interact with the server.
>
> Currently, the client supports requesting a list of object IDs with
> the ‘size’ feature from a v2 server. If the server does not advertise
> this feature (i.e., transfer.advertiseobjectinfo is set to false),
> the client will return an error and exit.
>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> Helped-by: Christian Couder <chriscool@tuxfamily.org>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Eric Ju  <eric.peijian@gmail.com>
> ---
>  Makefile            |  1 +
>  fetch-object-info.c | 92 +++++++++++++++++++++++++++++++++++++++++++++
>  fetch-object-info.h | 18 +++++++++
>  fetch-pack.c        |  3 ++
>  fetch-pack.h        |  2 +
>  transport-helper.c  | 11 +++++-
>  transport.c         | 28 +++++++++++++-
>  transport.h         | 11 ++++++
>  8 files changed, 163 insertions(+), 3 deletions(-)
>  create mode 100644 fetch-object-info.c
>  create mode 100644 fetch-object-info.h
>
> diff --git a/Makefile b/Makefile
> index 3fa4bf0d06..70e9ec0464 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1020,6 +1020,7 @@ LIB_OBJS += ewah/ewah_rlw.o
>  LIB_OBJS += exec-cmd.o
>  LIB_OBJS += fetch-negotiator.o
>  LIB_OBJS += fetch-pack.o
> +LIB_OBJS += fetch-object-info.o
>  LIB_OBJS += fmt-merge-msg.o
>  LIB_OBJS += fsck.o
>  LIB_OBJS += fsmonitor.o
> diff --git a/fetch-object-info.c b/fetch-object-info.c
> new file mode 100644
> index 0000000000..2aa9f2b70d
> --- /dev/null
> +++ b/fetch-object-info.c
> @@ -0,0 +1,92 @@
> +#include "git-compat-util.h"
> +#include "gettext.h"
> +#include "hex.h"
> +#include "pkt-line.h"
> +#include "connect.h"
> +#include "oid-array.h"
> +#include "object-store-ll.h"
> +#include "fetch-object-info.h"
> +#include "string-list.h"
> +
> +/**
> + * send_object_info_request sends git-cat-file object-info command and its
> + * arguments into the request buffer.
> + */
> +static void send_object_info_request(const int fd_out, struct object_info_args *args)
> +{
> +       struct strbuf req_buf = STRBUF_INIT;
> +
> +       write_command_and_capabilities(&req_buf, "object-info", args->server_options);
> +
> +       if (unsorted_string_list_has_string(args->object_info_options, "size"))
> +               packet_buf_write(&req_buf, "size");
> +
> +       if (args->oids) {
> +               for (size_t i = 0; i < args->oids->nr; i++)
> +                       packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
> +       }
> +
> +       packet_buf_flush(&req_buf);
> +       if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> +               die_errno(_("unable to write request to remote"));
> +
> +       strbuf_release(&req_buf);
> +}
> +
> +/**
> + * fetch_object_info sends git-cat-file object-info command into the request buf
> + * and read the results from packets.
> + */
> +int fetch_object_info(const enum protocol_version version, struct object_info_args *args,
> +                     struct packet_reader *reader, struct object_info *object_info_data,
> +                     const int stateless_rpc, const int fd_out)
> +{
> +       int size_index = -1;
> +
> +       switch (version) {
> +       case protocol_v2:
> +               if (!server_supports_v2("object-info"))
> +                       die(_("object-info capability is not enabled on the server"));
> +               send_object_info_request(fd_out, args);
> +               break;
> +       case protocol_v1:
> +       case protocol_v0:
> +               die(_("wrong protocol version. expected v2"));

s/wrong/unsupported

> +       case protocol_unknown_version:
> +               BUG("unknown protocol version");
> +       }
> +
> +       for (size_t i = 0; i < args->object_info_options->nr; i++) {
> +               if (packet_reader_read(reader) != PACKET_READ_NORMAL) {
> +                       check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected");
> +                       return -1;
> +               }
> +               if (unsorted_string_list_has_string(args->object_info_options, reader->line)) {
> +                       if (!strcmp(reader->line, "size")) {
> +                               size_index = i;
> +                               for (size_t j = 0; j < args->oids->nr; j++)
> +                                       object_info_data[j].sizep = xcalloc(1, sizeof(long));
> +                       }
> +                       continue;
> +               }
> +               return -1;
> +       }

I think we can flatten this logic a bit more here to make it more intuitive.

if (!unsorted_string_list_has_string(args->object_info_options, reader->line))
        return -1;
if (!strcmp(reader->line, "size")) {
        size_index = i;
        for (size_t j = 0; j < args->oids->nr; j++)
                object_info_data[j].sizep = xcalloc(1, sizeof(long));
}
Junio C Hamano Jan. 7, 2025, 6:53 p.m. UTC | #2
Calvin Wan <calvinwan@google.com> writes:

> Thanks for picking up this series btw!
> ...
> I think we can flatten this logic a bit more here to make it more intuitive.
>
> if (!unsorted_string_list_has_string(args->object_info_options, reader->line))
>         return -1;
> if (!strcmp(reader->line, "size")) {
>         size_index = i;
>         for (size_t j = 0; j < args->oids->nr; j++)
>                 object_info_data[j].sizep = xcalloc(1, sizeof(long));
> }

Indeed the updated code structure gets easier to follow.

Thanks, both of you.
Eric Ju Jan. 8, 2025, 3:55 p.m. UTC | #3
On Tue, Jan 7, 2025 at 1:31 PM Calvin Wan <calvinwan@google.com> wrote:
>
> Thanks for picking up this series btw!
>
> On Mon, Dec 23, 2024 at 3:25 PM Eric Ju <eric.peijian@gmail.com> wrote:
> >
> > From: Calvin Wan <calvinwan@google.com>
> >
> > Sometimes, it is beneficial to retrieve information about an object
> > without downloading it entirely. The server-side logic for this
> > functionality was implemented in commit "a2ba162cda (object-info:
> > support for retrieving object info, 2021-04-20)."
> >
> > This commit introduces client functions to interact with the server.
> >
> > Currently, the client supports requesting a list of object IDs with
> > the ‘size’ feature from a v2 server. If the server does not advertise
> > this feature (i.e., transfer.advertiseobjectinfo is set to false),
> > the client will return an error and exit.
> >
> > Helped-by: Jonathan Tan <jonathantanmy@google.com>
> > Helped-by: Christian Couder <chriscool@tuxfamily.org>
> > Signed-off-by: Calvin Wan <calvinwan@google.com>
> > Signed-off-by: Eric Ju  <eric.peijian@gmail.com>
> > ---
> >  Makefile            |  1 +
> >  fetch-object-info.c | 92 +++++++++++++++++++++++++++++++++++++++++++++
> >  fetch-object-info.h | 18 +++++++++
> >  fetch-pack.c        |  3 ++
> >  fetch-pack.h        |  2 +
> >  transport-helper.c  | 11 +++++-
> >  transport.c         | 28 +++++++++++++-
> >  transport.h         | 11 ++++++
> >  8 files changed, 163 insertions(+), 3 deletions(-)
> >  create mode 100644 fetch-object-info.c
> >  create mode 100644 fetch-object-info.h
> >
> > diff --git a/Makefile b/Makefile
> > index 3fa4bf0d06..70e9ec0464 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -1020,6 +1020,7 @@ LIB_OBJS += ewah/ewah_rlw.o
> >  LIB_OBJS += exec-cmd.o
> >  LIB_OBJS += fetch-negotiator.o
> >  LIB_OBJS += fetch-pack.o
> > +LIB_OBJS += fetch-object-info.o
> >  LIB_OBJS += fmt-merge-msg.o
> >  LIB_OBJS += fsck.o
> >  LIB_OBJS += fsmonitor.o
> > diff --git a/fetch-object-info.c b/fetch-object-info.c
> > new file mode 100644
> > index 0000000000..2aa9f2b70d
> > --- /dev/null
> > +++ b/fetch-object-info.c
> > @@ -0,0 +1,92 @@
> > +#include "git-compat-util.h"
> > +#include "gettext.h"
> > +#include "hex.h"
> > +#include "pkt-line.h"
> > +#include "connect.h"
> > +#include "oid-array.h"
> > +#include "object-store-ll.h"
> > +#include "fetch-object-info.h"
> > +#include "string-list.h"
> > +
> > +/**
> > + * send_object_info_request sends git-cat-file object-info command and its
> > + * arguments into the request buffer.
> > + */
> > +static void send_object_info_request(const int fd_out, struct object_info_args *args)
> > +{
> > +       struct strbuf req_buf = STRBUF_INIT;
> > +
> > +       write_command_and_capabilities(&req_buf, "object-info", args->server_options);
> > +
> > +       if (unsorted_string_list_has_string(args->object_info_options, "size"))
> > +               packet_buf_write(&req_buf, "size");
> > +
> > +       if (args->oids) {
> > +               for (size_t i = 0; i < args->oids->nr; i++)
> > +                       packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
> > +       }
> > +
> > +       packet_buf_flush(&req_buf);
> > +       if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
> > +               die_errno(_("unable to write request to remote"));
> > +
> > +       strbuf_release(&req_buf);
> > +}
> > +
> > +/**
> > + * fetch_object_info sends git-cat-file object-info command into the request buf
> > + * and read the results from packets.
> > + */
> > +int fetch_object_info(const enum protocol_version version, struct object_info_args *args,
> > +                     struct packet_reader *reader, struct object_info *object_info_data,
> > +                     const int stateless_rpc, const int fd_out)
> > +{
> > +       int size_index = -1;
> > +
> > +       switch (version) {
> > +       case protocol_v2:
> > +               if (!server_supports_v2("object-info"))
> > +                       die(_("object-info capability is not enabled on the server"));
> > +               send_object_info_request(fd_out, args);
> > +               break;
> > +       case protocol_v1:
> > +       case protocol_v0:
> > +               die(_("wrong protocol version. expected v2"));
>
> s/wrong/unsupported
>

Thank you. Fixing it in v9.

> > +       case protocol_unknown_version:
> > +               BUG("unknown protocol version");
> > +       }
> > +
> > +       for (size_t i = 0; i < args->object_info_options->nr; i++) {
> > +               if (packet_reader_read(reader) != PACKET_READ_NORMAL) {
> > +                       check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected");
> > +                       return -1;
> > +               }
> > +               if (unsorted_string_list_has_string(args->object_info_options, reader->line)) {
> > +                       if (!strcmp(reader->line, "size")) {
> > +                               size_index = i;
> > +                               for (size_t j = 0; j < args->oids->nr; j++)
> > +                                       object_info_data[j].sizep = xcalloc(1, sizeof(long));
> > +                       }
> > +                       continue;
> > +               }
> > +               return -1;
> > +       }
>
> I think we can flatten this logic a bit more here to make it more intuitive.
>
> if (!unsorted_string_list_has_string(args->object_info_options, reader->line))
>         return -1;
> if (!strcmp(reader->line, "size")) {
>         size_index = i;
>         for (size_t j = 0; j < args->oids->nr; j++)
>                 object_info_data[j].sizep = xcalloc(1, sizeof(long));
> }

Thank you. Revising it in v9.
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 3fa4bf0d06..70e9ec0464 100644
--- a/Makefile
+++ b/Makefile
@@ -1020,6 +1020,7 @@  LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec-cmd.o
 LIB_OBJS += fetch-negotiator.o
 LIB_OBJS += fetch-pack.o
+LIB_OBJS += fetch-object-info.o
 LIB_OBJS += fmt-merge-msg.o
 LIB_OBJS += fsck.o
 LIB_OBJS += fsmonitor.o
diff --git a/fetch-object-info.c b/fetch-object-info.c
new file mode 100644
index 0000000000..2aa9f2b70d
--- /dev/null
+++ b/fetch-object-info.c
@@ -0,0 +1,92 @@ 
+#include "git-compat-util.h"
+#include "gettext.h"
+#include "hex.h"
+#include "pkt-line.h"
+#include "connect.h"
+#include "oid-array.h"
+#include "object-store-ll.h"
+#include "fetch-object-info.h"
+#include "string-list.h"
+
+/**
+ * send_object_info_request sends git-cat-file object-info command and its
+ * arguments into the request buffer.
+ */
+static void send_object_info_request(const int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+
+	write_command_and_capabilities(&req_buf, "object-info", args->server_options);
+
+	if (unsorted_string_list_has_string(args->object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	if (args->oids) {
+		for (size_t i = 0; i < args->oids->nr; i++)
+			packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
+/**
+ * fetch_object_info sends git-cat-file object-info command into the request buf
+ * and read the results from packets.
+ */
+int fetch_object_info(const enum protocol_version version, struct object_info_args *args,
+		      struct packet_reader *reader, struct object_info *object_info_data,
+		      const int stateless_rpc, const int fd_out)
+{
+	int size_index = -1;
+
+	switch (version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info"))
+			die(_("object-info capability is not enabled on the server"));
+		send_object_info_request(fd_out, args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	for (size_t i = 0; i < args->object_info_options->nr; i++) {
+		if (packet_reader_read(reader) != PACKET_READ_NORMAL) {
+			check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected");
+			return -1;
+		}
+		if (unsorted_string_list_has_string(args->object_info_options, reader->line)) {
+			if (!strcmp(reader->line, "size")) {
+				size_index = i;
+				for (size_t j = 0; j < args->oids->nr; j++)
+					object_info_data[j].sizep = xcalloc(1, sizeof(long));
+			}
+			continue;
+		}
+		return -1;
+	}
+
+	for (size_t i = 0; packet_reader_read(reader) == PACKET_READ_NORMAL && i < args->oids->nr; i++){
+		struct string_list object_info_values = STRING_LIST_INIT_DUP;
+
+		string_list_split(&object_info_values, reader->line, ' ', -1);
+		if (0 <= size_index) {
+			if (!strcmp(object_info_values.items[1 + size_index].string, ""))
+				die("object-info: not our ref %s",
+					object_info_values.items[0].string);
+
+			*object_info_data[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10);
+		}
+
+		string_list_clear(&object_info_values, 0);
+	}
+	check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected");
+
+	return 0;
+}
diff --git a/fetch-object-info.h b/fetch-object-info.h
new file mode 100644
index 0000000000..ce1a05dc96
--- /dev/null
+++ b/fetch-object-info.h
@@ -0,0 +1,18 @@ 
+#ifndef FETCH_OBJECT_INFO_H
+#define FETCH_OBJECT_INFO_H
+
+#include "pkt-line.h"
+#include "protocol.h"
+#include "object-store-ll.h"
+
+struct object_info_args {
+	struct string_list *object_info_options;
+	const struct string_list *server_options;
+	struct oid_array *oids;
+};
+
+int fetch_object_info(enum protocol_version version, struct object_info_args *args,
+		      struct packet_reader *reader, struct object_info *object_info_data,
+		      int stateless_rpc, int fd_out);
+
+#endif /* FETCH_OBJECT_INFO_H */
diff --git a/fetch-pack.c b/fetch-pack.c
index 51de82e414..704bc21b47 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1654,6 +1654,9 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info)
+		state = FETCH_SEND_REQUEST;
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
diff --git a/fetch-pack.h b/fetch-pack.h
index 9d3470366f..119d3369f1 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,7 @@  struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+	struct object_info *object_info_data;
 
 	/*
 	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
@@ -42,6 +43,7 @@  struct fetch_pack_args {
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
 	unsigned refetch:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
diff --git a/transport-helper.c b/transport-helper.c
index d457b42550..9da1547b2c 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -710,8 +710,8 @@  static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and cat-file
+	 * remote-object-info require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
@@ -727,6 +727,13 @@  static int fetch_refs(struct transport *transport,
 		free_refs(dummy);
 	}
 
+	/* fail the command explicitly to avoid further commands input. */
+	if (transport->smart_options->object_info)
+		die(_("remote-object-info requires protocol v2"));
+
+	if (!data->get_refs_list_called)
+		get_refs_list_using_list(transport, 0);
+
 	count = 0;
 	for (i = 0; i < nr_heads; i++)
 		if (!(to_fetch[i]->status & REF_STATUS_UPTODATE))
diff --git a/transport.c b/transport.c
index 10d820c333..5a2629de52 100644
--- a/transport.c
+++ b/transport.c
@@ -9,6 +9,7 @@ 
 #include "hook.h"
 #include "pkt-line.h"
 #include "fetch-pack.h"
+#include "fetch-object-info.h"
 #include "remote.h"
 #include "connect.h"
 #include "send-pack.h"
@@ -464,8 +465,33 @@  static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
+	args.object_info = transport->smart_options->object_info;
+
+	if (transport->smart_options
+		&& transport->smart_options->object_info
+		&& transport->smart_options->object_info_oids->nr > 0) {
+		struct packet_reader reader;
+		struct object_info_args obj_info_args = { 0 };
+
+		obj_info_args.server_options = transport->server_options;
+		obj_info_args.object_info_options = transport->smart_options->object_info_options;
+		obj_info_args.oids = transport->smart_options->object_info_oids;
+
+		connect_setup(transport, 0);
+		packet_reader_init(&reader, data->fd[0], NULL, 0,
+				PACKET_READ_CHOMP_NEWLINE |
+				PACKET_READ_GENTLE_ON_EOF |
+				PACKET_READ_DIE_ON_ERR_PACKET);
+
+		data->version = discover_version(&reader);
+		transport->hash_algo = reader.hash_algo;
+
+		ret = fetch_object_info(data->version, &obj_info_args, &reader,
+					data->options.object_info_data, transport->stateless_rpc,
+					data->fd[1]);
+		goto cleanup;
 
-	if (!data->finished_handshake) {
+	} else if (!data->finished_handshake) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
diff --git a/transport.h b/transport.h
index 44100fa9b7..e61e931863 100644
--- a/transport.h
+++ b/transport.h
@@ -5,6 +5,7 @@ 
 #include "remote.h"
 #include "list-objects-filter-options.h"
 #include "string-list.h"
+#include "object-store.h"
 
 struct git_transport_options {
 	unsigned thin : 1;
@@ -30,6 +31,12 @@  struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to retrieve only object-info.
+	 * If object-info is not supported, the operation will error and exit.
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -53,6 +60,10 @@  struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct object_info *object_info_data;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {