Message ID | 20220728230210.2952731-6-calvinwan@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | None | expand |
Calvin Wan <calvinwan@google.com> writes: > +void send_object_info_request(int fd_out, struct object_info_args *args) > +{ > + struct strbuf req_buf = STRBUF_INIT; > + size_t i; > + > + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); > + > + if (unsorted_string_list_has_string(args->object_info_options, "size")) > + packet_buf_write(&req_buf, "size"); > + > + if (unsorted_string_list_has_string(args->object_info_options, "type")) > + packet_buf_write(&req_buf, "type"); > + > + if (args->oids) { > + for (i = 0; i < args->oids->nr; i++) > + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); > + } If !args->oids then we say "we want to request object-info to learn size and type for the following objects: oh, there are no objects we are interested in". I wonder if an early return if (!args->oids) return; at the beginning of the function that turns it into a benign no-op, may make more sense? Calling "send_X()" helper and seeing nothing come out on the wire might make it look awkward, though. > @@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport, > int nr_heads, struct ref **to_fetch) > { > int ret = 0; > + size_t i; > struct git_transport_data *data = transport->data; > struct ref *refs = NULL; > struct fetch_pack_args args; > struct ref *refs_tmp = NULL; > + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); Style: no SP between "sizeof" and "(".
Hi Calvin On 29/07/2022 00:02, Calvin Wan wrote: > Sometimes it is useful to get information about an object without having > to download it completely. The server logic has already been implemented > as “a2ba162cda (object-info: support for retrieving object info, > 2021-04-20)”. This patch adds client functions to communicate with the > server. > > The client currently supports requesting a list of object ids with > features 'size' and 'type' from a v2 server. If a server does not > advertise either of the requested features, then the client falls back > to making the request through 'fetch'. I'm confused by the 'type' support, I might have missed something as I'm not familiar with this code but I couldn't see where we parse the type returned by the server. > + for (i = 0; i < args.object_info_options->nr; i++) { > + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); This is one of a number of lines in this series that are way over the 80 column limit. > + return -1; > + } > + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { > + if (!strcmp(reader.line, "size")) > + size_index = i; Should we be checking for "type" as well? Also does protocol-v2.txt need updating as it only mentions "size" as an attribute. > + continue; > + } > + return -1; > + } > + > + i = 0; > + while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) { > + struct string_list object_info_values = STRING_LIST_INIT_DUP; > + > + string_list_split(&object_info_values, reader.line, ' ', -1); > + if (0 <= size_index) { To avoid a possible out-of-bounds access we need to check that size_index + 1 < object_info_value.nr in case the server response is malformed > + if (!strcmp(object_info_values.items[1 + size_index].string, "")) > + die("object-info: not our ref %s", I'm a bit confused by this message is it trying to say "object %s is missing on the server"? > + object_info_values.items[0].string); > + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); As Junio pointed out in his comments in v4 there is no error checking here - we should check the server has actually returned a number. Note that strtoul() will happily parse negative numbers so we probably want to do something like const char *endp errno = 0 if (!isdigit(*object_info_values.items[1 + size_index].string)) die("...") *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, &endp, 10); if (errno || *endp) die("...") Should be we checking the object id matches what we asked for? (I'm not sure if protocol-v2.txt mentions the order in which the objects are returned) Should we be parsing the object type here as well? > @@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport, > args.server_options = transport->server_options; > args.negotiation_tips = data->options.negotiation_tips; > args.reject_shallow_remote = transport->smart_options->reject_shallow; > - > - if (!data->got_remote_heads) { > + args.object_info = transport->smart_options->object_info; > + > + if (transport->smart_options && transport->smart_options->object_info) { > + struct ref *ref = object_info_refs; > + > + if (!fetch_object_info(transport, data->options.object_info_data)) > + goto cleanup; > + args.object_info_data = data->options.object_info_data; > + args.quiet = 1; > + args.no_progress = 1; > + for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); Using CALLOC_ARRAY() or p = xcalloc(1, sizeof(*p)) would be safer here (and everywhere else where you have used xcalloc()) as it ensures we allocate the correct size. > diff --git a/transport.h b/transport.h > index b5bf7b3e70..5512fdb140 100644 > --- a/transport.h > +++ b/transport.h > @@ -31,6 +32,12 @@ struct git_transport_options { > */ > unsigned connectivity_checked:1; > > + /* > + * Transport will attempt to pull only object-info. Fallbacks > + * to pulling entire object if object-info is not supported > + */ Is it definitely true that we fallback to pulling the entire object? - there is at least one place above where we do > + if (transport->smart_options->object_info) { > + die(_("--object-info requires protocol v2")); > + } > Best Wishes Phillip
Hi Calvin On 29/07/2022 00:02, Calvin Wan wrote: > diff --git a/transport.c b/transport.c > index 52db7a3cb0..2d503e2fbd 100644 > --- a/transport.c > +++ b/transport.c > + i = 0; > + while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) { > + struct string_list object_info_values = STRING_LIST_INIT_DUP; I forget to say earlier that this is leaked Best Wishes Phillip
> If !args->oids then we say "we want to request object-info to learn > size and type for the following objects: oh, there are no objects we > are interested in". I wonder if an early return > > if (!args->oids) > return; > > at the beginning of the function that turns it into a benign no-op, > may make more sense? Calling "send_X()" helper and seeing nothing > come out on the wire might make it look awkward, though. If I add this change, then I should also add a check to ensure args->object_info_options is also not empty. The question I think we should answer is whether checking for oids and options should happen inside this function or not. I'm leaning towards outside the function for the reason you stated above about nothing coming out of the wire being awkward. > > + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); > > Style: no SP between "sizeof" and "(". ack
> > The client currently supports requesting a list of object ids with > > features 'size' and 'type' from a v2 server. If a server does not > > advertise either of the requested features, then the client falls back > > to making the request through 'fetch'. > > I'm confused by the 'type' support, I might have missed something as I'm > not familiar with this code but I couldn't see where we parse the type > returned by the server. I should clarify that the server does not support 'type', only the client. Since the client falls back to 'fetch' to grab the object info if the server doesn't support a requested option (e.g. type), I have 'type' included as part of the supported client options. > > + for (i = 0; i < args.object_info_options->nr; i++) { > > + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { > > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > > This is one of a number of lines in this series that are way over the 80 > column limit. ack > > + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { > > + if (!strcmp(reader.line, "size")) > > + size_index = i; > > Should we be checking for "type" as well? Also does protocol-v2.txt need > updating as it only mentions "size" as an attribute. I gave context above -- the server does not accept 'type' so protocol-v2.txt does not need to be updated. > To avoid a possible out-of-bounds access we need to check that > size_index + 1 < object_info_value.nr in case the server response is > malformed ack > > + if (!strcmp(object_info_values.items[1 + size_index].string, "")) > > + die("object-info: not our ref %s", > > I'm a bit confused by this message is it trying to say "object %s is > missing on the server"? Correct. You'll find the same error message in upload-pack.c > > + object_info_values.items[0].string); > > + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); > > As Junio pointed out in his comments in v4 there is no error checking > here - we should check the server has actually returned a number. Note > that strtoul() will happily parse negative numbers so we probably want > to do something like > > const char *endp > errno = 0 > if (!isdigit(*object_info_values.items[1 + size_index].string)) > die("...") > *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + > size_index].string, &endp, 10); > if (errno || *endp) > die("...") > Should be we checking the object id matches what we asked for? (I'm not > sure if protocol-v2.txt mentions the order in which the objects are > returned) Hmmmm I think I either check for an object id match or update protocol-v2.txt to mention order is consistent. > Should we be parsing the object type here as well? When the server starts supporting it. > > + for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > > Using CALLOC_ARRAY() or p = xcalloc(1, sizeof(*p)) would be safer here > (and everywhere else where you have used xcalloc()) as it ensures we > allocate the correct size. ack > > + /* > > + * Transport will attempt to pull only object-info. Fallbacks > > + * to pulling entire object if object-info is not supported > > + */ > > Is it definitely true that we fallback to pulling the entire object? - > there is at least one place above where we do Yes, fetch_refs_via_pack() is where fetch_object_info() is called, making it easy to fallback if fetch_object_info() fails. >> + while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) { >> + struct string_list object_info_values = STRING_LIST_INIT_DUP; > > I forget to say earlier that this is leaked ack Thank you for the review!
Hi Calvin On 29/07/2022 00:02, Calvin Wan wrote: > diff --git a/transport.c b/transport.c > index 52db7a3cb0..2d503e2fbd 100644 > --- a/transport.c > +++ b/transport.c > @@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport, > int nr_heads, struct ref **to_fetch) > { > int ret = 0; > + size_t i; > struct git_transport_data *data = transport->data; > struct ref *refs = NULL; > struct fetch_pack_args args; > struct ref *refs_tmp = NULL; > + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); When git is compiled with SANITIZE=address then one of the cat-file tests added in patch 6 fails with an out of bounds access. The problem is that the last member of struct ref is a flexible array that is expected to contain a NUL terminated string but we don't allocate any memory for the string here. We could just add one to the size of the allocation but as there is a dedicated allocation function it would be better to use alloc_ref(""). I think it would also be worth delaying this allocation until we're sure it is going to be needed. > memset(&args, 0, sizeof(args)); > args.uploadpack = data->options.uploadpack; > @@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport, > args.server_options = transport->server_options; > args.negotiation_tips = data->options.negotiation_tips; > args.reject_shallow_remote = transport->smart_options->reject_shallow; > - > - if (!data->got_remote_heads) { > + args.object_info = transport->smart_options->object_info; > + > + if (transport->smart_options && transport->smart_options->object_info) { > + struct ref *ref = object_info_refs; > + > + if (!fetch_object_info(transport, data->options.object_info_data)) > + goto cleanup; If we allocate object_info_refs and initialize ref here then we avoid allocating it when it is not needed. > + args.object_info_data = data->options.object_info_data; > + args.quiet = 1; > + args.no_progress = 1; > + for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) { > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); This needs to use alloc_ref("") as well > + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); This would be clearer as ...oids->oid[i] I think Best Wishes Phillip
Hi Calvin On 04/08/2022 23:20, Calvin Wan wrote: >>> The client currently supports requesting a list of object ids with >>> features 'size' and 'type' from a v2 server. If a server does not >>> advertise either of the requested features, then the client falls back >>> to making the request through 'fetch'. >> >> I'm confused by the 'type' support, I might have missed something as I'm >> not familiar with this code but I couldn't see where we parse the type >> returned by the server. > > I should clarify that the server does not support 'type', only the client. > Since the client falls back to 'fetch' to grab the object info if the server > doesn't support a requested option (e.g. type), I have 'type' included > as part of the supported client options. Does that mean all the type client code is effectively unused and so untested? >>> + if (!strcmp(object_info_values.items[1 + size_index].string, "")) >>> + die("object-info: not our ref %s", >> >> I'm a bit confused by this message is it trying to say "object %s is >> missing on the server"? > > Correct. You'll find the same error message in upload-pack.c I find the message confusing as we're talking about oids not refs. I have also realized that dying here is incompatible with the normal cat-file behavior of printing "<objectname> missing" rather than dying when a missing object is queried. >> Should be we checking the object id matches what we asked for? (I'm not >> sure if protocol-v2.txt mentions the order in which the objects are >> returned) > > Hmmmm I think I either check for an object id match or update > protocol-v2.txt to mention order is consistent. If we can then updating the protocol makes sense. Even if we do that a buggy or malicious server could return the objects in a different order. A malicious server can also lie about the size so I'm not sure how much benefit there is in checking the oids in terms of validating the response but I think we should definitely check that the server returns the expected number of sizes. Best Wishes Phillip
diff --git a/fetch-pack.c b/fetch-pack.c index 8c862b017e..d373aed775 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1293,6 +1293,31 @@ static void write_command_and_capabilities(struct strbuf *req_buf, packet_buf_delim(req_buf); } +void send_object_info_request(int fd_out, struct object_info_args *args) +{ + struct strbuf req_buf = STRBUF_INIT; + size_t i; + + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); + + if (unsorted_string_list_has_string(args->object_info_options, "size")) + packet_buf_write(&req_buf, "size"); + + if (unsorted_string_list_has_string(args->object_info_options, "type")) + packet_buf_write(&req_buf, "type"); + + if (args->oids) { + for (i = 0; i < args->oids->nr; i++) + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); + } + + packet_buf_flush(&req_buf); + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) + die_errno(_("unable to write request to remote")); + + strbuf_release(&req_buf); +} + static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, @@ -1634,6 +1659,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (args->depth > 0 || args->deepen_since || args->deepen_not) args->deepen = 1; + if (args->object_info) + state = FETCH_SEND_REQUEST; + while (state != FETCH_DONE) { switch (state) { case FETCH_CHECK_LOCAL: diff --git a/fetch-pack.h b/fetch-pack.h index 8c7752fc82..11c513f748 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -17,6 +17,7 @@ struct fetch_pack_args { const struct string_list *deepen_not; struct list_objects_filter_options filter_options; const struct string_list *server_options; + struct object_info **object_info_data; /* * If not NULL, during packfile negotiation, fetch-pack will send "have" @@ -43,6 +44,7 @@ struct fetch_pack_args { unsigned reject_shallow_remote:1; unsigned deepen:1; unsigned refetch:1; + unsigned object_info:1; /* * Indicate that the remote of this request is a promisor remote. The @@ -69,6 +71,12 @@ struct fetch_pack_args { unsigned connectivity_checked:1; }; +struct object_info_args { + struct string_list *object_info_options; + const struct string_list *server_options; + struct oid_array *oids; +}; + /* * sought represents remote references that should be updated from. * On return, the names that were found on the remote will have been @@ -102,4 +110,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, */ int report_unmatched_refs(struct ref **sought, int nr_sought); +void send_object_info_request(int fd_out, struct object_info_args *args); + #endif diff --git a/transport-helper.c b/transport-helper.c index 322c722478..48a6680200 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -686,13 +686,16 @@ static int fetch_refs(struct transport *transport, /* * If we reach here, then the server, the client, and/or the transport - * helper does not support protocol v2. --negotiate-only requires - * protocol v2. + * helper does not support protocol v2. --negotiate-only and --object-info + * require protocol v2. */ if (data->transport_options.acked_commits) { warning(_("--negotiate-only requires protocol v2")); return -1; } + if (transport->smart_options->object_info) { + die(_("--object-info requires protocol v2")); + } if (!data->get_refs_list_called) get_refs_list_using_list(transport, 0); diff --git a/transport.c b/transport.c index 52db7a3cb0..2d503e2fbd 100644 --- a/transport.c +++ b/transport.c @@ -353,6 +353,80 @@ static struct ref *handshake(struct transport *transport, int for_push, return refs; } +static int fetch_object_info(struct transport *transport, struct object_info **object_info_data) +{ + size_t i; + int size_index = -1; + struct git_transport_data *data = transport->data; + struct object_info_args args; + struct packet_reader reader; + + memset(&args, 0, sizeof(args)); + args.server_options = transport->server_options; + args.object_info_options = transport->smart_options->object_info_options; + args.oids = transport->smart_options->object_info_oids; + + connect_setup(transport, 0); + packet_reader_init(&reader, data->fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_DIE_ON_ERR_PACKET); + data->version = discover_version(&reader); + + transport->hash_algo = reader.hash_algo; + + switch (data->version) { + case protocol_v2: + if (!server_supports_v2("object-info", 0)) + return -1; + if (unsorted_string_list_has_string(args.object_info_options, "size") + && !server_supports_feature("object-info", "size", 0)) { + return -1; + } + if (unsorted_string_list_has_string(args.object_info_options, "type") + && !server_supports_feature("object-info", "type", 0)) { + return -1; + } + send_object_info_request(data->fd[1], &args); + break; + case protocol_v1: + case protocol_v0: + die(_("wrong protocol version. expected v2")); + case protocol_unknown_version: + BUG("unknown protocol version"); + } + + for (i = 0; i < args.object_info_options->nr; i++) { + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); + return -1; + } + if (unsorted_string_list_has_string(args.object_info_options, reader.line)) { + if (!strcmp(reader.line, "size")) + size_index = i; + continue; + } + return -1; + } + + i = 0; + while (packet_reader_read(&reader) == PACKET_READ_NORMAL && i < args.oids->nr) { + struct string_list object_info_values = STRING_LIST_INIT_DUP; + + string_list_split(&object_info_values, reader.line, ' ', -1); + if (0 <= size_index) { + if (!strcmp(object_info_values.items[1 + size_index].string, "")) + die("object-info: not our ref %s", + object_info_values.items[0].string); + *(*object_info_data)[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); + } + i++; + } + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); + + return 0; +} + static struct ref *get_refs_via_connect(struct transport *transport, int for_push, struct transport_ls_refs_options *options) { @@ -363,10 +437,12 @@ static int fetch_refs_via_pack(struct transport *transport, int nr_heads, struct ref **to_fetch) { int ret = 0; + size_t i; struct git_transport_data *data = transport->data; struct ref *refs = NULL; struct fetch_pack_args args; struct ref *refs_tmp = NULL; + struct ref *object_info_refs = xcalloc(1, sizeof (struct ref)); memset(&args, 0, sizeof(args)); args.uploadpack = data->options.uploadpack; @@ -392,8 +468,25 @@ static int fetch_refs_via_pack(struct transport *transport, args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; - - if (!data->got_remote_heads) { + args.object_info = transport->smart_options->object_info; + + if (transport->smart_options && transport->smart_options->object_info) { + struct ref *ref = object_info_refs; + + if (!fetch_object_info(transport, data->options.object_info_data)) + goto cleanup; + args.object_info_data = data->options.object_info_data; + args.quiet = 1; + args.no_progress = 1; + for (i = 0; i < transport->smart_options->object_info_oids->nr; i++) { + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); + temp_ref->old_oid = *(transport->smart_options->object_info_oids->oid + i); + temp_ref->exact_oid = 1; + ref->next = temp_ref; + ref = ref->next; + } + transport->remote_refs = object_info_refs->next; + } else if (!data->got_remote_heads) { int i; int must_list_refs = 0; for (i = 0; i < nr_heads; i++) { @@ -433,12 +526,22 @@ static int fetch_refs_via_pack(struct transport *transport, to_fetch, nr_heads, &data->shallow, &transport->pack_lockfiles, data->version); + if (args.object_info) { + struct ref *ref_cpy_reader = object_info_refs->next; + int i = 0; + while (ref_cpy_reader) { + oid_object_info_extended(the_repository, &ref_cpy_reader->old_oid, &(*args.object_info_data)[i], OBJECT_INFO_LOOKUP_REPLACE); + ref_cpy_reader = ref_cpy_reader->next; + i++; + } + } + data->got_remote_heads = 0; data->options.self_contained_and_connected = args.self_contained_and_connected; data->options.connectivity_checked = args.connectivity_checked; - if (!refs) + if (refs == NULL && !args.object_info) ret = -1; if (report_unmatched_refs(to_fetch, nr_heads)) ret = -1; @@ -453,6 +556,7 @@ static int fetch_refs_via_pack(struct transport *transport, free_refs(refs_tmp); free_refs(refs); + free_refs(object_info_refs); return ret; } diff --git a/transport.h b/transport.h index b5bf7b3e70..5512fdb140 100644 --- a/transport.h +++ b/transport.h @@ -6,6 +6,7 @@ #include "remote.h" #include "list-objects-filter-options.h" #include "string-list.h" +#include "object-store.h" struct git_transport_options { unsigned thin : 1; @@ -31,6 +32,12 @@ struct git_transport_options { */ unsigned connectivity_checked:1; + /* + * Transport will attempt to pull only object-info. Fallbacks + * to pulling entire object if object-info is not supported + */ + unsigned object_info : 1; + int depth; const char *deepen_since; const struct string_list *deepen_not; @@ -54,6 +61,10 @@ struct git_transport_options { * common commits to this oidset instead of fetching any packfiles. */ struct oidset *acked_commits; + + struct oid_array *object_info_oids; + struct object_info **object_info_data; + struct string_list *object_info_options; }; enum transport_family {
Sometimes it is useful to get information about an object without having to download it completely. The server logic has already been implemented as “a2ba162cda (object-info: support for retrieving object info, 2021-04-20)”. This patch adds client functions to communicate with the server. The client currently supports requesting a list of object ids with features 'size' and 'type' from a v2 server. If a server does not advertise either of the requested features, then the client falls back to making the request through 'fetch'. Signed-off-by: Calvin Wan <calvinwan@google.com> Helped-by: Jonathan Tan <jonathantanmy@google.com> --- fetch-pack.c | 28 ++++++++++++ fetch-pack.h | 10 +++++ transport-helper.c | 7 ++- transport.c | 110 +++++++++++++++++++++++++++++++++++++++++++-- transport.h | 11 +++++ 5 files changed, 161 insertions(+), 5 deletions(-)