diff mbox series

[WIP,RFC,4/5] upload-pack: refactor writing of "packfile" line

Message ID 1d678a78a63b7e58988b52c8c0efab38c34a6848.1543879256.git.jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Design for offloading part of packfile response to CDN | expand

Commit Message

Jonathan Tan Dec. 3, 2018, 11:37 p.m. UTC
A subsequent patch allows pack-objects to output additional information
(in addition to the packfile that it currently outputs). This means that
we must hold off on writing the "packfile" section header to the client
before we process the output of pack-objects, so move the writing of
the "packfile" section header to read_pack_objects_stdout().

Unfortunately, this also means that we cannot send keepalive packets
until pack-objects starts sending out the packfile, since the sideband
is only established when the "packfile" section header is sent.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 upload-pack.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)

Comments

Junio C Hamano Dec. 6, 2018, 6:35 a.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> @@ -126,6 +129,12 @@ static int read_pack_objects_stdout(int outfd, struct output_state *os)
>  	}
>  	os->used += readsz;
>  
> +	if (!os->packfile_started) {
> +		os->packfile_started = 1;
> +		if (use_protocol_v2)
> +			packet_write_fmt(1, "packfile\n");

If we fix this function so that the only byte in the buffer is held
back without emitted when os->used == 1 as I alluded to, this may
have to be done a bit later, as with such a change, it is no longer
guaranteed that send_client_data() will be called after this point.

> +	}
> +
>  	if (os->used > 1) {
>  		send_client_data(1, os->buffer, os->used - 1);
>  		os->buffer[0] = os->buffer[os->used - 1];

> +static void flush_progress_buf(struct strbuf *progress_buf)
> +{
> +	if (!progress_buf->len)
> +		return;
> +	send_client_data(2, progress_buf->buf, progress_buf->len);
> +	strbuf_reset(progress_buf);
> +}

Interesting.

>  static void create_pack_file(const struct object_array *have_obj,
> -			     const struct object_array *want_obj)
> +			     const struct object_array *want_obj,
> +			     int use_protocol_v2)
>  {
>  	struct child_process pack_objects = CHILD_PROCESS_INIT;
>  	struct output_state output_state = {0};
> @@ -260,9 +278,13 @@ static void create_pack_file(const struct object_array *have_obj,
>  			 */
>  			sz = xread(pack_objects.err, progress,
>  				  sizeof(progress));
> -			if (0 < sz)
> -				send_client_data(2, progress, sz);
> -			else if (sz == 0) {
> +			if (0 < sz) {
> +				if (output_state.packfile_started)
> +					send_client_data(2, progress, sz);
> +				else
> +					strbuf_add(&output_state.progress_buf,
> +						   progress, sz);

Isn't progress output that goes to the channel #2 pretty much
independent from the payload stream that carries the pkt-line 
command like "packfile" plus the raw pack stream?  It somehow
feels like an oxymoron to _buffer_ progress indicator, as it
defeats the whole point of progress report to buffer it.
Jonathan Tan Dec. 6, 2018, 11:25 p.m. UTC | #2
> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > @@ -126,6 +129,12 @@ static int read_pack_objects_stdout(int outfd, struct output_state *os)
> >  	}
> >  	os->used += readsz;
> >  
> > +	if (!os->packfile_started) {
> > +		os->packfile_started = 1;
> > +		if (use_protocol_v2)
> > +			packet_write_fmt(1, "packfile\n");
> 
> If we fix this function so that the only byte in the buffer is held
> back without emitted when os->used == 1 as I alluded to, this may
> have to be done a bit later, as with such a change, it is no longer
> guaranteed that send_client_data() will be called after this point.

I'm not sure what you mean about there being no guarantee that
send_client_data() is not called - in create_pack_file(), there is an
"if (output_state.used > 0)" line (previously "if (0 <= buffered)") that
outputs anything remaining.

> Isn't progress output that goes to the channel #2 pretty much
> independent from the payload stream that carries the pkt-line 
> command like "packfile" plus the raw pack stream?  It somehow
> feels like an oxymoron to _buffer_ progress indicator, as it
> defeats the whole point of progress report to buffer it.

Yes, it is - I don't fully like this part of the design. I alluded to a
similar issue (keepalive) in the toplevel email [1] and that it might be
better if the server can send sideband throughout the whole response -
perhaps that should be investigated first. If we had sideband throughout
the whole response, we wouldn't need to buffer the progress indicator.

[1] https://public-inbox.org/git/cover.1543879256.git.jonathantanmy@google.com/
Junio C Hamano Dec. 7, 2018, 12:22 a.m. UTC | #3
Jonathan Tan <jonathantanmy@google.com> writes:

>> Jonathan Tan <jonathantanmy@google.com> writes:
>> 
>> > @@ -126,6 +129,12 @@ static int read_pack_objects_stdout(int outfd, struct output_state *os)
>> >  	}
>> >  	os->used += readsz;
>> >  
>> > +	if (!os->packfile_started) {
>> > +		os->packfile_started = 1;
>> > +		if (use_protocol_v2)
>> > +			packet_write_fmt(1, "packfile\n");
>> 
>> If we fix this function so that the only byte in the buffer is held
>> back without emitted when os->used == 1 as I alluded to, this may
>> have to be done a bit later, as with such a change, it is no longer
>> guaranteed that send_client_data() will be called after this point.
>
> I'm not sure what you mean about there being no guarantee that
> send_client_data() is not called - in create_pack_file(), there is an
> "if (output_state.used > 0)" line (previously "if (0 <= buffered)") that
> outputs anything remaining.

I was referring to this part of the review on the previous step,
which you may not yet have read.

    OK, this corresponds to the "*cp++ = buffered"  in the original just
    before xread().

    > +		os->used = 1;
    > +	} else {
    > +		send_client_data(1, os->buffer, os->used);
    > +		os->used = 0;

    I am not sure if the code is correct when os->used happens to be 1
    (shouldn't we hold the byte, skip the call to send_client_data(),
    and go back to poll() to expect more data?), but this is a faithful
    code movement and rewrite of the original.

The point of this logic is to make sure we always hold back some
bytes and do not feed *all* the bytes to the other side by calling
"send-client-data" until we made sure the upstream of what we are
relaying (pack-objects?) successfully exited, but it looks to me
that the "else" clause above ends up flushing everything when
os->used is 1, which goes against the whole purpose of the code.

And the "fix" I was alluding to was to update that "else" clause to
make it a no-op that keeps os->used non-zero, which would not call
send-client-data.

When that fix happens, the part that early in the function this
patch added "now we know we will call send-client-data, so let's say
'here comes packdata' unless we have already said that" is making
the decision too early.  Depending on the value of os->used when we
enter the code and the number of bytes xread() reads from the
upstream, we might not call send-client-data yet (namely, when we
have no buffered data and we happened to get only one byte).

> ... it might be
> better if the server can send sideband throughout the whole response -
> perhaps that should be investigated first.

Yup.  It just looked quite crazy, and it is even more crazy to
buffer keepalives ;-)
diff mbox series

Patch

diff --git a/upload-pack.c b/upload-pack.c
index cec43e0a46..aa2589b858 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -104,9 +104,12 @@  static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
 struct output_state {
 	char buffer[8193];
 	int used;
+	unsigned packfile_started : 1;
+	struct strbuf progress_buf;
 };
 
-static int read_pack_objects_stdout(int outfd, struct output_state *os)
+static int read_pack_objects_stdout(int outfd, struct output_state *os,
+				    int use_protocol_v2)
 {
 	/* Data ready; we keep the last byte to ourselves
 	 * in case we detect broken rev-list, so that we
@@ -126,6 +129,12 @@  static int read_pack_objects_stdout(int outfd, struct output_state *os)
 	}
 	os->used += readsz;
 
+	if (!os->packfile_started) {
+		os->packfile_started = 1;
+		if (use_protocol_v2)
+			packet_write_fmt(1, "packfile\n");
+	}
+
 	if (os->used > 1) {
 		send_client_data(1, os->buffer, os->used - 1);
 		os->buffer[0] = os->buffer[os->used - 1];
@@ -138,8 +147,17 @@  static int read_pack_objects_stdout(int outfd, struct output_state *os)
 	return readsz;
 }
 
+static void flush_progress_buf(struct strbuf *progress_buf)
+{
+	if (!progress_buf->len)
+		return;
+	send_client_data(2, progress_buf->buf, progress_buf->len);
+	strbuf_reset(progress_buf);
+}
+
 static void create_pack_file(const struct object_array *have_obj,
-			     const struct object_array *want_obj)
+			     const struct object_array *want_obj,
+			     int use_protocol_v2)
 {
 	struct child_process pack_objects = CHILD_PROCESS_INIT;
 	struct output_state output_state = {0};
@@ -260,9 +278,13 @@  static void create_pack_file(const struct object_array *have_obj,
 			 */
 			sz = xread(pack_objects.err, progress,
 				  sizeof(progress));
-			if (0 < sz)
-				send_client_data(2, progress, sz);
-			else if (sz == 0) {
+			if (0 < sz) {
+				if (output_state.packfile_started)
+					send_client_data(2, progress, sz);
+				else
+					strbuf_add(&output_state.progress_buf,
+						   progress, sz);
+			} else if (sz == 0) {
 				close(pack_objects.err);
 				pack_objects.err = -1;
 			}
@@ -273,8 +295,10 @@  static void create_pack_file(const struct object_array *have_obj,
 		}
 		if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) {
 			int result = read_pack_objects_stdout(pack_objects.out,
-							      &output_state);
-
+							      &output_state,
+							      use_protocol_v2);
+			if (output_state.packfile_started)
+				flush_progress_buf(&output_state.progress_buf);
 			if (result == 0) {
 				close(pack_objects.out);
 				pack_objects.out = -1;
@@ -293,12 +317,14 @@  static void create_pack_file(const struct object_array *have_obj,
 		 * protocol to say anything, so those clients are just out of
 		 * luck.
 		 */
-		if (!ret && use_sideband) {
+		if (!ret && use_sideband && output_state.packfile_started) {
 			static const char buf[] = "0005\1";
 			write_or_die(1, buf, 5);
 		}
 	}
 
+	flush_progress_buf(&output_state.progress_buf);
+
 	if (finish_command(&pack_objects)) {
 		error("git upload-pack: git-pack-objects died with error.");
 		goto fail;
@@ -1094,7 +1120,7 @@  void upload_pack(struct upload_pack_options *options)
 	if (want_obj.nr) {
 		struct object_array have_obj = OBJECT_ARRAY_INIT;
 		get_common_commits(&have_obj, &want_obj);
-		create_pack_file(&have_obj, &want_obj);
+		create_pack_file(&have_obj, &want_obj, 0);
 	}
 }
 
@@ -1475,8 +1501,7 @@  int upload_pack_v2(struct repository *r, struct argv_array *keys,
 			send_wanted_ref_info(&data);
 			send_shallow_info(&data, &want_obj);
 
-			packet_write_fmt(1, "packfile\n");
-			create_pack_file(&have_obj, &want_obj);
+			create_pack_file(&have_obj, &want_obj, 1);
 			state = FETCH_DONE;
 			break;
 		case FETCH_DONE: