diff mbox series

[v2,1/4] fetch: extract writing to FETCH_HEAD

Message ID d80dbc5a9c9520621651541e418ee5216d164053.1610107599.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series fetch: implement support for atomic reference updates | expand

Commit Message

Patrick Steinhardt Jan. 8, 2021, 12:11 p.m. UTC
When performing a fetch with the default `--write-fetch-head` option, we
write all updated references to FETCH_HEAD while the updates are
performed. Given that updates are not performed atomically, it means
that we we write to FETCH_HEAD even if some or all of the reference
updates fail.

Given that we simply update FETCH_HEAD ad-hoc with each reference, the
logic is completely contained in `store_update_refs` and thus quite hard
to extend. This can already be seen by the way we skip writing to the
FETCH_HEAD: instead of having a conditional which simply skips writing,
we instead open "/dev/null" and needlessly write all updates there.

We are about to extend git-fetch(1) to accept an `--atomic` flag which
will make the fetch an all-or-nothing operation with regards to the
reference updates. This will also require us to make the updates to
FETCH_HEAD an all-or-nothing operation, but as explained doing so is not
easy with the current layout. This commit thus refactors the wa we write
to FETCH_HEAD and pulls out the logic to open, append to, commit and
close the file. While this may seem rather over-the top at first,
pulling out this logic will make it a lot easier to update the code in a
subsequent commit. It also allows us to easily skip writing completely
in case `--no-write-fetch-head` was passed.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/fetch.c | 80 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 18 deletions(-)

Comments

Junio C Hamano Jan. 8, 2021, 11:40 p.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

> +static int open_fetch_head(struct fetch_head *fetch_head)
> +{
> +	const char *filename = git_path_fetch_head(the_repository);
> +
> +	if (!write_fetch_head)
> +		return 0;
> +
> +	fetch_head->fp = fopen(filename, "a");
> +	if (!fetch_head->fp)
> +		return error_errno(_("cannot open %s"), filename);
> +
> +	return 0;
> +}

So the difference from the original, which used to have a writable
filehandle to /dev/null in the dry-run mode, is that fetch_head->fp
is left as-is (not even NULLed out).

> +static void append_fetch_head(struct fetch_head *fetch_head, const char *old_oid,

It is clear from the type these days but variable names like
"old_oid" hint the readers that they are not a hexadecimal object
name string but either an array of uchar[40] or a struct object_id;
perhaps "old_oid_hex" would be less misleading.

If the caller does have struct object_id, then it would be even
better to take it as-is as a parameter and use oid_to_hex_r() on it in
this function when it is given to fprintf().  [Nit #1]

> +			      const char *merge_status_marker, const char *note,
> +			      const char *url, size_t url_len)
> +{
> +	size_t i;
> +
> +	if (!write_fetch_head)
> +		return;

Presumably, this check is what makes sure that fetch_head->fp that
is left uninitialized will never gets used.

> +	fprintf(fetch_head->fp, "%s\t%s\t%s",
> +		old_oid, merge_status_marker, note);
> +	for (i = 0; i < url_len; ++i)
> +		if ('\n' == url[i])
> +			fputs("\\n", fetch_head->fp);
> +		else
> +			fputc(url[i], fetch_head->fp);
> +	fputc('\n', fetch_head->fp);
> +}

OK.  This is the "case FETCH_HEAD_NOT_FOR_MERGE" and "case
FETCH_HEAD_MERGE" parts in the original.

As an abstraction, it may be better to make the caller pass a
boolean "is this for merge?" and keep the knowledge of what exact
string is used for merge_status_marker to this function, instead of
letting the caller passing it as a parameter in the string form.
After all, we never allow anything other than an empty string or a
fixed "not-for-merge" string in that place in the file format.
[Nit #2]

> +static void commit_fetch_head(struct fetch_head *fetch_head)
> +{
> +	/* Nothing to commit yet. */
> +}
> +
> +static void close_fetch_head(struct fetch_head *fetch_head)
> +{
> +	if (!write_fetch_head)
> +		return;

So is this check a protection against uninitialized fetch_head->fp.
Both changes make sense.

> +	fclose(fetch_head->fp);
> +}

> @@ -909,22 +959,19 @@ N_("It took %.2f seconds to check forced updates. You can use\n"
>  static int store_updated_refs(const char *raw_url, const char *remote_name,
>  			      int connectivity_checked, struct ref *ref_map)
>  {
> -	FILE *fp;
> +	struct fetch_head fetch_head;

And that is why this variable is left uninitialised on stack and it
is OK.  An alternative design would be to initialize fetch_head.fp
to NULL, and return early with "if (!fetch_head->fp)" in the two
functions that take fetch_head struct.  That way, we have less
reliance on the global variable write_fetch_head.

>  	struct commit *commit;
>  	int url_len, i, rc = 0;
>  	struct strbuf note = STRBUF_INIT;
>  	const char *what, *kind;
>  	struct ref *rm;
>  	char *url;
> -	const char *filename = (!write_fetch_head
> -				? "/dev/null"
> -				: git_path_fetch_head(the_repository));
>  	int want_status;
>  	int summary_width = transport_summary_width(ref_map);
>  
> -	fp = fopen(filename, "a");
> -	if (!fp)
> -		return error_errno(_("cannot open %s"), filename);
> +	rc = open_fetch_head(&fetch_head);
> +	if (rc)
> +		return -1;

OK, we've already said "cannot open" in the open_fetch_head()
function, so we just return an error silently.

> @@ -1016,16 +1063,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
>  				merge_status_marker = "not-for-merge";
>  				/* fall-through */
>  			case FETCH_HEAD_MERGE:
> -				fprintf(fp, "%s\t%s\t%s",
> -					oid_to_hex(&rm->old_oid),
> -					merge_status_marker,
> -					note.buf);
> -				for (i = 0; i < url_len; ++i)
> -					if ('\n' == url[i])
> -						fputs("\\n", fp);
> -					else
> -						fputc(url[i], fp);
> -				fputc('\n', fp);
> +				append_fetch_head(&fetch_head,
> +						  oid_to_hex(&rm->old_oid),
> +						  merge_status_marker,
> +						  note.buf, url, url_len);

Here, we can lose merge_status_marker variable from this caller, and
then this caller becomes:

		switch (rm->fetch_head_status) {
		case FETCH_HEAD_NOT_FOR_MERGE:
		case FETCH_HEAD_MERGE:
			append_fetch_head(&fetch_head, &rm->old_oid,
				rm->fetch_head_status == FETCH_HEAD_MERGE,
                                note.buf, url, url_len);

Note that I am passing "is this a ref to be merged?" boolean to keep
the exact string of "not-for-merge" in the callee.

> @@ -1060,6 +1101,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
>  		}
>  	}
>  
> +	if (!rc)
> +		commit_fetch_head(&fetch_head);
> +
>  	if (rc & STORE_REF_ERROR_DF_CONFLICT)
>  		error(_("some local refs could not be updated; try running\n"
>  		      " 'git remote prune %s' to remove any old, conflicting "
> @@ -1077,7 +1121,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
>   abort:
>  	strbuf_release(&note);
>  	free(url);
> -	fclose(fp);
> +	close_fetch_head(&fetch_head);
>  	return rc;
>  }

Other than the above two nits, this step looks good to me.

Thanks.
Patrick Steinhardt Jan. 11, 2021, 10:26 a.m. UTC | #2
On Fri, Jan 08, 2021 at 03:40:17PM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > +static void append_fetch_head(struct fetch_head *fetch_head, const char *old_oid,
> 
> It is clear from the type these days but variable names like
> "old_oid" hint the readers that they are not a hexadecimal object
> name string but either an array of uchar[40] or a struct object_id;
> perhaps "old_oid_hex" would be less misleading.
> 
> If the caller does have struct object_id, then it would be even
> better to take it as-is as a parameter and use oid_to_hex_r() on it in
> this function when it is given to fprintf().  [Nit #1]

Agreed.

[snip]
> > +	fprintf(fetch_head->fp, "%s\t%s\t%s",
> > +		old_oid, merge_status_marker, note);
> > +	for (i = 0; i < url_len; ++i)
> > +		if ('\n' == url[i])
> > +			fputs("\\n", fetch_head->fp);
> > +		else
> > +			fputc(url[i], fetch_head->fp);
> > +	fputc('\n', fetch_head->fp);
> > +}
> 
> OK.  This is the "case FETCH_HEAD_NOT_FOR_MERGE" and "case
> FETCH_HEAD_MERGE" parts in the original.
> 
> As an abstraction, it may be better to make the caller pass a
> boolean "is this for merge?" and keep the knowledge of what exact
> string is used for merge_status_marker to this function, instead of
> letting the caller passing it as a parameter in the string form.
> After all, we never allow anything other than an empty string or a
> fixed "not-for-merge" string in that place in the file format.
> [Nit #2]

I think it's even nicer to just pass in `rm->fetch_head_status`
directly, which allows us to move below switch into `append_fetch_head`.

[snip]
> > +	fclose(fetch_head->fp);
> > +}
> 
> > @@ -909,22 +959,19 @@ N_("It took %.2f seconds to check forced updates. You can use\n"
> >  static int store_updated_refs(const char *raw_url, const char *remote_name,
> >  			      int connectivity_checked, struct ref *ref_map)
> >  {
> > -	FILE *fp;
> > +	struct fetch_head fetch_head;
> 
> And that is why this variable is left uninitialised on stack and it
> is OK.  An alternative design would be to initialize fetch_head.fp
> to NULL, and return early with "if (!fetch_head->fp)" in the two
> functions that take fetch_head struct.  That way, we have less
> reliance on the global variable write_fetch_head.

I like your proposal more. I wasn't quite happy with leaving `fp`
uninitialized, and using it as a sentinel for whether to write something
or not feels natural to me.

[snip]
> > @@ -1016,16 +1063,10 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
> >  				merge_status_marker = "not-for-merge";
> >  				/* fall-through */
> >  			case FETCH_HEAD_MERGE:
> > -				fprintf(fp, "%s\t%s\t%s",
> > -					oid_to_hex(&rm->old_oid),
> > -					merge_status_marker,
> > -					note.buf);
> > -				for (i = 0; i < url_len; ++i)
> > -					if ('\n' == url[i])
> > -						fputs("\\n", fp);
> > -					else
> > -						fputc(url[i], fp);
> > -				fputc('\n', fp);
> > +				append_fetch_head(&fetch_head,
> > +						  oid_to_hex(&rm->old_oid),
> > +						  merge_status_marker,
> > +						  note.buf, url, url_len);
> 
> Here, we can lose merge_status_marker variable from this caller, and
> then this caller becomes:
> 
> 		switch (rm->fetch_head_status) {
> 		case FETCH_HEAD_NOT_FOR_MERGE:
> 		case FETCH_HEAD_MERGE:
> 			append_fetch_head(&fetch_head, &rm->old_oid,
> 				rm->fetch_head_status == FETCH_HEAD_MERGE,
>                                 note.buf, url, url_len);
> 
> Note that I am passing "is this a ref to be merged?" boolean to keep
> the exact string of "not-for-merge" in the callee.

As said above, I'm just moving this complete switch into
`append_fetch_head` and pass `rm->fetch_head_status`.

Patrick
Junio C Hamano Jan. 11, 2021, 7:24 p.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

>> As an abstraction, it may be better to make the caller pass a
>> boolean "is this for merge?" and keep the knowledge of what exact
>> string is used for merge_status_marker to this function, instead of
>> letting the caller passing it as a parameter in the string form.
>> After all, we never allow anything other than an empty string or a
>> fixed "not-for-merge" string in that place in the file format.
>> [Nit #2]
>
> I think it's even nicer to just pass in `rm->fetch_head_status`
> directly, which allows us to move below switch into `append_fetch_head`.

OK.  That may even be better.

Thanks.
diff mbox series

Patch

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ecf8537605..557ec130db 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -897,6 +897,56 @@  static int iterate_ref_map(void *cb_data, struct object_id *oid)
 	return 0;
 }
 
+struct fetch_head {
+	FILE *fp;
+};
+
+static int open_fetch_head(struct fetch_head *fetch_head)
+{
+	const char *filename = git_path_fetch_head(the_repository);
+
+	if (!write_fetch_head)
+		return 0;
+
+	fetch_head->fp = fopen(filename, "a");
+	if (!fetch_head->fp)
+		return error_errno(_("cannot open %s"), filename);
+
+	return 0;
+}
+
+static void append_fetch_head(struct fetch_head *fetch_head, const char *old_oid,
+			      const char *merge_status_marker, const char *note,
+			      const char *url, size_t url_len)
+{
+	size_t i;
+
+	if (!write_fetch_head)
+		return;
+
+	fprintf(fetch_head->fp, "%s\t%s\t%s",
+		old_oid, merge_status_marker, note);
+	for (i = 0; i < url_len; ++i)
+		if ('\n' == url[i])
+			fputs("\\n", fetch_head->fp);
+		else
+			fputc(url[i], fetch_head->fp);
+	fputc('\n', fetch_head->fp);
+}
+
+static void commit_fetch_head(struct fetch_head *fetch_head)
+{
+	/* Nothing to commit yet. */
+}
+
+static void close_fetch_head(struct fetch_head *fetch_head)
+{
+	if (!write_fetch_head)
+		return;
+
+	fclose(fetch_head->fp);
+}
+
 static const char warn_show_forced_updates[] =
 N_("Fetch normally indicates which branches had a forced update,\n"
    "but that check has been disabled. To re-enable, use '--show-forced-updates'\n"
@@ -909,22 +959,19 @@  N_("It took %.2f seconds to check forced updates. You can use\n"
 static int store_updated_refs(const char *raw_url, const char *remote_name,
 			      int connectivity_checked, struct ref *ref_map)
 {
-	FILE *fp;
+	struct fetch_head fetch_head;
 	struct commit *commit;
 	int url_len, i, rc = 0;
 	struct strbuf note = STRBUF_INIT;
 	const char *what, *kind;
 	struct ref *rm;
 	char *url;
-	const char *filename = (!write_fetch_head
-				? "/dev/null"
-				: git_path_fetch_head(the_repository));
 	int want_status;
 	int summary_width = transport_summary_width(ref_map);
 
-	fp = fopen(filename, "a");
-	if (!fp)
-		return error_errno(_("cannot open %s"), filename);
+	rc = open_fetch_head(&fetch_head);
+	if (rc)
+		return -1;
 
 	if (raw_url)
 		url = transport_anonymize_url(raw_url);
@@ -1016,16 +1063,10 @@  static int store_updated_refs(const char *raw_url, const char *remote_name,
 				merge_status_marker = "not-for-merge";
 				/* fall-through */
 			case FETCH_HEAD_MERGE:
-				fprintf(fp, "%s\t%s\t%s",
-					oid_to_hex(&rm->old_oid),
-					merge_status_marker,
-					note.buf);
-				for (i = 0; i < url_len; ++i)
-					if ('\n' == url[i])
-						fputs("\\n", fp);
-					else
-						fputc(url[i], fp);
-				fputc('\n', fp);
+				append_fetch_head(&fetch_head,
+						  oid_to_hex(&rm->old_oid),
+						  merge_status_marker,
+						  note.buf, url, url_len);
 				break;
 			default:
 				/* do not write anything to FETCH_HEAD */
@@ -1060,6 +1101,9 @@  static int store_updated_refs(const char *raw_url, const char *remote_name,
 		}
 	}
 
+	if (!rc)
+		commit_fetch_head(&fetch_head);
+
 	if (rc & STORE_REF_ERROR_DF_CONFLICT)
 		error(_("some local refs could not be updated; try running\n"
 		      " 'git remote prune %s' to remove any old, conflicting "
@@ -1077,7 +1121,7 @@  static int store_updated_refs(const char *raw_url, const char *remote_name,
  abort:
 	strbuf_release(&note);
 	free(url);
-	fclose(fp);
+	close_fetch_head(&fetch_head);
 	return rc;
 }