diff mbox series

[03/11] remote: transfer ownership of memory in add_url(), etc

Message ID 20240614102722.GC222445@coredump.intra.peff.net (mailing list archive)
State Accepted
Commit 52595c155a66076020a42197c10a32086d7c4ada
Headers show
Series allow overriding remote.*.url | expand

Commit Message

Jeff King June 14, 2024, 10:27 a.m. UTC
Many of the internal functions in remote.c take const strings and store
them forever in instances of "struct remote". Since the functions are
internal and callers are aware of the convention, this seems to mostly
work and not cause leaks. But there are some issues:

  - it's impossible to clear any of the arrays, because the data
    dependencies between them are too muddled (if you free() a string,
    it might also be referenced from another array, causing a
    user-after-free; but if you don't, that might be the last reference,
    causing a leak).

    This is mostly of interest for further refactoring and features, but
    there's at least one spot that's already a problem. In alias_all_urls(),
    we replace elements of remote->url and remote->pushurl with their
    aliased forms, dropping references to the original.

  - sometimes strings from outside callers make their way in. For
    example, calling remote_get("foo") when there is no configured "foo"
    remote will create a remote struct with the single url "foo". But
    we'll do so by holding on to the string passed to remote_get()
    forever.

    In practice I think this works out because we'd usually pass in a
    string that lasts the length of the program (a string literal, or
    argv reference, or other data structure allocated in the main
    function). But it's a rather subtle requirement.

Instead, let's have remote->url and remote->pushurl own their string
memory. They'll copy the const strings that are passed in, and callers
can stop making their own copies. Likewise, when we overwrite an entry,
we can free the memory it points to, fixing the leak mentioned above.

We'll leave the struct members as "const" since they are visible to the
outside world, and shouldn't usually be touched. This requires casting
on free() for now, but we'll clean that further in a future patch.

Signed-off-by: Jeff King <peff@peff.net>
---
Since now we'll always allocate, I don't think it's possible for this to
introduce any memory corruption issues. It _might_ introduce a leak, but
I think I fixed all of the callers.

 remote.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

Comments

Junio C Hamano June 14, 2024, 5:04 p.m. UTC | #1
Jeff King <peff@peff.net> writes:

> diff --git a/remote.c b/remote.c
> index fd9d58f820..f7c846865f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -64,13 +64,13 @@ static char *alias_url(const char *url, struct rewrites *r)
>  static void add_url(struct remote *remote, const char *url)
>  {
>  	ALLOC_GROW(remote->url, remote->url_nr + 1, remote->url_alloc);
> -	remote->url[remote->url_nr++] = url;
> +	remote->url[remote->url_nr++] = xstrdup(url);
>  }
>  
>  static void add_pushurl(struct remote *remote, const char *pushurl)
>  {
>  	ALLOC_GROW(remote->pushurl, remote->pushurl_nr + 1, remote->pushurl_alloc);
> -	remote->pushurl[remote->pushurl_nr++] = pushurl;
> +	remote->pushurl[remote->pushurl_nr++] = xstrdup(pushurl);
>  }

OK.  This makes it easier to reason about why these elements are
freed in remote_clear().

>  static void add_pushurl_alias(struct remote_state *remote_state,
> @@ -79,6 +79,7 @@ static void add_pushurl_alias(struct remote_state *remote_state,
>  	char *alias = alias_url(url, &remote_state->rewrites_push);
>  	if (alias)
>  		add_pushurl(remote, alias);
> +	free(alias);
>  }

OK.  I wondered if we want to strdup(url) in my review on the
previous step, but now we are making the add_url() responsible
for making a copy, we instead do the opposite, i.e. free alias
that was allocated for us because we no longer need it.

>  static void add_url_alias(struct remote_state *remote_state,
> @@ -87,6 +88,7 @@ static void add_url_alias(struct remote_state *remote_state,
>  	char *alias = alias_url(url, &remote_state->rewrites);
>  	add_url(remote, alias ? alias : url);
>  	add_pushurl_alias(remote_state, remote, url);
> +	free(alias);
>  }

Likewise.

> @@ -336,7 +338,7 @@ static void read_branches_file(struct remote_state *remote_state,
>  	else
>  		frag = to_free = repo_default_branch_name(the_repository, 0);
>  
> -	add_url_alias(remote_state, remote, strbuf_detach(&buf, NULL));
> +	add_url_alias(remote_state, remote, buf.buf);

It is curious that you delay ...

> @@ -347,6 +349,7 @@ static void read_branches_file(struct remote_state *remote_state,
>  	refspec_appendf(&remote->push, "HEAD:refs/heads/%s", frag);
>  	remote->fetch_tags = 1; /* always auto-follow */
>  
> +	strbuf_release(&buf);
>  	free(to_free);
>  }

... strbuf_release() of the buf to the very end of the function.  

In the original, buf became invalid by doing strbuf_detach(), so we
could do strbuf_release() immediately after add_url_alias() returns
to us if we wanted to.

> @@ -431,15 +434,13 @@ static int handle_config(const char *key, const char *value,
>  	else if (!strcmp(subkey, "prunetags"))
>  		remote->prune_tags = git_config_bool(key, value);
>  	else if (!strcmp(subkey, "url")) {
> -		char *v;
> -		if (git_config_string(&v, key, value))
> -			return -1;
> -		add_url(remote, v);
> +		if (!value)
> +			return config_error_nonbool(key);
> +		add_url(remote, value);

OK.  config_string() does (1) check for "I exist hence I am true"
boolean, and (2) give us a duplicate of the value.  We do not want
the latter, so we do the former ourselves here.  The same story
repeats for pushurl below (ellided).

> @@ -495,8 +496,10 @@ static void alias_all_urls(struct remote_state *remote_state)
>  		for (j = 0; j < remote_state->remotes[i]->pushurl_nr; j++) {
>  			char *alias = alias_url(remote_state->remotes[i]->pushurl[j],
>  						&remote_state->rewrites);
> -			if (alias)
> +			if (alias) {
> +				free((char *)remote_state->remotes[i]->pushurl[j]);
>  				remote_state->remotes[i]->pushurl[j] = alias;
> +			}
>  		}

OK, this is the replacement codepath we saw earlier.  Makes sense
for this and the .url[] side on the other hunk (it is curious that
this has .pushurl[] before .url[], which is the opposite order of
how everybody else deals with them, as pushurl came later).

Thanks.
Jeff King June 16, 2024, 4:59 a.m. UTC | #2
On Fri, Jun 14, 2024 at 10:04:50AM -0700, Junio C Hamano wrote:

> >  static void add_pushurl_alias(struct remote_state *remote_state,
> > @@ -79,6 +79,7 @@ static void add_pushurl_alias(struct remote_state *remote_state,
> >  	char *alias = alias_url(url, &remote_state->rewrites_push);
> >  	if (alias)
> >  		add_pushurl(remote, alias);
> > +	free(alias);
> >  }
> 
> OK.  I wondered if we want to strdup(url) in my review on the
> previous step, but now we are making the add_url() responsible
> for making a copy, we instead do the opposite, i.e. free alias
> that was allocated for us because we no longer need it.

Yeah. Possibly the two should be squashed. I was trying to make this
patch a little less long/confusing, but maybe breaking things up just
posed new questions. :)

> > @@ -336,7 +338,7 @@ static void read_branches_file(struct remote_state *remote_state,
> >  	else
> >  		frag = to_free = repo_default_branch_name(the_repository, 0);
> >  
> > -	add_url_alias(remote_state, remote, strbuf_detach(&buf, NULL));
> > +	add_url_alias(remote_state, remote, buf.buf);
> 
> It is curious that you delay ...
> 
> > @@ -347,6 +349,7 @@ static void read_branches_file(struct remote_state *remote_state,
> >  	refspec_appendf(&remote->push, "HEAD:refs/heads/%s", frag);
> >  	remote->fetch_tags = 1; /* always auto-follow */
> >  
> > +	strbuf_release(&buf);
> >  	free(to_free);
> >  }
> 
> ... strbuf_release() of the buf to the very end of the function.  
> 
> In the original, buf became invalid by doing strbuf_detach(), so we
> could do strbuf_release() immediately after add_url_alias() returns
> to us if we wanted to.

Right. I had originally written it that way, since that would be the
mechanical conversion. But since there was already cleanup at the bottom
of the function, it felt more natural to shuffle it there. Which is
correct as long as there are no other references to buf nor early
returns. You can't see that from the context, but it is true in this
case.

Bumping to --inter-hunk-context=4 does it a little more obvious.

> > @@ -431,15 +434,13 @@ static int handle_config(const char *key, const char *value,
> >  	else if (!strcmp(subkey, "prunetags"))
> >  		remote->prune_tags = git_config_bool(key, value);
> >  	else if (!strcmp(subkey, "url")) {
> > -		char *v;
> > -		if (git_config_string(&v, key, value))
> > -			return -1;
> > -		add_url(remote, v);
> > +		if (!value)
> > +			return config_error_nonbool(key);
> > +		add_url(remote, value);
> 
> OK.  config_string() does (1) check for "I exist hence I am true"
> boolean, and (2) give us a duplicate of the value.  We do not want
> the latter, so we do the former ourselves here.  The same story
> repeats for pushurl below (ellided).

Yep. If we followed through on the "bool means reset" approach, then these
extra NULL checks would go away in favor of one in add_url(). I didn't
pursue that here. And if we do, I think we should do it in all the other
spots, too, for consistency.

-Peff
Junio C Hamano June 17, 2024, 5:42 p.m. UTC | #3
Jeff King <peff@peff.net> writes:

> On Fri, Jun 14, 2024 at 10:04:50AM -0700, Junio C Hamano wrote:
>
>> >  static void add_pushurl_alias(struct remote_state *remote_state,
>> > @@ -79,6 +79,7 @@ static void add_pushurl_alias(struct remote_state *remote_state,
>> >  	char *alias = alias_url(url, &remote_state->rewrites_push);
>> >  	if (alias)
>> >  		add_pushurl(remote, alias);
>> > +	free(alias);
>> >  }
>> 
>> OK.  I wondered if we want to strdup(url) in my review on the
>> previous step, but now we are making the add_url() responsible
>> for making a copy, we instead do the opposite, i.e. free alias
>> that was allocated for us because we no longer need it.
>
> Yeah. Possibly the two should be squashed. I was trying to make this
> patch a little less long/confusing, but maybe breaking things up just
> posed new questions. :)

No squashing is needed.  It's just that [02/11] could go in either
direction and the reader was held in suspense until [03/11] that
picked one direction to go ;-).

> Right. I had originally written it that way, since that would be the
> mechanical conversion. But since there was already cleanup at the bottom
> of the function, it felt more natural to shuffle it there. Which is
> correct as long as there are no other references to buf nor early
> returns. You can't see that from the context, but it is true in this
> case.

Yeah, either way it is correct, and I think the "cleanup at the end,
where the single label is there for any new error code paths to jump
to" pattern is a good approach going forward.

Looking good.
Elijah Newren June 25, 2024, 5:30 p.m. UTC | #4
On Fri, Jun 14, 2024 at 4:08 AM Jeff King <peff@peff.net> wrote:
>
> Many of the internal functions in remote.c take const strings and store
> them forever in instances of "struct remote". Since the functions are
> internal and callers are aware of the convention, this seems to mostly
> work and not cause leaks. But there are some issues:
>
>   - it's impossible to clear any of the arrays, because the data
>     dependencies between them are too muddled (if you free() a string,
>     it might also be referenced from another array, causing a
>     user-after-free; but if you don't, that might be the last reference,

s/user-after-free/use-after-free/

(Read the rest of the patch and it all looks good.)
diff mbox series

Patch

diff --git a/remote.c b/remote.c
index fd9d58f820..f7c846865f 100644
--- a/remote.c
+++ b/remote.c
@@ -64,13 +64,13 @@  static char *alias_url(const char *url, struct rewrites *r)
 static void add_url(struct remote *remote, const char *url)
 {
 	ALLOC_GROW(remote->url, remote->url_nr + 1, remote->url_alloc);
-	remote->url[remote->url_nr++] = url;
+	remote->url[remote->url_nr++] = xstrdup(url);
 }
 
 static void add_pushurl(struct remote *remote, const char *pushurl)
 {
 	ALLOC_GROW(remote->pushurl, remote->pushurl_nr + 1, remote->pushurl_alloc);
-	remote->pushurl[remote->pushurl_nr++] = pushurl;
+	remote->pushurl[remote->pushurl_nr++] = xstrdup(pushurl);
 }
 
 static void add_pushurl_alias(struct remote_state *remote_state,
@@ -79,6 +79,7 @@  static void add_pushurl_alias(struct remote_state *remote_state,
 	char *alias = alias_url(url, &remote_state->rewrites_push);
 	if (alias)
 		add_pushurl(remote, alias);
+	free(alias);
 }
 
 static void add_url_alias(struct remote_state *remote_state,
@@ -87,6 +88,7 @@  static void add_url_alias(struct remote_state *remote_state,
 	char *alias = alias_url(url, &remote_state->rewrites);
 	add_url(remote, alias ? alias : url);
 	add_pushurl_alias(remote_state, remote, url);
+	free(alias);
 }
 
 struct remotes_hash_key {
@@ -293,7 +295,7 @@  static void read_remotes_file(struct remote_state *remote_state,
 
 		if (skip_prefix(buf.buf, "URL:", &v))
 			add_url_alias(remote_state, remote,
-				      xstrdup(skip_spaces(v)));
+				      skip_spaces(v));
 		else if (skip_prefix(buf.buf, "Push:", &v))
 			refspec_append(&remote->push, skip_spaces(v));
 		else if (skip_prefix(buf.buf, "Pull:", &v))
@@ -336,7 +338,7 @@  static void read_branches_file(struct remote_state *remote_state,
 	else
 		frag = to_free = repo_default_branch_name(the_repository, 0);
 
-	add_url_alias(remote_state, remote, strbuf_detach(&buf, NULL));
+	add_url_alias(remote_state, remote, buf.buf);
 	refspec_appendf(&remote->fetch, "refs/heads/%s:refs/heads/%s",
 			frag, remote->name);
 
@@ -347,6 +349,7 @@  static void read_branches_file(struct remote_state *remote_state,
 	refspec_appendf(&remote->push, "HEAD:refs/heads/%s", frag);
 	remote->fetch_tags = 1; /* always auto-follow */
 
+	strbuf_release(&buf);
 	free(to_free);
 }
 
@@ -431,15 +434,13 @@  static int handle_config(const char *key, const char *value,
 	else if (!strcmp(subkey, "prunetags"))
 		remote->prune_tags = git_config_bool(key, value);
 	else if (!strcmp(subkey, "url")) {
-		char *v;
-		if (git_config_string(&v, key, value))
-			return -1;
-		add_url(remote, v);
+		if (!value)
+			return config_error_nonbool(key);
+		add_url(remote, value);
 	} else if (!strcmp(subkey, "pushurl")) {
-		char *v;
-		if (git_config_string(&v, key, value))
-			return -1;
-		add_pushurl(remote, v);
+		if (!value)
+			return config_error_nonbool(key);
+		add_pushurl(remote, value);
 	} else if (!strcmp(subkey, "push")) {
 		char *v;
 		if (git_config_string(&v, key, value))
@@ -495,8 +496,10 @@  static void alias_all_urls(struct remote_state *remote_state)
 		for (j = 0; j < remote_state->remotes[i]->pushurl_nr; j++) {
 			char *alias = alias_url(remote_state->remotes[i]->pushurl[j],
 						&remote_state->rewrites);
-			if (alias)
+			if (alias) {
+				free((char *)remote_state->remotes[i]->pushurl[j]);
 				remote_state->remotes[i]->pushurl[j] = alias;
+			}
 		}
 		add_pushurl_aliases = remote_state->remotes[i]->pushurl_nr == 0;
 		for (j = 0; j < remote_state->remotes[i]->url_nr; j++) {
@@ -507,8 +510,10 @@  static void alias_all_urls(struct remote_state *remote_state)
 					remote_state->remotes[i]->url[j]);
 			alias = alias_url(remote_state->remotes[i]->url[j],
 					  &remote_state->rewrites);
-			if (alias)
+			if (alias) {
+				free((char *)remote_state->remotes[i]->url[j]);
 				remote_state->remotes[i]->url[j] = alias;
+			}
 		}
 	}
 }