diff mbox series

[v2,1/2] abspath: add a function to resolve paths with missing components

Message ID 20201009191511.267461-2-sandals@crustytoothpaste.net (mailing list archive)
State New, archived
Headers show
Series [v2,1/2] abspath: add a function to resolve paths with missing components | expand

Commit Message

brian m. carlson Oct. 9, 2020, 7:15 p.m. UTC
We'd like to canonicalize paths such that we can preserve any number of
trailing components that may be missing.  Let's add a function to do
that by calling strbuf_realpath to find the canonical path for the
portion we do have and then appending the missing part.  We adjust
strip_last_component to return us the component it has stripped and use
that to help us accumulate the missing part.

Note that it is intentional that we invoke strbuf_realpath here,
repeatedly if necessary, because on Windows that function is replaced
with a version that uses the proper system semantics for
canonicalization.  Trying to adjust strbuf_realpath to perform this kind
of canonicalization with an additional option would fail to work
properly on Windows.  The present approach is equivalent to
strbuf_realpath for cases where the path exists, and in the only other
cases where we will use this function, the additional overhead of
multiple invocations is not significant.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 abspath.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 cache.h   |  1 +
 2 files changed, 47 insertions(+), 4 deletions(-)

Comments

Junio C Hamano Oct. 9, 2020, 9:10 p.m. UTC | #1
"brian m. carlson" <sandals@crustytoothpaste.net> writes:

> We'd like to canonicalize paths such that we can preserve any number of
> trailing components that may be missing.

Sorry, but at least to me, the above gives no clue what kind of
operation is desired to be done on paths.  How would one preserve
what does not exist (i.e. are missing)?

Do you mean some leading components in a path point at existing
directories and after some point a component names a directory
that does not exist, so everything after that does not yet exist
until you "mkdir -p" them?

I guess my confusion comes primarily from the fuzziness of the verb
"canonicalize" in the sentence.  We want to handle a/b/../c/d and
there are various combinations of missing and existing directories,
e.g. a/b may not exist or a/b may but a/c may not, etc.  Is that
what is going on?  Makes me wonder if it makes sense to canonicalize
a/b/../c/d into a/c/d when a/b does not exist in the first place,
though.

> Let's add a function to do
> that that calls strbuf_realpath to find the canonical path for the
> portion we do have and then append the missing part.  We adjust
> strip_last_component to return us the component it has stripped and use
> that to help us accumulate the missing part.

OK, so if we have a/b/c/d and know a/b/c/d does not exist on the
filesystem, we start by splitting it to a/b/c and d, see if a/b/c
exists, and if not, do the same recursively to a/b/c to split it
into a/b and c, and prefix the latter to 'd' that we split earlier
(i.e. now we have a/b and c/d), until we have an existing directory
on the first half?

> Note that it is intentional that we invoke strbuf_realpath here,
> repeatedly if necessary, because on Windows that function is replaced
> with a version that uses the proper system semantics for
> canonicalization.  Trying to adjust strbuf_realpath to perform this kind
> of canonicalization with an additional option would fail to work
> properly on Windows.  The present approach is equivalent to
> strbuf_realpath for cases where the path exists, and the only other
> cases where we will use this function the additional overhead of
> multiple invocations is not significant.
>
> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---
>  abspath.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
>  cache.h   |  1 +
>  2 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/abspath.c b/abspath.c
> index 6f15a418bb..092bb33b64 100644
> --- a/abspath.c
> +++ b/abspath.c
> @@ -11,8 +11,12 @@ int is_directory(const char *path)
>  	return (!stat(path, &st) && S_ISDIR(st.st_mode));
>  }
>  
> -/* removes the last path component from 'path' except if 'path' is root */
> -static void strip_last_component(struct strbuf *path)
> +/*
> + * Removes the last path component from 'path' except if 'path' is root.
> + *
> + * If last is not NULL, the last path component is copied to last.
> + */
> +static void strip_last_component(struct strbuf *path, struct strbuf *last)
>  {
>  	size_t offset = offset_1st_component(path->buf);
>  	size_t len = path->len;
> @@ -20,6 +24,10 @@ static void strip_last_component(struct strbuf *path)
>  	/* Find start of the last component */
>  	while (offset < len && !is_dir_sep(path->buf[len - 1]))
>  		len--;
> +
> +	if (last)
> +		strbuf_addstr(last, path->buf + len);
> +
>  	/* Skip sequences of multiple path-separators */
>  	while (offset < len && is_dir_sep(path->buf[len - 1]))
>  		len--;
> @@ -118,7 +126,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  			continue; /* '.' component */
>  		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
>  			/* '..' component; strip the last path component */
> -			strip_last_component(resolved);
> +			strip_last_component(resolved, NULL);
>  			continue;
>  		}
>  
> @@ -169,7 +177,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  				 * strip off the last component since it will
>  				 * be replaced with the contents of the symlink
>  				 */
> -				strip_last_component(resolved);
> +				strip_last_component(resolved, NULL);
>  			}
>  
>  			/*
> @@ -202,6 +210,40 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  	return retval;
>  }
>  
> +/*
> + * Like strbuf_realpath, but trailing components which do not exist are copied
> + * through.
> + */
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path)
> +{
> +	struct strbuf remaining = STRBUF_INIT;
> +	struct strbuf trailing = STRBUF_INIT;
> +	struct strbuf component = STRBUF_INIT;
> +
> +	strbuf_addstr(&remaining, path);
> +
> +	while (remaining.len) {
> +		if (strbuf_realpath(resolved, remaining.buf, 0)) {
> +			strbuf_addbuf(resolved, &trailing);
> +
> +			strbuf_release(&component);
> +			strbuf_release(&remaining);
> +			strbuf_release(&trailing);
> +
> +			return resolved->buf;
> +		}
> +		strip_last_component(&remaining, &component);
> +		strbuf_insertstr(&trailing, 0, "/");
> +		strbuf_insertstr(&trailing, 1, component.buf);

I may be utterly confused, but is this where

    - we started with a/b/c/d, pushed 'd' into trailing and decided
      to redo with a/b/c

    - now we split the a/b/c into a/b and c, and adjusting what is
      in trailing from 'd' to 'c/d'

takes place?  It's a bit sad that we need to repeatedly use
insertstr to prepend in front, instead of appending.

> +		strbuf_reset(&component);
> +	}
> +
> +	strbuf_release(&component);
> +	strbuf_release(&remaining);
> +	strbuf_release(&trailing);
> +	return NULL;
> +}
> +
>  char *real_pathdup(const char *path, int die_on_error)
>  {
>  	struct strbuf realpath = STRBUF_INIT;
> diff --git a/cache.h b/cache.h
> index c0072d43b1..e1e17e108e 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1320,6 +1320,7 @@ static inline int is_absolute_path(const char *path)
>  int is_directory(const char *);
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error);
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path);
>  char *real_pathdup(const char *path, int die_on_error);
>  const char *absolute_path(const char *path);
>  char *absolute_pathdup(const char *path);
brian m. carlson Oct. 10, 2020, 1:10 a.m. UTC | #2
On 2020-10-09 at 21:10:04, Junio C Hamano wrote:
> "brian m. carlson" <sandals@crustytoothpaste.net> writes:
> 
> > We'd like to canonicalize paths such that we can preserve any number of
> > trailing components that may be missing.
> 
> Sorry, but at least to me, the above gives no clue what kind of
> operation is desired to be done on paths.  How would one preserve
> what does not exist (i.e. are missing)?
> 
> Do you mean some leading components in a path point at existing
> directories and after some point a component names a directory
> that does not exist, so everything after that does not yet exist
> until you "mkdir -p" them?
> 
> I guess my confusion comes primarily from the fuzziness of the verb
> "canonicalize" in the sentence.  We want to handle a/b/../c/d and
> there are various combinations of missing and existing directories,
> e.g. a/b may not exist or a/b may but a/c may not, etc.  Is that
> what is going on?  Makes me wonder if it makes sense to canonicalize
> a/b/../c/d into a/c/d when a/b does not exist in the first place,
> though.

The behavior that I'm proposing is the realpath -m behavior.  If the
path we're canonicalizing doesn't exist, we find the closest parent that
does exist, canonicalize it (à la realpath(3)), and then append the
components that don't exist to the canonicalized portion.

> > Let's add a function to do
> > that that calls strbuf_realpath to find the canonical path for the
> > portion we do have and then append the missing part.  We adjust
> > strip_last_component to return us the component it has stripped and use
> > that to help us accumulate the missing part.
> 
> OK, so if we have a/b/c/d and know a/b/c/d does not exist on the
> filesystem, we start by splitting it to a/b/c and d, see if a/b/c
> exists, and if not, do the same recursively to a/b/c to split it
> into a/b and c, and prefix the latter to 'd' that we split earlier
> (i.e. now we have a/b and c/d), until we have an existing directory
> on the first half?

Correct.

> > +/*
> > + * Like strbuf_realpath, but trailing components which do not exist are copied
> > + * through.
> > + */
> > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path)
> > +{
> > +	struct strbuf remaining = STRBUF_INIT;
> > +	struct strbuf trailing = STRBUF_INIT;
> > +	struct strbuf component = STRBUF_INIT;
> > +
> > +	strbuf_addstr(&remaining, path);
> > +
> > +	while (remaining.len) {
> > +		if (strbuf_realpath(resolved, remaining.buf, 0)) {
> > +			strbuf_addbuf(resolved, &trailing);
> > +
> > +			strbuf_release(&component);
> > +			strbuf_release(&remaining);
> > +			strbuf_release(&trailing);
> > +
> > +			return resolved->buf;
> > +		}
> > +		strip_last_component(&remaining, &component);
> > +		strbuf_insertstr(&trailing, 0, "/");
> > +		strbuf_insertstr(&trailing, 1, component.buf);
> 
> I may be utterly confused, but is this where
> 
>     - we started with a/b/c/d, pushed 'd' into trailing and decided
>       to redo with a/b/c
> 
>     - now we split the a/b/c into a/b and c, and adjusting what is
>       in trailing from 'd' to 'c/d'
> 
> happens place?  It's a bit sad that we need to repeatedly use
> insertstr to prepend in front, instead of appending.

Yes, that's true.  It really isn't avoidable, though, with the functions
the way that they are.  We can't use the original path and keep track of
the offset because it may contain multiple path separators and we don't
want to include those in the path.
Johannes Schindelin Nov. 9, 2020, 1:55 p.m. UTC | #3
Hi brian,

On Fri, 9 Oct 2020, brian m. carlson wrote:

> We'd like to canonicalize paths such that we can preserve any number of
> trailing components that may be missing.  Let's add a function to do
> that that calls strbuf_realpath to find the canonical path for the
> portion we do have and then append the missing part.  We adjust
> strip_last_component to return us the component it has stripped and use
> that to help us accumulate the missing part.
>
> Note that it is intentional that we invoke strbuf_realpath here,
> repeatedly if necessary, because on Windows that function is replaced
> with a version that uses the proper system semantics for
> canonicalization.  Trying to adjust strbuf_realpath to perform this kind
> of canonicalization with an additional option would fail to work
> properly on Windows.  The present approach is equivalent to
> strbuf_realpath for cases where the path exists, and the only other
> cases where we will use this function the additional overhead of
> multiple invocations is not significant.

Thank you for being so considerate. Yes, on Windows, we use (wherever
possible) a shortcut that tells us the canonicalized path of existing
entries.

Technically, it is not `strbuf_realpath()` that we override, but we take a
shortcut _in_ that function. That's semantics, though.

More importantly, we recently fixed a bug in our code to allow for a quirk
in the `strbuf_realpath()` function: `strbuf_realpath()` allows the last
path component to not exist. If that is the case, now it's time to try
without last component.

In a sense, this is a 1-level version of your infinite-level
`strbuf_realpath_missing()` function.

An idea that immediately crosses my mind is whether that level could be
something we want to pass directly into `strbuf_realpath()` as a parameter
(it would be 1 to imitate the current behavior and -1 for the
infinite-level case). What do you think? Does that make sense?

In any case, I think this `_missing()` functionality should be implemented
a bit more tightly with the `strbuf_realpath()` function because of the
logic that already allows the last component to be missing:

                if (lstat(resolved->buf, &st)) {
                        /* error out unless this was the last component */
                        if (errno != ENOENT || remaining.len) {
                                if (die_on_error)
                                        die_errno("Invalid path '%s'",
                                                  resolved->buf);
                                else
                                        goto error_out;
                        }

See https://github.com/git/git/blob/v2.29.2/abspath.c#L130-L138 for the
exact code and context.

Seeing as we _already_ have some code to allow for _some_ missing
component, it should be possible to extend the logic to allow for
different levels (e.g. using `count_slashes()` if we want to allow more
than just the last component to be missing).

> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---
>  abspath.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
>  cache.h   |  1 +
>  2 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/abspath.c b/abspath.c
> index 6f15a418bb..092bb33b64 100644
> --- a/abspath.c
> +++ b/abspath.c
> @@ -11,8 +11,12 @@ int is_directory(const char *path)
>  	return (!stat(path, &st) && S_ISDIR(st.st_mode));
>  }
>
> -/* removes the last path component from 'path' except if 'path' is root */
> -static void strip_last_component(struct strbuf *path)
> +/*
> + * Removes the last path component from 'path' except if 'path' is root.
> + *
> + * If last is not NULL, the last path component is copied to last.
> + */
> +static void strip_last_component(struct strbuf *path, struct strbuf *last)
>  {
>  	size_t offset = offset_1st_component(path->buf);
>  	size_t len = path->len;
> @@ -20,6 +24,10 @@ static void strip_last_component(struct strbuf *path)
>  	/* Find start of the last component */
>  	while (offset < len && !is_dir_sep(path->buf[len - 1]))
>  		len--;
> +
> +	if (last)
> +		strbuf_addstr(last, path->buf + len);
> +
>  	/* Skip sequences of multiple path-separators */
>  	while (offset < len && is_dir_sep(path->buf[len - 1]))
>  		len--;
> @@ -118,7 +126,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  			continue; /* '.' component */
>  		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
>  			/* '..' component; strip the last path component */
> -			strip_last_component(resolved);
> +			strip_last_component(resolved, NULL);
>  			continue;
>  		}
>
> @@ -169,7 +177,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  				 * strip off the last component since it will
>  				 * be replaced with the contents of the symlink
>  				 */
> -				strip_last_component(resolved);
> +				strip_last_component(resolved, NULL);
>  			}
>
>  			/*
> @@ -202,6 +210,40 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  	return retval;
>  }
>
> +/*
> + * Like strbuf_realpath, but trailing components which do not exist are copied
> + * through.
> + */
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path)
> +{
> +	struct strbuf remaining = STRBUF_INIT;
> +	struct strbuf trailing = STRBUF_INIT;
> +	struct strbuf component = STRBUF_INIT;
> +
> +	strbuf_addstr(&remaining, path);
> +
> +	while (remaining.len) {
> +		if (strbuf_realpath(resolved, remaining.buf, 0)) {
> +			strbuf_addbuf(resolved, &trailing);
> +
> +			strbuf_release(&component);
> +			strbuf_release(&remaining);
> +			strbuf_release(&trailing);
> +
> +			return resolved->buf;
> +		}
> +		strip_last_component(&remaining, &component);
> +		strbuf_insertstr(&trailing, 0, "/");
> +		strbuf_insertstr(&trailing, 1, component.buf);

Not that it matters a lot, but this could be written shorter via

		strbuf_insertf(&trailing, 0, "/%s", component.buf);

But as I said above, I think we should be able to fold the logic _into_
`strbuf_realpath()` (even if this makes my job harder to maintain the
Windows-specific shortcut).

Thanks,
Dscho

> +		strbuf_reset(&component);
> +	}
> +
> +	strbuf_release(&component);
> +	strbuf_release(&remaining);
> +	strbuf_release(&trailing);
> +	return NULL;
> +}
> +
>  char *real_pathdup(const char *path, int die_on_error)
>  {
>  	struct strbuf realpath = STRBUF_INIT;
> diff --git a/cache.h b/cache.h
> index c0072d43b1..e1e17e108e 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1320,6 +1320,7 @@ static inline int is_absolute_path(const char *path)
>  int is_directory(const char *);
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error);
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path);
>  char *real_pathdup(const char *path, int die_on_error);
>  const char *absolute_path(const char *path);
>  char *absolute_pathdup(const char *path);
>
Johannes Schindelin Nov. 9, 2020, 1:57 p.m. UTC | #4
Hi brian,

On Sat, 10 Oct 2020, brian m. carlson wrote:

> On 2020-10-09 at 21:10:04, Junio C Hamano wrote:
> > "brian m. carlson" <sandals@crustytoothpaste.net> writes:
> >
> > > We'd like to canonicalize paths such that we can preserve any number of
> > > trailing components that may be missing.
> >
> > Sorry, but at least to me, the above gives no clue what kind of
> > operation is desired to be done on paths.  How would one preserve
> > what does not exist (i.e. are missing)?
> >
> > Do you mean some leading components in a path point at existing
> > directories and after some point a component names a directory
> > that does not exist, so everything after that does not yet exist
> > until you "mkdir -p" them?
> >
> > I guess my confusion comes primarily from the fuzziness of the verb
> > "canonicalize" in the sentence.  We want to handle a/b/../c/d and
> > there are various combinations of missing and existing directories,
> > e.g. a/b may not exist or a/b may but a/c may not, etc.  Is that
> > what is going on?  Makes me wonder if it makes sense to canonicalize
> > a/b/../c/d into a/c/d when a/b does not exist in the first place,
> > though.
>
> The behavior that I'm proposing is the realpath -m behavior.  If the
> path we're canonicalizing doesn't exist, we find the closest parent that
> does exist, canonicalize it (à la realpath(3)), and then append the
> components that don't exist to the canonicalized portion.

FWIW I was immediately able to think of a handful of scenarios where this
functionality would come in handy, but I am probably not a typical example
for the median reader. So maybe a concrete example or two why this could
be handy could be shown in the cover letter?

Thanks,
Dscho

>
> > > Let's add a function to do
> > > that that calls strbuf_realpath to find the canonical path for the
> > > portion we do have and then append the missing part.  We adjust
> > > strip_last_component to return us the component it has stripped and use
> > > that to help us accumulate the missing part.
> >
> > OK, so if we have a/b/c/d and know a/b/c/d does not exist on the
> > filesystem, we start by splitting it to a/b/c and d, see if a/b/c
> > exists, and if not, do the same recursively to a/b/c to split it
> > into a/b and c, and prefix the latter to 'd' that we split earlier
> > (i.e. now we have a/b and c/d), until we have an existing directory
> > on the first half?
>
> Correct.
>
> > > +/*
> > > + * Like strbuf_realpath, but trailing components which do not exist are copied
> > > + * through.
> > > + */
> > > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path)
> > > +{
> > > +	struct strbuf remaining = STRBUF_INIT;
> > > +	struct strbuf trailing = STRBUF_INIT;
> > > +	struct strbuf component = STRBUF_INIT;
> > > +
> > > +	strbuf_addstr(&remaining, path);
> > > +
> > > +	while (remaining.len) {
> > > +		if (strbuf_realpath(resolved, remaining.buf, 0)) {
> > > +			strbuf_addbuf(resolved, &trailing);
> > > +
> > > +			strbuf_release(&component);
> > > +			strbuf_release(&remaining);
> > > +			strbuf_release(&trailing);
> > > +
> > > +			return resolved->buf;
> > > +		}
> > > +		strip_last_component(&remaining, &component);
> > > +		strbuf_insertstr(&trailing, 0, "/");
> > > +		strbuf_insertstr(&trailing, 1, component.buf);
> >
> > I may be utterly confused, but is this where
> >
> >     - we started with a/b/c/d, pushed 'd' into trailing and decided
> >       to redo with a/b/c
> >
> >     - now we split the a/b/c into a/b and c, and adjusting what is
> >       in trailing from 'd' to 'c/d'
> >
> > happens place?  It's a bit sad that we need to repeatedly use
> > insertstr to prepend in front, instead of appending.
>
> Yes, that's true.  It really isn't avoidable, though, with the functions
> the way that they are.  We can't use the original path and keep track of
> the offset because it may contain multiple path separators and we don't
> want to include those in the path.
> --
> brian m. carlson: Houston, Texas, US
>
brian m. carlson Nov. 16, 2020, 2:21 a.m. UTC | #5
On 2020-11-09 at 13:55:53, Johannes Schindelin wrote:
> Hi brian,
> 
> On Fri, 9 Oct 2020, brian m. carlson wrote:
> 
> > We'd like to canonicalize paths such that we can preserve any number of
> > trailing components that may be missing.  Let's add a function to do
> > that that calls strbuf_realpath to find the canonical path for the
> > portion we do have and then append the missing part.  We adjust
> > strip_last_component to return us the component it has stripped and use
> > that to help us accumulate the missing part.
> >
> > Note that it is intentional that we invoke strbuf_realpath here,
> > repeatedly if necessary, because on Windows that function is replaced
> > with a version that uses the proper system semantics for
> > canonicalization.  Trying to adjust strbuf_realpath to perform this kind
> > of canonicalization with an additional option would fail to work
> > properly on Windows.  The present approach is equivalent to
> > strbuf_realpath for cases where the path exists, and the only other
> > cases where we will use this function the additional overhead of
> > multiple invocations is not significant.
> 
> Thank you for being so considerate. Yes, on Windows, we use (wherever
> possible) a shortcut that tells us the canonicalized path of existing
> entries.
> 
> Technically, it is not `strbuf_realpath()` that we override, but we take a
> shortcut _in_ that function. That's semantics, though.
> 
> More importantly, we recently fixed a bug in our code to allow for a quirk
> in the `strbuf_realpath()` function: `strbuf_realpath()` allows the last
> path component to not exist. If that is the case, now it's time to try
> without last component.
> 
> In a sense, this is a 1-level version of your infinite-level
> `strbuf_realpath_missing()` function.
> 
> An idea that immediately crosses my mind is whether that level could be
> something we want to pass directly into `strbuf_realpath()` as a parameter
> (it would be 1 to imitate the current behavior and -1 for the
> infinite-level case). What do you think? Does that make sense?
> 
> In any case, I think this `_missing()` functionality should be implemented
> a bit more tightly with the `strbuf_realpath()` function because of the
> logic that already allows the last component to be missing:
> 
>                 if (lstat(resolved->buf, &st)) {
>                         /* error out unless this was the last component */
>                         if (errno != ENOENT || remaining.len) {
>                                 if (die_on_error)
>                                         die_errno("Invalid path '%s'",
>                                                   resolved->buf);
>                                 else
>                                         goto error_out;
>                         }
> 
> See https://github.com/git/git/blob/v2.29.2/abspath.c#L130-L138 for the
> exact code and context.
> 
> Seeing as we _already_ have some code to allow for _some_ missing
> component, it should be possible to extend the logic to allow for
> different levels (e.g. using `count_slashes()` if we want to allow more
> than just the last component to be missing).

Okay, if you'd prefer to do it that way, that's fine with me.  I'll
reroll with that change.
diff mbox series

Patch

diff --git a/abspath.c b/abspath.c
index 6f15a418bb..092bb33b64 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,12 @@  int is_directory(const char *path)
 	return (!stat(path, &st) && S_ISDIR(st.st_mode));
 }
 
-/* removes the last path component from 'path' except if 'path' is root */
-static void strip_last_component(struct strbuf *path)
+/*
+ * Removes the last path component from 'path' except if 'path' is root.
+ *
+ * If last is not NULL, the last path component is copied to last.
+ */
+static void strip_last_component(struct strbuf *path, struct strbuf *last)
 {
 	size_t offset = offset_1st_component(path->buf);
 	size_t len = path->len;
@@ -20,6 +24,10 @@  static void strip_last_component(struct strbuf *path)
 	/* Find start of the last component */
 	while (offset < len && !is_dir_sep(path->buf[len - 1]))
 		len--;
+
+	if (last)
+		strbuf_addstr(last, path->buf + len);
+
 	/* Skip sequences of multiple path-separators */
 	while (offset < len && is_dir_sep(path->buf[len - 1]))
 		len--;
@@ -118,7 +126,7 @@  char *strbuf_realpath(struct strbuf *resolved, const char *path,
 			continue; /* '.' component */
 		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
 			/* '..' component; strip the last path component */
-			strip_last_component(resolved);
+			strip_last_component(resolved, NULL);
 			continue;
 		}
 
@@ -169,7 +177,7 @@  char *strbuf_realpath(struct strbuf *resolved, const char *path,
 				 * strip off the last component since it will
 				 * be replaced with the contents of the symlink
 				 */
-				strip_last_component(resolved);
+				strip_last_component(resolved, NULL);
 			}
 
 			/*
@@ -202,6 +210,40 @@  char *strbuf_realpath(struct strbuf *resolved, const char *path,
 	return retval;
 }
 
+/*
+ * Like strbuf_realpath, but trailing components which do not exist are copied
+ * through.
+ */
+char *strbuf_realpath_missing(struct strbuf *resolved, const char *path)
+{
+	struct strbuf remaining = STRBUF_INIT;
+	struct strbuf trailing = STRBUF_INIT;
+	struct strbuf component = STRBUF_INIT;
+
+	strbuf_addstr(&remaining, path);
+
+	while (remaining.len) {
+		if (strbuf_realpath(resolved, remaining.buf, 0)) {
+			strbuf_addbuf(resolved, &trailing);
+
+			strbuf_release(&component);
+			strbuf_release(&remaining);
+			strbuf_release(&trailing);
+
+			return resolved->buf;
+		}
+		strip_last_component(&remaining, &component);
+		strbuf_insertstr(&trailing, 0, "/");
+		strbuf_insertstr(&trailing, 1, component.buf);
+		strbuf_reset(&component);
+	}
+
+	strbuf_release(&component);
+	strbuf_release(&remaining);
+	strbuf_release(&trailing);
+	return NULL;
+}
+
 char *real_pathdup(const char *path, int die_on_error)
 {
 	struct strbuf realpath = STRBUF_INIT;
diff --git a/cache.h b/cache.h
index c0072d43b1..e1e17e108e 100644
--- a/cache.h
+++ b/cache.h
@@ -1320,6 +1320,7 @@  static inline int is_absolute_path(const char *path)
 int is_directory(const char *);
 char *strbuf_realpath(struct strbuf *resolved, const char *path,
 		      int die_on_error);
+char *strbuf_realpath_missing(struct strbuf *resolved, const char *path);
 char *real_pathdup(const char *path, int die_on_error);
 const char *absolute_path(const char *path);
 char *absolute_pathdup(const char *path);