diff mbox series

[2/2] attr: add flag `-r|--revisions` to work with revisions

Message ID 20221206103736.53909-3-karthik.188@gmail.com (mailing list archive)
State Superseded
Headers show
Series check-attr: add support to work with revisions | expand

Commit Message

karthik nayak Dec. 6, 2022, 10:37 a.m. UTC
Git check-attr currently doesn't check the git worktree, it either
checks the index or the files directly. This means we cannot check the
attributes for a file against a certain revision.

Add a new flag `--revision`/`-r` which will allow it work with
revisions. This command will now, instead of checking the files/index,
try and receive the blob for the given attribute file against the
provided revision. The flag overrides checking against the index and
filesystem and also works with bare repositories.

We cannot use the `<rev>:<path>` syntax like the one used in `git show`
because any non-flag parameter before `--` is treated as an attribute
and any parameter after `--` is treated as a pathname.

This involves creating a new function `read_attr_from_blob`, which given
the path reads the blob for the path against the provided revision and
parses the attributes line by line. This function is plugged into
`read_attr()` function wherein we go through the different attributes.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
Co-authored-by: toon@iotcl.com
---
 archive.c              |   2 +-
 attr.c                 | 120 ++++++++++++++++++++++++++++-------------
 attr.h                 |   7 ++-
 builtin/check-attr.c   |  25 ++++-----
 builtin/pack-objects.c |   2 +-
 convert.c              |   2 +-
 ll-merge.c             |   4 +-
 pathspec.c             |   2 +-
 t/t0003-attributes.sh  |  56 ++++++++++++++++++-
 userdiff.c             |   2 +-
 ws.c                   |   2 +-
 11 files changed, 165 insertions(+), 59 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Dec. 6, 2022, 11:27 a.m. UTC | #1
On Tue, Dec 06 2022, Karthik Nayak wrote:

> Git check-attr currently doesn't check the git worktree, it either
> checks the index or the files directly. This means we cannot check the
> attributes for a file against a certain revision.
>
> Add a new flag `--revision`/`-r` which will allow it work with
> revisions. This command will now, instead of checking the files/index,
> try and receive the blob for the given attribute file against the
> provided revision. The flag overrides checking against the index and
> filesystem and also works with bare repositories.
>
> We cannot use the `<rev>:<path>` syntax like the one used in `git show`
> because any non-flag parameter before `--` is treated as an attribute
> and any parameter after `--` is treated as a pathname.
>
> This involves creating a new function `read_attr_from_blob`, which given
> the path reads the blob for the path against the provided revision and
> parses the attributes line by line. This function is plugged into
> `read_attr()` function wherein we go through the different attributes.
>
> Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
> Co-authored-by: toon@iotcl.com
> ---
>  archive.c              |   2 +-
>  attr.c                 | 120 ++++++++++++++++++++++++++++-------------
>  attr.h                 |   7 ++-
>  builtin/check-attr.c   |  25 ++++-----
>  builtin/pack-objects.c |   2 +-
>  convert.c              |   2 +-
>  ll-merge.c             |   4 +-
>  pathspec.c             |   2 +-
>  t/t0003-attributes.sh  |  56 ++++++++++++++++++-
>  userdiff.c             |   2 +-
>  ws.c                   |   2 +-
>  11 files changed, 165 insertions(+), 59 deletions(-)
>
> diff --git a/archive.c b/archive.c
> index 941495f5d7..bf64dbdce1 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -120,7 +120,7 @@ static const struct attr_check *get_archive_attrs(struct index_state *istate,
>  	static struct attr_check *check;
>  	if (!check)
>  		check = attr_check_initl("export-ignore", "export-subst", NULL);
> -	git_check_attr(istate, path, check);
> +	git_check_attr(istate, path, check, NULL);
>  	return check;
>  }
>  
> diff --git a/attr.c b/attr.c
> index 42ad6de8c7..42b67a401f 100644
> --- a/attr.c
> +++ b/attr.c
> @@ -11,8 +11,12 @@
>  #include "exec-cmd.h"
>  #include "attr.h"
>  #include "dir.h"
> +#include "git-compat-util.h"

As a rule in this project we include either cache.h at the top, or
git-compat-util.h, and the former includes the latter.

This file already has cache.h at the top, so it won't need this.

> +	if (buf == NULL)

if (!buf)

> +		more = (*ep == '\n');

Nit: parens not needed.

> +	struct object_id oid;
> +	unsigned long sz;
> +	enum object_type type;
> +	void *buf;
> +	struct strbuf sb;
> +
> +	if (object_name == NULL)

Ditto !object_name test.

> +		return NULL;
> +
> +	strbuf_init(&sb, strlen(path) + 1 + strlen(object_name));
> +	strbuf_addstr(&sb, object_name);
> +	strbuf_addstr(&sb, ":");
> +	strbuf_addstr(&sb, path);

Is this really performance sensitive so we need to pre-size this, or
would a simpler:

	struct strbuf sb = STRBUF_INIT;
        strbuf_addf(&sb, "%s:%s", path, object_name);

Do?

> +	} else if (object_name != NULL) {

else if (object_name)

>  void git_check_attr(struct index_state *istate,
> -		    const char *path, struct attr_check *check);
> +					const char *path, struct attr_check *check,
> +					const char *object_name);

This (and possibly other places) seem funnily indented..

>  	if (collect_all) {
> -		git_all_attrs(&the_index, full_path, check);
> +		git_all_attrs(&the_index, full_path, check, object_name);
>  	} else {
> -		git_check_attr(&the_index, full_path, check);
> +		git_check_attr(&the_index, full_path, check, object_name);
>  	}

Earlier you do a get_oid(), shouldn't that be a
repo_get_oid(istate->repo, ...) to be future-proof? I.e. use the repo of
the passed-in index.

I think it'll always be "the_repository" for now, but for an API it
makes sense to hardcode that assumption in fewer places.

> +test_expect_success 'bare repository: with --revision' '
> +	(
> +		cd bare.git &&
> +		(
> +			echo "f	test=f" &&
> +			echo "a/i test=a/i"

You don't need a subshell just to echo a string. You can use {}-braces,
but in this case just:

    printf "%s\n", "f test=f" "a/i test=a/i" | git hash-object .... &&


> +		) | git hash-object -w --stdin > id &&
> +		git update-index --add --cacheinfo 100644 $(cat id) .gitattributes &&

Split the "cat" into a varible, otherwise its failure will be hidden.

> +		git write-tree > id &&

We use ">id" style for redirection, not "> id".

> +		git commit-tree $(cat id) -m "random commit message" > id &&

Ditto..
karthik nayak Dec. 6, 2022, 1:06 p.m. UTC | #2
Hello Ævar,

> >
> > diff --git a/attr.c b/attr.c
> > index 42ad6de8c7..42b67a401f 100644
> > --- a/attr.c
> > +++ b/attr.c
> > @@ -11,8 +11,12 @@
> >  #include "exec-cmd.h"
> >  #include "attr.h"
> >  #include "dir.h"
> > +#include "git-compat-util.h"
>
> As a rule in this project we include either cache.h at the top, or
> git-compat-util.h, and the former includes the latter.
>
> This file already has cache.h at the top, so it won't need this.
>

Right, will remove.

> > +     if (buf == NULL)
>
> if (!buf)

Makes sense.

>
> > +             more = (*ep == '\n');
>
> Nit: parens not needed.
>

I rather let this be, since it's existing code that I just move.

> > +     struct object_id oid;
> > +     unsigned long sz;
> > +     enum object_type type;
> > +     void *buf;
> > +     struct strbuf sb;
> > +
> > +     if (object_name == NULL)
>
> Ditto !object_name test.
>

Will change.

> > +             return NULL;
> > +
> > +     strbuf_init(&sb, strlen(path) + 1 + strlen(object_name));
> > +     strbuf_addstr(&sb, object_name);
> > +     strbuf_addstr(&sb, ":");
> > +     strbuf_addstr(&sb, path);
>
> Is this really performance sensitive so we need to pre-size this, or
> would a simpler:
>
>         struct strbuf sb = STRBUF_INIT;
>         strbuf_addf(&sb, "%s:%s", path, object_name);
>
> Do?
>

This is much simpler and should do, will change.

> > +     } else if (object_name != NULL) {
>
> else if (object_name)

Will change.

>
> >  void git_check_attr(struct index_state *istate,
> > -                 const char *path, struct attr_check *check);
> > +                                     const char *path, struct attr_check *check,
> > +                                     const char *object_name);
>
> This (and possibly other places) seem funnily indented..
>

I think it's due to tab-indent being set to a default 4, I fixed it in
the other files, forgot to
check the header. Will fix it.

> >       if (collect_all) {
> > -             git_all_attrs(&the_index, full_path, check);
> > +             git_all_attrs(&the_index, full_path, check, object_name);
> >       } else {
> > -             git_check_attr(&the_index, full_path, check);
> > +             git_check_attr(&the_index, full_path, check, object_name);
> >       }
>
> Earlier you do a get_oid(), shouldn't that be a
> repo_get_oid(istate->repo, ...) to be future-proof? I.e. use the repo of
> the passed-in index.
>
> I think it'll always be "the_repository" for now, but for an API it
> makes sense to hardcode that assumption in fewer places.
>

I will make this change, I didn't know about repo_get_oid() before.

> > +test_expect_success 'bare repository: with --revision' '
> > +     (
> > +             cd bare.git &&
> > +             (
> > +                     echo "f test=f" &&
> > +                     echo "a/i test=a/i"
>
> You don't need a subshell just to echo a string. You can use {}-braces,
> but in this case just:
>
>     printf "%s\n", "f test=f" "a/i test=a/i" | git hash-object .... &&
>
>

While I agree with what you're saying, the whole test file does it
this way (echo in a subshell),
wouldn't it be better to stay consistent?

> > +             ) | git hash-object -w --stdin > id &&
> > +             git update-index --add --cacheinfo 100644 $(cat id) .gitattributes &&
>
> Split the "cat" into a varible, otherwise its failure will be hidden.
>

Done.

> > +             git write-tree > id &&
>
> We use ">id" style for redirection, not "> id".
>
> > +             git commit-tree $(cat id) -m "random commit message" > id &&
>
> Ditto..

Will make this change too.

Thanks for the review. Will wait a day or two before sending in the
next version.
Junio C Hamano Dec. 7, 2022, 12:12 a.m. UTC | #3
Karthik Nayak <karthik.188@gmail.com> writes:

> Add a new flag `--revision`/`-r` which will allow it work with
> revisions. This command will now, instead of checking the files/index,
> try and receive the blob for the given attribute file against the
> provided revision. The flag overrides checking against the index and
> filesystem and also works with bare repositories.

As "check-attr" was not invented as a user-facing subcommand but was
a hack for debugging, I would have minded this change, but these
days people seem to treat it as if it is just one of the proper
plumbing commands, the new command line convention bothers me a
bit.  No other command uses --<anything> to signal that what comes
after it is a rev.

But I do not think of a better alternative without making the
command line ambiguous, so I'll stop at raising a concern, so that
others who may be better at UI can come up with one.

As to the C API, please do not append the new parameter at the end.
When there are no logical ordering among the things listed, be it in
the members of a struct or the parameters to a function, we encourage
to append, but in this case

    void git_check_attr(struct index_state *istate,
                        const char *path,
                        struct attr_check *check)

we are saying "pick <path>, referring to .gitattributes files from
the index as needed, and apply the checks in check[]", and the new
behaviour is "pick <path>, referring to .gitattributes files from
the tree-ish as needed, and do the same", so istate and the tree-ish
object should sit together.

Also, at the C API level, I suspect that we strongly prefer to pass
around either the "struct object_id *" or "struct tree *", not working
with end-user supplied character strings without any sanity-checking
or parsing.

Another concern, among many existing callers of git_check_attr(),
is there any that will conceivably benefit in the future if they
could read the attributes from a tree-ish?  If not, it may make a
lot more sense if you did not butcher them, and instead introduce a
new API function git_check_attr_from_tree() and use it in the only
place you handle the "-r tree-ish" command line option in the
updated part of the "git check-attr" command.

Thanks.
Eric Sunshine Dec. 7, 2022, 1:10 a.m. UTC | #4
On Tue, Dec 6, 2022 at 7:15 PM Junio C Hamano <gitster@pobox.com> wrote:
> Karthik Nayak <karthik.188@gmail.com> writes:
> > Add a new flag `--revision`/`-r` which will allow it work with
> > revisions. This command will now, instead of checking the files/index,
> > try and receive the blob for the given attribute file against the
> > provided revision. The flag overrides checking against the index and
> > filesystem and also works with bare repositories.
>
> As "check-attr" was not invented as a user-facing subcommand but was
> a hack for debugging, I would have minded this change, but these
> days people seem to treat it as if it is just one of the proper
> plumbing commands, the new command line convention bothers me a
> bit.  No other command uses --<anything> to signal that what comes
> after it is a rev.
>
> But I do not think of a better alternative without making the
> command line ambiguous, so I'll stop at raising a concern, so that
> others who may be better at UI can come up with one.

A few minor comments...

We don't usually squat on short options, such as `-r`, right from the
start but only add the short alias once shown that there is demand.

Option `-r` has strong association with "recursive" elsewhere, so I'd
worry about giving it such a completely different meaning here.

Rather than calling the option `--revision`, perhaps pattern it after
git-restore's `--source` option which allows specifying a particular
commit, tree, tag, etc.?
karthik nayak Dec. 7, 2022, 11:05 a.m. UTC | #5
Hey Eric,

On Wed, Dec 7, 2022 at 2:10 AM Eric Sunshine <sunshine@sunshineco.com> wrote:
>
> On Tue, Dec 6, 2022 at 7:15 PM Junio C Hamano <gitster@pobox.com> wrote:
> > Karthik Nayak <karthik.188@gmail.com> writes:
> > > Add a new flag `--revision`/`-r` which will allow it work with
> > > revisions. This command will now, instead of checking the files/index,
> > > try and receive the blob for the given attribute file against the
> > > provided revision. The flag overrides checking against the index and
> > > filesystem and also works with bare repositories.
> >
> > As "check-attr" was not invented as a user-facing subcommand but was
> > a hack for debugging, I would have minded this change, but these
> > days people seem to treat it as if it is just one of the proper
> > plumbing commands, the new command line convention bothers me a
> > bit.  No other command uses --<anything> to signal that what comes
> > after it is a rev.
> >
> > But I do not think of a better alternative without making the
> > command line ambiguous, so I'll stop at raising a concern, so that
> > others who may be better at UI can come up with one.
>
> A few minor comments...
>
> We don't usually squat on short options, such as `-r`, right from the
> start but only add the short alias once shown that there is demand.
>
> Option `-r` has strong association with "recursive" elsewhere, so I'd
> worry about giving it such a completely different meaning here.
>
> Rather than calling the option `--revision`, perhaps pattern it after
> git-restore's `--source` option which allows specifying a particular
> commit, tree, tag, etc.?


I'm okay with removing the shortform `-r`. I decided to use `-r|--revision`
because it is also used in `git svn`.

I don't have strong feeling about the naming here, but I do feel that
`-r|--revision` sounds better than `--source`, because source doesn't directly
imply a revision and in the context of `git check-attr` is a bit ambiguous
because it could imply the source regarding which `gitattributes` file to
check against (although this feature doesn't exist).
Ævar Arnfjörð Bjarmason Dec. 7, 2022, 11:38 a.m. UTC | #6
On Wed, Dec 07 2022, Junio C Hamano wrote:

> Karthik Nayak <karthik.188@gmail.com> writes:
>
>> Add a new flag `--revision`/`-r` which will allow it work with
>> revisions. This command will now, instead of checking the files/index,
>> try and receive the blob for the given attribute file against the
>> provided revision. The flag overrides checking against the index and
>> filesystem and also works with bare repositories.
>
> As "check-attr" was not invented as a user-facing subcommand but was
> a hack for debugging, I would have minded this change, but these
> days people seem to treat it as if it is just one of the proper
> plumbing commands, the new command line convention bothers me a
> bit.  No other command uses --<anything> to signal that what comes
> after it is a rev.
>
> But I do not think of a better alternative without making the
> command line ambiguous, so I'll stop at raising a concern, so that
> others who may be better at UI can come up with one.

I don't really like it either, but maybe we've backed ourselves into a
corner here.

But let's look at that. So the command takes:

	git check-attr <attr>... -- <path>...

Or:

	echo "<path>" |
	git check-attr --stdin <attr>...

So we'd want to specify a <revision>, without making the <attr> or
<path> ambiguous.

Now, when we map the attributes we go through attr_name_valid(), which
checks that the attribute names are valid. A commentthere says:

         * Attribute name cannot begin with '-' and must consist of 
         * characters from [-A-Za-z0-9_.].

So can't we instead accept:

	git check-attr [<rev>:]<attr>... -- <path>...

I.e.:

	git check-attr HEAD~:foo -- path

And it wouldn't be ambiguous because attribute names can't contain ":"?
This would be consistent with e.g. "git show" and "git cat-file", just
with "<attr>" instead of the "<path>".

This would also mean that we'd support:

	git check-attr HEAD:foo HEAD~:bar HEAD~2:baz

etc., i.e. the ability to support multiple revision/attribute
pairs. Skimming the currently proposed code there seems to be no good
reason not to support that (we just need to look up other blobs), and it
would allow querying those without spawning N processes.
karthik nayak Dec. 7, 2022, 11:40 a.m. UTC | #7
On Wed, Dec 7, 2022 at 1:12 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Karthik Nayak <karthik.188@gmail.com> writes:
>
> > Add a new flag `--revision`/`-r` which will allow it work with
> > revisions. This command will now, instead of checking the files/index,
> > try and receive the blob for the given attribute file against the
> > provided revision. The flag overrides checking against the index and
> > filesystem and also works with bare repositories.
>
> As "check-attr" was not invented as a user-facing subcommand but was
> a hack for debugging, I would have minded this change, but these
> days people seem to treat it as if it is just one of the proper
> plumbing commands, the new command line convention bothers me a
> bit.  No other command uses --<anything> to signal that what comes
> after it is a rev.
>
> But I do not think of a better alternative without making the
> command line ambiguous, so I'll stop at raising a concern, so that
> others who may be better at UI can come up with one.
>

I understand this concern, but I do think it would be really useful. I see that
Taylor and Brian have also mentioned how this would be useful. As such, I
think it's a good indication to take this forward.

> As to the C API, please do not append the new parameter at the end.
> When there are no logical ordering among the things listed, be it in
> the members of a struct or the parameters to a function, we encourage
> to append, but in this case
>
>     void git_check_attr(struct index_state *istate,
>                         const char *path,
>                         struct attr_check *check)
>
> we are saying "pick <path>, referring to .gitattributes files from
> the index as needed, and apply the checks in check[]", and the new
> behaviour is "pick <path>, referring to .gitattributes files from
> the tree-ish as needed, and do the same", so istate and the tree-ish
> object should sit together.
>

I will be more cognizant of this approach and make these changes.

> Also, at the C API level, I suspect that we strongly prefer to pass
> around either the "struct object_id *" or "struct tree *", not working
> with end-user supplied character strings without any sanity-checking
> or parsing.
>

I must admit, I did take the path of least resistance here. So we finally need
to parse the `revision:<pathname>` where the `<pathname>` is generated
dynamically as we move through the check-attr stack.

My question is, if I generate an `object_id` at the start (in
builtin/check-attr.c)
with only the `revision`, is there a way to traverse to find the blob
for each of
the different <pathnames>? I haven't looked at Git code for a while now, and
I'm not sure what the best way to do this. Maybe I've missed some API which
would make this simple, any help is appreciated.

> Another concern, among many existing callers of git_check_attr(),
> is there any that will conceivably benefit in the future if they
> could read the attributes from a tree-ish?  If not, it may make a
> lot more sense if you did not butcher them, and instead introduce a
> new API function git_check_attr_from_tree() and use it in the only
> place you handle the "-r tree-ish" command line option in the
> updated part of the "git check-attr" command.
>

The concern then is that we'll also have to duplicate some of the code in
`git_all_attrs` and `git_check_attr` and have some logic to work
correctly for with
and without the `-a` option. All in all, this was much easier.

I don't have enough knowledge to say if the other callers will someday
want to extend
to providing a revision too, so I'll leave that to you!

> Thanks.

Thanks for the review.

--
- Karthik
Ævar Arnfjörð Bjarmason Dec. 7, 2022, 11:53 a.m. UTC | #8
On Wed, Dec 07 2022, Karthik Nayak wrote:

> On Wed, Dec 7, 2022 at 1:12 AM Junio C Hamano <gitster@pobox.com> wrote:
> [...]
>> Also, at the C API level, I suspect that we strongly prefer to pass
>> around either the "struct object_id *" or "struct tree *", not working
>> with end-user supplied character strings without any sanity-checking
>> or parsing.
>>
>
> I must admit, I did take the path of least resistance here. So we finally need
> to parse the `revision:<pathname>` where the `<pathname>` is generated
> dynamically as we move through the check-attr stack.
>
> My question is, if I generate an `object_id` at the start (in
> builtin/check-attr.c)
> with only the `revision`, is there a way to traverse to find the blob
> for each of
> the different <pathnames>? I haven't looked at Git code for a while now, and
> I'm not sure what the best way to do this. Maybe I've missed some API which
> would make this simple, any help is appreciated.

The get_oid() that you're doing now happens in a loop, and should be
pulled out of it.

I suggested making that a feature in
https://lore.kernel.org/git/221207.86lenja0zi.gmgdl@evledraar.gmail.com/;
but if you keep the interface you've got where you only support a single
<rev> it would make the most sense to do that get_oid() in the builtin/
code at the start, and then pass the oid/path pair down.

You'd still need to call read_object_file() or equivalent for each
<rev>:<path> pair though.
karthik nayak Dec. 7, 2022, 12:29 p.m. UTC | #9
On Wed, Dec 7, 2022 at 12:56 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
>
> On Wed, Dec 07 2022, Karthik Nayak wrote:
>
> > On Wed, Dec 7, 2022 at 1:12 AM Junio C Hamano <gitster@pobox.com> wrote:
> > [...]
> >> Also, at the C API level, I suspect that we strongly prefer to pass
> >> around either the "struct object_id *" or "struct tree *", not working
> >> with end-user supplied character strings without any sanity-checking
> >> or parsing.
> >>
> >
> > I must admit, I did take the path of least resistance here. So we finally need
> > to parse the `revision:<pathname>` where the `<pathname>` is generated
> > dynamically as we move through the check-attr stack.
> >
> > My question is, if I generate an `object_id` at the start (in
> > builtin/check-attr.c)
> > with only the `revision`, is there a way to traverse to find the blob
> > for each of
> > the different <pathnames>? I haven't looked at Git code for a while now, and
> > I'm not sure what the best way to do this. Maybe I've missed some API which
> > would make this simple, any help is appreciated.
>
> The get_oid() that you're doing now happens in a loop, and should be
> pulled out of it.
>
> I suggested making that a feature in
> https://lore.kernel.org/git/221207.86lenja0zi.gmgdl@evledraar.gmail.com/;
> but if you keep the interface you've got where you only support a single
> <rev> it would make the most sense to do that get_oid() in the builtin/
> code at the start, and then pass the oid/path pair down.
>
> You'd still need to call read_object_file() or equivalent for each
> <rev>:<path> pair though.

That's exactly my question, now we're calling `get_oid()` with the pathname.
Which would directly give us the object_id for the blob. Whereas if we call
`get_oid()` _without_ the pathname, we would still need to get the object_id
for the blob.

My specific question being, how do I, given:

struct object_id oid_1;
get_oid(revision, &oid)

and pathname, get the equivalent of:

struct object_id oid_2;
strbuf_addf(&sb, "%s:%s", revision, path);
get_oid(sb.buf, &oid)

?

Basically, I fail to see how to transform oid_1 to oid_2, using
pathname. I agree
this would eventually be fed into `read_object_file`.
karthik nayak Dec. 7, 2022, 12:33 p.m. UTC | #10
On Wed, Dec 7, 2022 at 12:48 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
>
> On Wed, Dec 07 2022, Junio C Hamano wrote:
>
> > Karthik Nayak <karthik.188@gmail.com> writes:
> >
> >> Add a new flag `--revision`/`-r` which will allow it work with
> >> revisions. This command will now, instead of checking the files/index,
> >> try and receive the blob for the given attribute file against the
> >> provided revision. The flag overrides checking against the index and
> >> filesystem and also works with bare repositories.
> >
> > As "check-attr" was not invented as a user-facing subcommand but was
> > a hack for debugging, I would have minded this change, but these
> > days people seem to treat it as if it is just one of the proper
> > plumbing commands, the new command line convention bothers me a
> > bit.  No other command uses --<anything> to signal that what comes
> > after it is a rev.
> >
> > But I do not think of a better alternative without making the
> > command line ambiguous, so I'll stop at raising a concern, so that
> > others who may be better at UI can come up with one.
>
> I don't really like it either, but maybe we've backed ourselves into a
> corner here.
>
> But let's look at that. So the command takes:
>
>         git check-attr <attr>... -- <path>...
>
> Or:
>
>         echo "<path>" |
>         git check-attr --stdin <attr>...
>
> So we'd want to specify a <revision>, without making the <attr> or
> <path> ambiguous.
>
> Now, when we map the attributes we go through attr_name_valid(), which
> checks that the attribute names are valid. A commentthere says:
>
>          * Attribute name cannot begin with '-' and must consist of
>          * characters from [-A-Za-z0-9_.].
>
> So can't we instead accept:
>
>         git check-attr [<rev>:]<attr>... -- <path>...
>
> I.e.:
>
>         git check-attr HEAD~:foo -- path
>
> And it wouldn't be ambiguous because attribute names can't contain ":"?
> This would be consistent with e.g. "git show" and "git cat-file", just
> with "<attr>" instead of the "<path>".
>
> This would also mean that we'd support:
>
>         git check-attr HEAD:foo HEAD~:bar HEAD~2:baz
>
> etc., i.e. the ability to support multiple revision/attribute
> pairs. Skimming the currently proposed code there seems to be no good
> reason not to support that (we just need to look up other blobs), and it
> would allow querying those without spawning N processes.

Thanks for this walkthrough, quick question, this wouldn't work with
the `-a` condition, right?
Current patch series tends to work with/without `-a`.

Also, my personal opinion is that when being consistent we need to be fully
consistent, i.e. <revision>:<path>, tweaking this slightly to be
<revision>:<attr> is
worse than breaking consistency.
diff mbox series

Patch

diff --git a/archive.c b/archive.c
index 941495f5d7..bf64dbdce1 100644
--- a/archive.c
+++ b/archive.c
@@ -120,7 +120,7 @@  static const struct attr_check *get_archive_attrs(struct index_state *istate,
 	static struct attr_check *check;
 	if (!check)
 		check = attr_check_initl("export-ignore", "export-subst", NULL);
-	git_check_attr(istate, path, check);
+	git_check_attr(istate, path, check, NULL);
 	return check;
 }
 
diff --git a/attr.c b/attr.c
index 42ad6de8c7..42b67a401f 100644
--- a/attr.c
+++ b/attr.c
@@ -11,8 +11,12 @@ 
 #include "exec-cmd.h"
 #include "attr.h"
 #include "dir.h"
+#include "git-compat-util.h"
+#include "strbuf.h"
 #include "utf8.h"
 #include "quote.h"
+#include "revision.h"
+#include "object-store.h"
 #include "thread-utils.h"
 
 const char git_attr__true[] = "(builtin)true";
@@ -729,14 +733,67 @@  static struct attr_stack *read_attr_from_file(const char *path, unsigned flags)
 	return res;
 }
 
-static struct attr_stack *read_attr_from_index(struct index_state *istate,
-					       const char *path,
-					       unsigned flags)
+static struct attr_stack *read_attr_from_buf(char *buf, const char *path,
+					     unsigned flags)
 {
 	struct attr_stack *res;
-	char *buf, *sp;
+	char *sp;
 	int lineno = 0;
 
+	if (buf == NULL)
+		return NULL;
+
+	CALLOC_ARRAY(res, 1);
+	for (sp = buf; *sp;) {
+		char *ep;
+		int more;
+
+		ep = strchrnul(sp, '\n');
+		more = (*ep == '\n');
+		*ep = '\0';
+		handle_attr_line(res, sp, path, ++lineno, flags);
+		sp = ep + more;
+	}
+	free(buf);
+
+	return res;
+}
+
+static struct attr_stack *read_attr_from_blob(const char *path,
+					      const char *object_name,
+					      unsigned flags)
+{
+	struct object_id oid;
+	unsigned long sz;
+	enum object_type type;
+	void *buf;
+	struct strbuf sb;
+
+	if (object_name == NULL)
+		return NULL;
+
+	strbuf_init(&sb, strlen(path) + 1 + strlen(object_name));
+	strbuf_addstr(&sb, object_name);
+	strbuf_addstr(&sb, ":");
+	strbuf_addstr(&sb, path);
+
+	if (get_oid(sb.buf, &oid))
+		return NULL;
+
+	buf = read_object_file(&oid, &type, &sz);
+	if (!buf || type != OBJ_BLOB) {
+		free(buf);
+		return NULL;
+	}
+
+	return read_attr_from_buf(buf, path, flags);
+}
+
+static struct attr_stack *read_attr_from_index(struct index_state *istate,
+					       const char *path, unsigned flags)
+{
+	char *buf;
+
 	if (!istate)
 		return NULL;
 
@@ -758,28 +815,19 @@  static struct attr_stack *read_attr_from_index(struct index_state *istate,
 	if (!buf)
 		return NULL;
 
-	CALLOC_ARRAY(res, 1);
-	for (sp = buf; *sp; ) {
-		char *ep;
-		int more;
-
-		ep = strchrnul(sp, '\n');
-		more = (*ep == '\n');
-		*ep = '\0';
-		handle_attr_line(res, sp, path, ++lineno, flags);
-		sp = ep + more;
-	}
-	free(buf);
-	return res;
+	return read_attr_from_buf(buf, path, flags);
 }
 
 static struct attr_stack *read_attr(struct index_state *istate,
-				    const char *path, unsigned flags)
+				    const char *path, const char *object_name,
+				    unsigned flags)
 {
 	struct attr_stack *res = NULL;
 
 	if (direction == GIT_ATTR_INDEX) {
 		res = read_attr_from_index(istate, path, flags);
+	} else if (object_name != NULL) {
+		res = read_attr_from_blob(path, object_name, flags);
 	} else if (!is_bare_repository()) {
 		if (direction == GIT_ATTR_CHECKOUT) {
 			res = read_attr_from_index(istate, path, flags);
@@ -839,7 +887,8 @@  static void push_stack(struct attr_stack **attr_stack_p,
 }
 
 static void bootstrap_attr_stack(struct index_state *istate,
-				 struct attr_stack **stack)
+				 struct attr_stack **stack,
+				 const char *object_name)
 {
 	struct attr_stack *e;
 	unsigned flags = READ_ATTR_MACRO_OK;
@@ -864,7 +913,7 @@  static void bootstrap_attr_stack(struct index_state *istate,
 	}
 
 	/* root directory */
-	e = read_attr(istate, GITATTRIBUTES_FILE, flags | READ_ATTR_NOFOLLOW);
+	e = read_attr(istate, GITATTRIBUTES_FILE, object_name, flags | READ_ATTR_NOFOLLOW);
 	push_stack(stack, e, xstrdup(""), 0);
 
 	/* info frame */
@@ -877,9 +926,9 @@  static void bootstrap_attr_stack(struct index_state *istate,
 	push_stack(stack, e, NULL, 0);
 }
 
-static void prepare_attr_stack(struct index_state *istate,
-			       const char *path, int dirlen,
-			       struct attr_stack **stack)
+static void prepare_attr_stack(struct index_state *istate, const char *path,
+			       int dirlen, struct attr_stack **stack,
+			       const char *object_name)
 {
 	struct attr_stack *info;
 	struct strbuf pathbuf = STRBUF_INIT;
@@ -899,7 +948,7 @@  static void prepare_attr_stack(struct index_state *istate,
 	 * .gitattributes in deeper directories to shallower ones,
 	 * and finally use the built-in set as the default.
 	 */
-	bootstrap_attr_stack(istate, stack);
+	bootstrap_attr_stack(istate, stack, object_name);
 
 	/*
 	 * Pop the "info" one that is always at the top of the stack.
@@ -954,7 +1003,7 @@  static void prepare_attr_stack(struct index_state *istate,
 		strbuf_add(&pathbuf, path + pathbuf.len, (len - pathbuf.len));
 		strbuf_addf(&pathbuf, "/%s", GITATTRIBUTES_FILE);
 
-		next = read_attr(istate, pathbuf.buf, READ_ATTR_NOFOLLOW);
+		next = read_attr(istate, pathbuf.buf, object_name, READ_ATTR_NOFOLLOW);
 
 		/* reset the pathbuf to not include "/.gitattributes" */
 		strbuf_setlen(&pathbuf, len);
@@ -1073,9 +1122,9 @@  static void determine_macros(struct all_attrs_item *all_attrs,
  * If check->check_nr is non-zero, only attributes in check[] are collected.
  * Otherwise all attributes are collected.
  */
-static void collect_some_attrs(struct index_state *istate,
-			       const char *path,
-			       struct attr_check *check)
+static void collect_some_attrs(struct index_state *istate, const char *path,
+			       struct attr_check *check,
+			       const char *object_name)
 {
 	int pathlen, rem, dirlen;
 	const char *cp, *last_slash = NULL;
@@ -1094,7 +1143,7 @@  static void collect_some_attrs(struct index_state *istate,
 		dirlen = 0;
 	}
 
-	prepare_attr_stack(istate, path, dirlen, &check->stack);
+	prepare_attr_stack(istate, path, dirlen, &check->stack, object_name);
 	all_attrs_init(&g_attr_hashmap, check);
 	determine_macros(check->all_attrs, check->stack);
 
@@ -1102,13 +1151,12 @@  static void collect_some_attrs(struct index_state *istate,
 	fill(path, pathlen, basename_offset, check->stack, check->all_attrs, rem);
 }
 
-void git_check_attr(struct index_state *istate,
-		    const char *path,
-		    struct attr_check *check)
+void git_check_attr(struct index_state *istate, const char *path,
+		    struct attr_check *check, const char *object_name)
 {
 	int i;
 
-	collect_some_attrs(istate, path, check);
+	collect_some_attrs(istate, path, check, object_name);
 
 	for (i = 0; i < check->nr; i++) {
 		size_t n = check->items[i].attr->attr_nr;
@@ -1119,13 +1167,13 @@  void git_check_attr(struct index_state *istate,
 	}
 }
 
-void git_all_attrs(struct index_state *istate,
-		   const char *path, struct attr_check *check)
+void git_all_attrs(struct index_state *istate, const char *path,
+		   struct attr_check *check, const char *object_name)
 {
 	int i;
 
 	attr_check_reset(check);
-	collect_some_attrs(istate, path, check);
+	collect_some_attrs(istate, path, check, object_name);
 
 	for (i = 0; i < check->all_attrs_nr; i++) {
 		const char *name = check->all_attrs[i].attr->name;
diff --git a/attr.h b/attr.h
index 3fb40cced0..0708d6e046 100644
--- a/attr.h
+++ b/attr.h
@@ -169,6 +169,7 @@  struct attr_check {
 	int all_attrs_nr;
 	struct all_attrs_item *all_attrs;
 	struct attr_stack *stack;
+	char *object_id;
 };
 
 struct attr_check *attr_check_alloc(void);
@@ -190,14 +191,16 @@  void attr_check_free(struct attr_check *check);
 const char *git_attr_name(const struct git_attr *);
 
 void git_check_attr(struct index_state *istate,
-		    const char *path, struct attr_check *check);
+					const char *path, struct attr_check *check,
+					const char *object_name);
 
 /*
  * Retrieve all attributes that apply to the specified path.
  * check holds the attributes and their values.
  */
 void git_all_attrs(struct index_state *istate,
-		   const char *path, struct attr_check *check);
+				   const char *path, struct attr_check *check,
+				   const char *object_name);
 
 enum git_attr_direction {
 	GIT_ATTR_CHECKIN,
diff --git a/builtin/check-attr.c b/builtin/check-attr.c
index 0fef10eb6b..3e032420ed 100644
--- a/builtin/check-attr.c
+++ b/builtin/check-attr.c
@@ -9,6 +9,7 @@ 
 static int all_attrs;
 static int cached_attrs;
 static int stdin_paths;
+static char *object_name;
 static const char * const check_attr_usage[] = {
 N_("git check-attr [-a | --all | <attr>...] [--] <pathname>..."),
 N_("git check-attr --stdin [-z] [-a | --all | <attr>...]"),
@@ -23,6 +24,7 @@  static const struct option check_attr_options[] = {
 	OPT_BOOL(0 , "stdin", &stdin_paths, N_("read file names from stdin")),
 	OPT_BOOL('z', NULL, &nul_term_line,
 		 N_("terminate input and output records by a NUL character")),
+	OPT_STRING('r', "revision", &object_name, N_("revision"), N_("check attributes at this revision")),
 	OPT_END()
 };
 
@@ -55,27 +57,26 @@  static void output_attr(struct attr_check *check, const char *file)
 	}
 }
 
-static void check_attr(const char *prefix,
-		       struct attr_check *check,
-		       int collect_all,
-		       const char *file)
+static void check_attr(const char *prefix, struct attr_check *check,
+		       int collect_all, const char *file,
+		       const char *object_name
+)
 {
 	char *full_path =
 		prefix_path(prefix, prefix ? strlen(prefix) : 0, file);
 
 	if (collect_all) {
-		git_all_attrs(&the_index, full_path, check);
+		git_all_attrs(&the_index, full_path, check, object_name);
 	} else {
-		git_check_attr(&the_index, full_path, check);
+		git_check_attr(&the_index, full_path, check, object_name);
 	}
 	output_attr(check, file);
 
 	free(full_path);
 }
 
-static void check_attr_stdin_paths(const char *prefix,
-				   struct attr_check *check,
-				   int collect_all)
+static void check_attr_stdin_paths(const char *prefix, struct attr_check *check,
+				   int collect_all, const char *object_name)
 {
 	struct strbuf buf = STRBUF_INIT;
 	struct strbuf unquoted = STRBUF_INIT;
@@ -89,7 +90,7 @@  static void check_attr_stdin_paths(const char *prefix,
 				die("line is badly quoted");
 			strbuf_swap(&buf, &unquoted);
 		}
-		check_attr(prefix, check, collect_all, buf.buf);
+		check_attr(prefix, check, collect_all, buf.buf, object_name);
 		maybe_flush_or_die(stdout, "attribute to stdout");
 	}
 	strbuf_release(&buf);
@@ -177,10 +178,10 @@  int cmd_check_attr(int argc, const char **argv, const char *prefix)
 	}
 
 	if (stdin_paths)
-		check_attr_stdin_paths(prefix, check, all_attrs);
+		check_attr_stdin_paths(prefix, check, all_attrs, object_name);
 	else {
 		for (i = filei; i < argc; i++)
-			check_attr(prefix, check, all_attrs, argv[i]);
+			check_attr(prefix, check, all_attrs, argv[i], object_name);
 		maybe_flush_or_die(stdout, "attribute to stdout");
 	}
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 573d0b20b7..f57a0188e6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1318,7 +1318,7 @@  static int no_try_delta(const char *path)
 
 	if (!check)
 		check = attr_check_initl("delta", NULL);
-	git_check_attr(the_repository->index, path, check);
+	git_check_attr(the_repository->index, path, check, NULL);
 	if (ATTR_FALSE(check->items[0].value))
 		return 1;
 	return 0;
diff --git a/convert.c b/convert.c
index 9b67649032..095729004f 100644
--- a/convert.c
+++ b/convert.c
@@ -1308,7 +1308,7 @@  void convert_attrs(struct index_state *istate,
 		git_config(read_convert_config, NULL);
 	}
 
-	git_check_attr(istate, path, check);
+	git_check_attr(istate, path, check, NULL);
 	ccheck = check->items;
 	ca->crlf_action = git_path_check_crlf(ccheck + 4);
 	if (ca->crlf_action == CRLF_UNDEFINED)
diff --git a/ll-merge.c b/ll-merge.c
index 22a603e8af..45a534badf 100644
--- a/ll-merge.c
+++ b/ll-merge.c
@@ -391,7 +391,7 @@  enum ll_merge_result ll_merge(mmbuffer_t *result_buf,
 		normalize_file(theirs, path, istate);
 	}
 
-	git_check_attr(istate, path, check);
+	git_check_attr(istate, path, check, NULL);
 	ll_driver_name = check->items[0].value;
 	if (check->items[1].value) {
 		marker_size = atoi(check->items[1].value);
@@ -419,7 +419,7 @@  int ll_merge_marker_size(struct index_state *istate, const char *path)
 
 	if (!check)
 		check = attr_check_initl("conflict-marker-size", NULL);
-	git_check_attr(istate, path, check);
+	git_check_attr(istate, path, check, NULL);
 	if (check->items[0].value) {
 		marker_size = atoi(check->items[0].value);
 		if (marker_size <= 0)
diff --git a/pathspec.c b/pathspec.c
index 46e77a85fe..b5217e4c36 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -732,7 +732,7 @@  int match_pathspec_attrs(struct index_state *istate,
 	if (name[namelen])
 		name = to_free = xmemdupz(name, namelen);
 
-	git_check_attr(istate, name, item->attr_check);
+	git_check_attr(istate, name, item->attr_check, NULL);
 
 	free(to_free);
 
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index b3aabb8aa3..90a3680a31 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -25,7 +25,14 @@  attr_check_quote () {
 	git check-attr test -- "$path" >actual &&
 	echo "\"$quoted_path\": test: $expect" >expect &&
 	test_cmp expect actual
+}
+
+attr_check_revision () {
+	path="$1" expect="$2" revision="$3" git_opts="$4" &&
 
+	git $git_opts check-attr -r $revision test -- "$path" >actual 2>err &&
+	echo "$path: test: $expect" >expect &&
+	test_cmp expect actual
 }
 
 test_expect_success 'open-quoted pathname' '
@@ -33,7 +40,6 @@  test_expect_success 'open-quoted pathname' '
 	attr_check a unspecified
 '
 
-
 test_expect_success 'setup' '
 	mkdir -p a/b/d a/c b &&
 	(
@@ -80,6 +86,26 @@  test_expect_success 'setup' '
 	EOF
 '
 
+test_expect_success 'setup branches' '
+	(
+		echo "f	test=f" &&
+		echo "a/i test=n"
+	) | git hash-object -w --stdin > id &&
+	git update-index --add --cacheinfo 100644,$(cat id),foo/bar/.gitattributes &&
+	git write-tree > id &&
+	git commit-tree $(cat id) -m "random commit message" > id &&
+	git update-ref refs/heads/branch1 $(cat id) &&
+
+	(
+		echo "g test=g" &&
+		echo "a/i test=m"
+	) | git hash-object -w --stdin > id &&
+	git update-index --add --cacheinfo 100644,$(cat id),foo/bar/.gitattributes &&
+	git write-tree > id &&
+	git commit-tree $(cat id) -m "random commit message" > id &&
+	git update-ref refs/heads/branch2 $(cat id)
+'
+
 test_expect_success 'command line checks' '
 	test_must_fail git check-attr &&
 	test_must_fail git check-attr -- &&
@@ -287,6 +313,15 @@  test_expect_success 'using --git-dir and --work-tree' '
 	)
 '
 
+test_expect_success 'using --revision' '
+	attr_check_revision foo/bar/f f branch1 &&
+	attr_check_revision foo/bar/a/i n branch1 &&
+	attr_check_revision foo/bar/f unspecified branch2 &&
+	attr_check_revision foo/bar/a/i m branch2 &&
+	attr_check_revision foo/bar/g g branch2 &&
+	attr_check_revision foo/bar/g unspecified branch1
+'
+
 test_expect_success 'setup bare' '
 	git clone --template= --bare . bare.git
 '
@@ -306,6 +341,25 @@  test_expect_success 'bare repository: check that .gitattribute is ignored' '
 	)
 '
 
+test_expect_success 'bare repository: with --revision' '
+	(
+		cd bare.git &&
+		(
+			echo "f	test=f" &&
+			echo "a/i test=a/i"
+		) | git hash-object -w --stdin > id &&
+		git update-index --add --cacheinfo 100644 $(cat id) .gitattributes &&
+		git write-tree > id &&
+		git commit-tree $(cat id) -m "random commit message" > id &&
+		git update-ref refs/heads/master $(cat id) &&
+		attr_check_revision f f HEAD &&
+		attr_check_revision a/f f HEAD &&
+		attr_check_revision a/c/f f HEAD &&
+		attr_check_revision a/i a/i HEAD &&
+		attr_check_revision subdir/a/i unspecified HEAD
+	)
+'
+
 test_expect_success 'bare repository: check that --cached honors index' '
 	(
 		cd bare.git &&
diff --git a/userdiff.c b/userdiff.c
index 151d9a5278..364f5a5ea2 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -412,7 +412,7 @@  struct userdiff_driver *userdiff_find_by_path(struct index_state *istate,
 		check = attr_check_initl("diff", NULL);
 	if (!path)
 		return NULL;
-	git_check_attr(istate, path, check);
+	git_check_attr(istate, path, check, NULL);
 
 	if (ATTR_TRUE(check->items[0].value))
 		return &driver_true;
diff --git a/ws.c b/ws.c
index 6e69877f25..8cc3e429c1 100644
--- a/ws.c
+++ b/ws.c
@@ -78,7 +78,7 @@  unsigned whitespace_rule(struct index_state *istate, const char *pathname)
 	if (!attr_whitespace_rule)
 		attr_whitespace_rule = attr_check_initl("whitespace", NULL);
 
-	git_check_attr(istate, pathname, attr_whitespace_rule);
+	git_check_attr(istate, pathname, attr_whitespace_rule, NULL);
 	value = attr_whitespace_rule->items[0].value;
 	if (ATTR_TRUE(value)) {
 		/* true (whitespace) */