diff mbox series

[v2,GSOC] ref-filter: use single strbuf for all output

Message ID pull.927.v2.git.1617809209164.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2,GSOC] ref-filter: use single strbuf for all output | expand

Commit Message

ZheNing Hu April 7, 2021, 3:26 p.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

When we use `git for-each-ref`, every ref will call
`show_ref_array_item()` and allocate its own final strbuf
and error strbuf. Instead, we can reuse these two strbuf
for each step ref's output.

The performance for `git for-each-ref` on the Git repository
itself with performance testing tool `hyperfine` changes from
18.7 ms ± 0.4 ms to 18.2 ms ± 0.3 ms.

This approach is similar to the one used by 79ed0a5
(cat-file: use a single strbuf for all output, 2018-08-14)
to speed up the cat-file builtin.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
    [GSOC] ref-filter: use single strbuf for all output
    
    Now git for-each-ref can reuse two buffers for all refs output, the
    performance is slightly improved.
    
    Now there may be a question : Should the original interface
    show_ref_array_items be retained?
    
    Thanks.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-927%2Fadlternative%2Fref-filter-single-buf-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-927/adlternative/ref-filter-single-buf-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/927

Range-diff vs v1:

 1:  bcc3feb4de7c ! 1:  3aed12c4f5a8 [GSOC] ref-filter: use single strbuf for all output
     @@ Commit message
      
          When we use `git for-each-ref`, every ref will call
          `show_ref_array_item()` and allocate its own final strbuf
     -    and error strbuf. Instead, we can provide two single strbuf:
     -    final_buf and error_buf that get reused for each output.
     +    and error strbuf. Instead, we can reuse these two strbuf
     +    for each step ref's output.
      
     -    When run it 100 times:
     +    The performance for `git for-each-ref` on the Git repository
     +    itself with performance testing tool `hyperfine` changes from
     +    18.7 ms ± 0.4 ms to 18.2 ms ± 0.3 ms.
      
     -    $ git for-each-ref
     -
     -    on git.git :
     -
     -    3.19s user
     -    3.88s system
     -    35% cpu
     -    20.199 total
     -
     -    to:
     -
     -    2.89s user
     -    4.00s system
     -    34% cpu
     -    19.741 total
     -
     -    The performance has been slightly improved.
     +    This approach is similar to the one used by 79ed0a5
     +    (cat-file: use a single strbuf for all output, 2018-08-14)
     +    to speed up the cat-file builtin.
      
          Signed-off-by: ZheNing Hu <adlternative@gmail.com>
      
       ## builtin/for-each-ref.c ##
     -@@ builtin/for-each-ref.c: int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
     - 	struct ref_array array;
     - 	struct ref_filter filter;
     - 	struct ref_format format = REF_FORMAT_INIT;
     -+	struct strbuf final_buf = STRBUF_INIT;
     -+	struct strbuf error_buf = STRBUF_INIT;
     +@@ builtin/for-each-ref.c: static char const * const for_each_ref_usage[] = {
       
     - 	struct option opts[] = {
     - 		OPT_BIT('s', "shell", &format.quote_style,
     + int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
     + {
     +-	int i;
     + 	struct ref_sorting *sorting = NULL, **sorting_tail = &sorting;
     + 	int maxcount = 0, icase = 0;
     + 	struct ref_array array;
      @@ builtin/for-each-ref.c: int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
     + 
       	if (!maxcount || array.nr < maxcount)
       		maxcount = array.nr;
     - 	for (i = 0; i < maxcount; i++)
     +-	for (i = 0; i < maxcount; i++)
      -		show_ref_array_item(array.items[i], &format);
     -+		show_ref_array_item(array.items[i], &format, &final_buf, &error_buf);
     ++	show_ref_array_items(array.items, &format, maxcount);
       	ref_array_clear(&array);
       	return 0;
       }
      
     - ## builtin/tag.c ##
     -@@ builtin/tag.c: static int list_tags(struct ref_filter *filter, struct ref_sorting *sorting,
     - 		     struct ref_format *format)
     - {
     - 	struct ref_array array;
     -+	struct strbuf final_buf = STRBUF_INIT;
     -+	struct strbuf error_buf = STRBUF_INIT;
     - 	char *to_free = NULL;
     - 	int i;
     - 
     -@@ builtin/tag.c: static int list_tags(struct ref_filter *filter, struct ref_sorting *sorting,
     - 	ref_array_sort(sorting, &array);
     - 
     - 	for (i = 0; i < array.nr; i++)
     --		show_ref_array_item(array.items[i], format);
     -+		show_ref_array_item(array.items[i], format, &final_buf, &error_buf);
     - 	ref_array_clear(&array);
     - 	free(to_free);
     - 
     -
       ## ref-filter.c ##
      @@ ref-filter.c: int format_ref_array_item(struct ref_array_item *info,
     + 	return 0;
       }
       
     - void show_ref_array_item(struct ref_array_item *info,
     --			 const struct ref_format *format)
     ++void show_ref_array_items(struct ref_array_item **info,
      +			 const struct ref_format *format,
     -+			 struct strbuf *final_buf,
     -+			 struct strbuf *error_buf)
     - {
     --	struct strbuf final_buf = STRBUF_INIT;
     --	struct strbuf error_buf = STRBUF_INIT;
     - 
     --	if (format_ref_array_item(info, format, &final_buf, &error_buf))
     --		die("%s", error_buf.buf);
     --	fwrite(final_buf.buf, 1, final_buf.len, stdout);
     --	strbuf_release(&error_buf);
     --	strbuf_release(&final_buf);
     -+	if (format_ref_array_item(info, format, final_buf, error_buf))
     -+		die("%s", error_buf->buf);
     -+	fwrite(final_buf->buf, 1, final_buf->len, stdout);
     -+	strbuf_reset(error_buf);
     -+	strbuf_reset(final_buf);
     - 	putchar('\n');
     - }
     - 
     -@@ ref-filter.c: void pretty_print_ref(const char *name, const struct object_id *oid,
     - 		      const struct ref_format *format)
     - {
     - 	struct ref_array_item *ref_item;
     ++			 size_t n)
     ++{
      +	struct strbuf final_buf = STRBUF_INIT;
      +	struct strbuf error_buf = STRBUF_INIT;
     ++	size_t i;
      +
     - 	ref_item = new_ref_array_item(name, oid);
     - 	ref_item->kind = ref_kind_from_refname(name);
     --	show_ref_array_item(ref_item, format);
     -+	show_ref_array_item(ref_item, format, &final_buf, &error_buf);
     - 	free_array_item(ref_item);
     - }
     - 
     ++	for (i = 0; i < n; i++) {
     ++		if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
     ++			die("%s", error_buf.buf);
     ++		fwrite(final_buf.buf, 1, final_buf.len, stdout);
     ++		strbuf_reset(&error_buf);
     ++		strbuf_reset(&final_buf);
     ++		putchar('\n');
     ++	}
     ++	strbuf_release(&error_buf);
     ++	strbuf_release(&final_buf);
     ++}
     ++
     + void show_ref_array_item(struct ref_array_item *info,
     + 			 const struct ref_format *format)
     + {
      
       ## ref-filter.h ##
      @@ ref-filter.h: int format_ref_array_item(struct ref_array_item *info,
     - 			  struct strbuf *final_buf,
       			  struct strbuf *error_buf);
       /*  Print the ref using the given format and quote_style */
     --void show_ref_array_item(struct ref_array_item *info, const struct ref_format *format);
     -+void show_ref_array_item(struct ref_array_item *info,
     + void show_ref_array_item(struct ref_array_item *info, const struct ref_format *format);
     ++/*  Print the refs using the given format and quote_style and maxcount */
     ++void show_ref_array_items(struct ref_array_item **info,
      +			 const struct ref_format *format,
     -+			 struct strbuf *final_buf,
     -+			 struct strbuf *error_buf);
     ++			 size_t n);
     ++
       /*  Parse a single sort specifier and add it to the list */
       void parse_ref_sorting(struct ref_sorting **sorting_tail, const char *atom);
       /*  Callback function for parsing the sort option */


 builtin/for-each-ref.c |  4 +---
 ref-filter.c           | 20 ++++++++++++++++++++
 ref-filter.h           |  5 +++++
 3 files changed, 26 insertions(+), 3 deletions(-)


base-commit: 2e36527f23b7f6ae15e6f21ac3b08bf3fed6ee48

Comments

Junio C Hamano April 7, 2021, 8:31 p.m. UTC | #1
"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> Subject: Re: [PATCH v2] [GSOC] ref-filter: use single strbuf for all output

The implementation changed so much from the initial attempt, for
which the above title may have been appropriate, that reusing single
strbuf over and over is not the most important part of the change
anymore, I am afraid.  Besides, it uses TWO strbufs ;-)

Subject: [PATCH] ref-filter: introduce show_ref_array_items() helper

or something like that?

> From: ZheNing Hu <adlternative@gmail.com>
>
> When we use `git for-each-ref`, every ref will call
> `show_ref_array_item()` and allocate its own final strbuf
> and error strbuf. Instead, we can reuse these two strbuf
> for each step ref's output.
>
> The performance for `git for-each-ref` on the Git repository
> itself with performance testing tool `hyperfine` changes from
> 18.7 ms ± 0.4 ms to 18.2 ms ± 0.3 ms.
>
> This approach is similar to the one used by 79ed0a5
> (cat-file: use a single strbuf for all output, 2018-08-14)
> to speed up the cat-file builtin.
>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---
>     [GSOC] ref-filter: use single strbuf for all output
>     
>     Now git for-each-ref can reuse two buffers for all refs output, the
>     performance is slightly improved.
>     
>     Now there may be a question : Should the original interface
>     show_ref_array_items be retained?
> ...
>        /*  Callback function for parsing the sort option */

Again, not a very useful range-diff as the implementation changed so much.


>  builtin/for-each-ref.c |  4 +---
>  ref-filter.c           | 20 ++++++++++++++++++++
>  ref-filter.h           |  5 +++++
>  3 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
> index cb9c81a04606..d630402230f3 100644
> --- a/builtin/for-each-ref.c
> +++ b/builtin/for-each-ref.c
> @@ -16,7 +16,6 @@ static char const * const for_each_ref_usage[] = {
>  
>  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  {
> -	int i;
>  	struct ref_sorting *sorting = NULL, **sorting_tail = &sorting;
>  	int maxcount = 0, icase = 0;
>  	struct ref_array array;
> @@ -80,8 +79,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  
>  	if (!maxcount || array.nr < maxcount)
>  		maxcount = array.nr;
> -	for (i = 0; i < maxcount; i++)
> -		show_ref_array_item(array.items[i], &format);
> +	show_ref_array_items(array.items, &format, maxcount);

The intention of this call is to pass an array and the number of
elements in the array as a pair to the function, right?  When you
design the API for a new helper function, do not split them apart by
inserting an unrelated parameter in the middle.

> diff --git a/ref-filter.c b/ref-filter.c
> index f0bd32f71416..27bbf9b6c8ac 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -2435,6 +2435,26 @@ int format_ref_array_item(struct ref_array_item *info,
>  	return 0;
>  }
>  
> +void show_ref_array_items(struct ref_array_item **info,
> +			 const struct ref_format *format,
> +			 size_t n)

IOW,

	void show_ref_array_items(const struct ref_format *format,
				  struct ref_array_item *info[], size_t n)

> +{
> +	struct strbuf final_buf = STRBUF_INIT;
> +	struct strbuf error_buf = STRBUF_INIT;
> +	size_t i;
> +
> +	for (i = 0; i < n; i++) {
> +		if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
> +			die("%s", error_buf.buf);

OK, the contents of error_buf is already localized, so it is correct
not to have _() around the "%s" here.

> +		fwrite(final_buf.buf, 1, final_buf.len, stdout);
> +		strbuf_reset(&error_buf);
> +		strbuf_reset(&final_buf);
> +		putchar('\n');

This is inherited code, but splitting fwrite() and putchar() apart
like this makes the code hard to follow.  Perhaps clean it up later
when nothing else is going on in the code as leftoverbits, outside
the topic.

> +	}
> +	strbuf_release(&error_buf);
> +	strbuf_release(&final_buf);
> +}
> +
>  void show_ref_array_item(struct ref_array_item *info,
>  			 const struct ref_format *format)
>  {

Isn't the point of the new helper function so that this can become a
thin wrapper around it, i.e.

	void show_ref_array_item(...)
        {
		show_ref_array_items(format, &info, 1);
	}

> diff --git a/ref-filter.h b/ref-filter.h
> index 19ea4c413409..eb7e79a6676d 100644
> --- a/ref-filter.h
> +++ b/ref-filter.h
> @@ -121,6 +121,11 @@ int format_ref_array_item(struct ref_array_item *info,
>  			  struct strbuf *error_buf);
>  /*  Print the ref using the given format and quote_style */
>  void show_ref_array_item(struct ref_array_item *info, const struct ref_format *format);
> +/*  Print the refs using the given format and quote_style and maxcount */
> +void show_ref_array_items(struct ref_array_item **info,
> +			 const struct ref_format *format,
> +			 size_t n);

The inconsistency between "maxcount" vs "n" is irritating.  Calling
the parameter with a name that has the word "info" (because the new
parameter is about that array) and a word like "nelem" to hint that
it is the number of elements in the array) would be sensible.

void show_ref_array_items(const struct ref_format *format,
			  struct ref_array_item *info[], size_t info_count);

or something along the line, perhaps?
Jeff King April 7, 2021, 9:27 p.m. UTC | #2
On Wed, Apr 07, 2021 at 03:26:48PM +0000, ZheNing Hu via GitGitGadget wrote:

> diff --git a/ref-filter.c b/ref-filter.c
> index f0bd32f71416..27bbf9b6c8ac 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -2435,6 +2435,26 @@ int format_ref_array_item(struct ref_array_item *info,
>  	return 0;
>  }
>  
> +void show_ref_array_items(struct ref_array_item **info,
> +			 const struct ref_format *format,
> +			 size_t n)
> +{
> +	struct strbuf final_buf = STRBUF_INIT;
> +	struct strbuf error_buf = STRBUF_INIT;
> +	size_t i;
> +
> +	for (i = 0; i < n; i++) {
> +		if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
> +			die("%s", error_buf.buf);
> +		fwrite(final_buf.buf, 1, final_buf.len, stdout);
> +		strbuf_reset(&error_buf);
> +		strbuf_reset(&final_buf);
> +		putchar('\n');
> +	}
> +	strbuf_release(&error_buf);
> +	strbuf_release(&final_buf);
> +}

I think this is a reasonable direction to take the solution: wrapping
the loop so that the reuse of the buffers can be included there.

But I do wonder if we should go the opposite direction, and get rid of
show_ref_array_item() entirely. It only has two callers, both of which
could just write the loop themselves. That is more code, but perhaps it
would make it more clear what is going on in those callers, and to give
them more flexibility.

I notice there's a third user of the ref-filter.c code in
builtin/branch.c that does not use show_ref_array_item(). Instead, it
loops itself and calls format_ref_array_item(). I think this is because
it is sometimes columnizing the results. Perhaps git-tag and
for-each-ref would want to learn the same trick, in which case they'd be
happy to have the open-coded loop.

Either way, it probably makes sense to introduce the same optimization
to the case in builtin/branch.c.

-Peff
ZheNing Hu April 8, 2021, 12:05 p.m. UTC | #3
Junio C Hamano <gitster@pobox.com> 于2021年4月8日周四 上午4:31写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > Subject: Re: [PATCH v2] [GSOC] ref-filter: use single strbuf for all output
>
> The implementation changed so much from the initial attempt, for
> which the above title may have been appropriate, that reusing single
> strbuf over and over is not the most important part of the change
> anymore, I am afraid.  Besides, it uses TWO strbufs ;-)
>
> Subject: [PATCH] ref-filter: introduce show_ref_array_items() helper
>
> or something like that?
>

Yep, I may think that its core is still reusing strbufs, but
"introduce show_ref_array_items()"  will be more accurate.

> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > When we use `git for-each-ref`, every ref will call
> > `show_ref_array_item()` and allocate its own final strbuf
> > and error strbuf. Instead, we can reuse these two strbuf
> > for each step ref's output.
> >
> > The performance for `git for-each-ref` on the Git repository
> > itself with performance testing tool `hyperfine` changes from
> > 18.7 ms ± 0.4 ms to 18.2 ms ± 0.3 ms.
> >
> > This approach is similar to the one used by 79ed0a5
> > (cat-file: use a single strbuf for all output, 2018-08-14)
> > to speed up the cat-file builtin.
> >
> > Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> > ---
> >     [GSOC] ref-filter: use single strbuf for all output
> >
> >     Now git for-each-ref can reuse two buffers for all refs output, the
> >     performance is slightly improved.
> >
> >     Now there may be a question : Should the original interface
> >     show_ref_array_items be retained?
> > ...
> >        /*  Callback function for parsing the sort option */
>
> Again, not a very useful range-diff as the implementation changed so much.
>

This makes me wonder if I should give up GGG in the future.
I also don’t want a rang-diff with a big difference.

>
> >  builtin/for-each-ref.c |  4 +---
> >  ref-filter.c           | 20 ++++++++++++++++++++
> >  ref-filter.h           |  5 +++++
> >  3 files changed, 26 insertions(+), 3 deletions(-)
> >
> > diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
> > index cb9c81a04606..d630402230f3 100644
> > --- a/builtin/for-each-ref.c
> > +++ b/builtin/for-each-ref.c
> > @@ -16,7 +16,6 @@ static char const * const for_each_ref_usage[] = {
> >
> >  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
> >  {
> > -     int i;
> >       struct ref_sorting *sorting = NULL, **sorting_tail = &sorting;
> >       int maxcount = 0, icase = 0;
> >       struct ref_array array;
> > @@ -80,8 +79,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
> >
> >       if (!maxcount || array.nr < maxcount)
> >               maxcount = array.nr;
> > -     for (i = 0; i < maxcount; i++)
> > -             show_ref_array_item(array.items[i], &format);
> > +     show_ref_array_items(array.items, &format, maxcount);
>
> The intention of this call is to pass an array and the number of
> elements in the array as a pair to the function, right?  When you
> design the API for a new helper function, do not split them apart by
> inserting an unrelated parameter in the middle.
>

Eh, are you saying that `maxcount` is irrelevant here? There should be
`maxcount`, because we need to limit the number of iterations here.

> > diff --git a/ref-filter.c b/ref-filter.c
> > index f0bd32f71416..27bbf9b6c8ac 100644
> > --- a/ref-filter.c
> > +++ b/ref-filter.c
> > @@ -2435,6 +2435,26 @@ int format_ref_array_item(struct ref_array_item *info,
> >       return 0;
> >  }
> >
> > +void show_ref_array_items(struct ref_array_item **info,
> > +                      const struct ref_format *format,
> > +                      size_t n)
>
> IOW,
>
>         void show_ref_array_items(const struct ref_format *format,
>                                   struct ref_array_item *info[], size_t n)
>

Yes, it will be more obvious in the form of an array.

> > +{
> > +     struct strbuf final_buf = STRBUF_INIT;
> > +     struct strbuf error_buf = STRBUF_INIT;
> > +     size_t i;
> > +
> > +     for (i = 0; i < n; i++) {
> > +             if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
> > +                     die("%s", error_buf.buf);
>
> OK, the contents of error_buf is already localized, so it is correct
> not to have _() around the "%s" here.
>
> > +             fwrite(final_buf.buf, 1, final_buf.len, stdout);
> > +             strbuf_reset(&error_buf);
> > +             strbuf_reset(&final_buf);
> > +             putchar('\n');
>
> This is inherited code, but splitting fwrite() and putchar() apart
> like this makes the code hard to follow.  Perhaps clean it up later
> when nothing else is going on in the code as leftoverbits, outside
> the topic.
>

Ok, swap the position of reset and putchar.

> > +     }
> > +     strbuf_release(&error_buf);
> > +     strbuf_release(&final_buf);
> > +}
> > +
> >  void show_ref_array_item(struct ref_array_item *info,
> >                        const struct ref_format *format)
> >  {
>
> Isn't the point of the new helper function so that this can become a
> thin wrapper around it, i.e.
>
>         void show_ref_array_item(...)
>         {
>                 show_ref_array_items(format, &info, 1);
>         }
>

Maybe it makes sense. But as Peff said, Maybe we can just delete it.

> > diff --git a/ref-filter.h b/ref-filter.h
> > index 19ea4c413409..eb7e79a6676d 100644
> > --- a/ref-filter.h
> > +++ b/ref-filter.h
> > @@ -121,6 +121,11 @@ int format_ref_array_item(struct ref_array_item *info,
> >                         struct strbuf *error_buf);
> >  /*  Print the ref using the given format and quote_style */
> >  void show_ref_array_item(struct ref_array_item *info, const struct ref_format *format);
> > +/*  Print the refs using the given format and quote_style and maxcount */
> > +void show_ref_array_items(struct ref_array_item **info,
> > +                      const struct ref_format *format,
> > +                      size_t n);
>
> The inconsistency between "maxcount" vs "n" is irritating.  Calling
> the parameter with a name that has the word "info" (because the new
> parameter is about that array) and a word like "nelem" to hint that
> it is the number of elements in the array) would be sensible.
>
> void show_ref_array_items(const struct ref_format *format,
>                           struct ref_array_item *info[], size_t info_count);
>
> or something along the line, perhaps?
>

Aha, I guess this is the reason for the misunderstanding above.
Yes, `info_count` is the correct meaning and the meaning of `n` is
wrong.

Thanks.
--
ZheNing Hu
ZheNing Hu April 8, 2021, 12:18 p.m. UTC | #4
Jeff King <peff@peff.net> 于2021年4月8日周四 上午5:27写道:
>
> On Wed, Apr 07, 2021 at 03:26:48PM +0000, ZheNing Hu via GitGitGadget wrote:
>
> > diff --git a/ref-filter.c b/ref-filter.c
> > index f0bd32f71416..27bbf9b6c8ac 100644
> > --- a/ref-filter.c
> > +++ b/ref-filter.c
> > @@ -2435,6 +2435,26 @@ int format_ref_array_item(struct ref_array_item *info,
> >       return 0;
> >  }
> >
> > +void show_ref_array_items(struct ref_array_item **info,
> > +                      const struct ref_format *format,
> > +                      size_t n)
> > +{
> > +     struct strbuf final_buf = STRBUF_INIT;
> > +     struct strbuf error_buf = STRBUF_INIT;
> > +     size_t i;
> > +
> > +     for (i = 0; i < n; i++) {
> > +             if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
> > +                     die("%s", error_buf.buf);
> > +             fwrite(final_buf.buf, 1, final_buf.len, stdout);
> > +             strbuf_reset(&error_buf);
> > +             strbuf_reset(&final_buf);
> > +             putchar('\n');
> > +     }
> > +     strbuf_release(&error_buf);
> > +     strbuf_release(&final_buf);
> > +}
>
> I think this is a reasonable direction to take the solution: wrapping
> the loop so that the reuse of the buffers can be included there.
>
> But I do wonder if we should go the opposite direction, and get rid of
> show_ref_array_item() entirely. It only has two callers, both of which
> could just write the loop themselves. That is more code, but perhaps it
> would make it more clear what is going on in those callers, and to give
> them more flexibility.
>

Indeed. I think `pretty_print_ref()` is proof that we may need to keep
`show_ref_array_item()` because If it modified to `show_ref_array_items(...,1);`
it will look very strange.

> I notice there's a third user of the ref-filter.c code in
> builtin/branch.c that does not use show_ref_array_item(). Instead, it
> loops itself and calls format_ref_array_item(). I think this is because
> it is sometimes columnizing the results. Perhaps git-tag and
> for-each-ref would want to learn the same trick, in which case they'd be
> happy to have the open-coded loop.
>

Yes, a special judgment about colopts is used in the `print_ref_list()` of
`branch.c`.

> Either way, it probably makes sense to introduce the same optimization
> to the case in builtin/branch.c.
>

I agree in `branch.c` here can learn this reusing bufs optimization.

> -Peff

Thanks.
--
ZheNing Hu
Jeff King April 8, 2021, 2:32 p.m. UTC | #5
On Thu, Apr 08, 2021 at 08:18:59PM +0800, ZheNing Hu wrote:

> > I think this is a reasonable direction to take the solution: wrapping
> > the loop so that the reuse of the buffers can be included there.
> >
> > But I do wonder if we should go the opposite direction, and get rid of
> > show_ref_array_item() entirely. It only has two callers, both of which
> > could just write the loop themselves. That is more code, but perhaps it
> > would make it more clear what is going on in those callers, and to give
> > them more flexibility.
> >
> 
> Indeed. I think `pretty_print_ref()` is proof that we may need to keep
> `show_ref_array_item()` because If it modified to `show_ref_array_items(...,1);`
> it will look very strange.

What I meant was that we should get rid of show_ref_array_items(), as
well, and just use format_ref_array_item() everywhere. This whole
wrapper is only saving us a few lines, and it makes it harder to see
what the function is doing. Likewise for pretty-print ref. But I dunno.
Maybe that is all going too far.

-Peff
ZheNing Hu April 8, 2021, 2:43 p.m. UTC | #6
Jeff King <peff@peff.net> 于2021年4月8日周四 下午10:32写道:
>
> On Thu, Apr 08, 2021 at 08:18:59PM +0800, ZheNing Hu wrote:
>
> > > I think this is a reasonable direction to take the solution: wrapping
> > > the loop so that the reuse of the buffers can be included there.
> > >
> > > But I do wonder if we should go the opposite direction, and get rid of
> > > show_ref_array_item() entirely. It only has two callers, both of which
> > > could just write the loop themselves. That is more code, but perhaps it
> > > would make it more clear what is going on in those callers, and to give
> > > them more flexibility.
> > >
> >
> > Indeed. I think `pretty_print_ref()` is proof that we may need to keep
> > `show_ref_array_item()` because If it modified to `show_ref_array_items(...,1);`
> > it will look very strange.
>
> What I meant was that we should get rid of show_ref_array_items(), as
> well, and just use format_ref_array_item() everywhere. This whole
> wrapper is only saving us a few lines, and it makes it harder to see
> what the function is doing. Likewise for pretty-print ref. But I dunno.
> Maybe that is all going too far.
>

Ok... so you mean we just use a loop like in branch.c, and get rid of
show_ref_array_items() and show_ref_array_item().
(We can still use the optimization of reuse bufs)

> -Peff

--
ZheNing Hu
Jeff King April 8, 2021, 2:51 p.m. UTC | #7
On Thu, Apr 08, 2021 at 10:43:54PM +0800, ZheNing Hu wrote:

> > What I meant was that we should get rid of show_ref_array_items(), as
> > well, and just use format_ref_array_item() everywhere. This whole
> > wrapper is only saving us a few lines, and it makes it harder to see
> > what the function is doing. Likewise for pretty-print ref. But I dunno.
> > Maybe that is all going too far.
> >
> 
> Ok... so you mean we just use a loop like in branch.c, and get rid of
> show_ref_array_items() and show_ref_array_item().
> (We can still use the optimization of reuse bufs)

Yes, something like this:

diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index cb9c81a046..55297fe297 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -22,6 +22,8 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	struct ref_array array;
 	struct ref_filter filter;
 	struct ref_format format = REF_FORMAT_INIT;
+	struct strbuf output = STRBUF_INIT;
+	struct strbuf err = STRBUF_INIT;
 
 	struct option opts[] = {
 		OPT_BIT('s', "shell", &format.quote_style,
@@ -80,8 +82,16 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 
 	if (!maxcount || array.nr < maxcount)
 		maxcount = array.nr;
-	for (i = 0; i < maxcount; i++)
-		show_ref_array_item(array.items[i], &format);
+
+	for (i = 0; i < maxcount; i++) {
+		strbuf_reset(&output);
+		if (format_ref_array_item(array.items[i], &format, &output, &err))
+			die("%s", err.buf);
+		fwrite(output.buf, 1, output.len, stdout);
+		putchar('\n');
+	}
+
+	strbuf_release(&output);
 	ref_array_clear(&array);
 	return 0;
 }

It is dropping a few lines by assuming that the error buf is only
touched when we return an error (which IMHO is a reasonable assumption
to make).

-Peff
ZheNing Hu April 8, 2021, 3:12 p.m. UTC | #8
Jeff King <peff@peff.net> 于2021年4月8日周四 下午10:51写道:
>
> On Thu, Apr 08, 2021 at 10:43:54PM +0800, ZheNing Hu wrote:
>
> > > What I meant was that we should get rid of show_ref_array_items(), as
> > > well, and just use format_ref_array_item() everywhere. This whole
> > > wrapper is only saving us a few lines, and it makes it harder to see
> > > what the function is doing. Likewise for pretty-print ref. But I dunno.
> > > Maybe that is all going too far.
> > >
> >
> > Ok... so you mean we just use a loop like in branch.c, and get rid of
> > show_ref_array_items() and show_ref_array_item().
> > (We can still use the optimization of reuse bufs)
>
> Yes, something like this:
>
> diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
> index cb9c81a046..55297fe297 100644
> --- a/builtin/for-each-ref.c
> +++ b/builtin/for-each-ref.c
> @@ -22,6 +22,8 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>         struct ref_array array;
>         struct ref_filter filter;
>         struct ref_format format = REF_FORMAT_INIT;
> +       struct strbuf output = STRBUF_INIT;
> +       struct strbuf err = STRBUF_INIT;
>
>         struct option opts[] = {
>                 OPT_BIT('s', "shell", &format.quote_style,
> @@ -80,8 +82,16 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>
>         if (!maxcount || array.nr < maxcount)
>                 maxcount = array.nr;
> -       for (i = 0; i < maxcount; i++)
> -               show_ref_array_item(array.items[i], &format);
> +
> +       for (i = 0; i < maxcount; i++) {
> +               strbuf_reset(&output);
> +               if (format_ref_array_item(array.items[i], &format, &output, &err))
> +                       die("%s", err.buf);
> +               fwrite(output.buf, 1, output.len, stdout);
> +               putchar('\n');
> +       }
> +
> +       strbuf_release(&output);
>         ref_array_clear(&array);
>         return 0;
>  }
>
> It is dropping a few lines by assuming that the error buf is only
> touched when we return an error (which IMHO is a reasonable assumption
> to make).
>

I agree, err buffer does not need to be reused.

> -Peff

Thanks.
--
ZheNing Hu
diff mbox series

Patch

diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index cb9c81a04606..d630402230f3 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -16,7 +16,6 @@  static char const * const for_each_ref_usage[] = {
 
 int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 {
-	int i;
 	struct ref_sorting *sorting = NULL, **sorting_tail = &sorting;
 	int maxcount = 0, icase = 0;
 	struct ref_array array;
@@ -80,8 +79,7 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 
 	if (!maxcount || array.nr < maxcount)
 		maxcount = array.nr;
-	for (i = 0; i < maxcount; i++)
-		show_ref_array_item(array.items[i], &format);
+	show_ref_array_items(array.items, &format, maxcount);
 	ref_array_clear(&array);
 	return 0;
 }
diff --git a/ref-filter.c b/ref-filter.c
index f0bd32f71416..27bbf9b6c8ac 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -2435,6 +2435,26 @@  int format_ref_array_item(struct ref_array_item *info,
 	return 0;
 }
 
+void show_ref_array_items(struct ref_array_item **info,
+			 const struct ref_format *format,
+			 size_t n)
+{
+	struct strbuf final_buf = STRBUF_INIT;
+	struct strbuf error_buf = STRBUF_INIT;
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		if (format_ref_array_item(info[i], format, &final_buf, &error_buf))
+			die("%s", error_buf.buf);
+		fwrite(final_buf.buf, 1, final_buf.len, stdout);
+		strbuf_reset(&error_buf);
+		strbuf_reset(&final_buf);
+		putchar('\n');
+	}
+	strbuf_release(&error_buf);
+	strbuf_release(&final_buf);
+}
+
 void show_ref_array_item(struct ref_array_item *info,
 			 const struct ref_format *format)
 {
diff --git a/ref-filter.h b/ref-filter.h
index 19ea4c413409..eb7e79a6676d 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -121,6 +121,11 @@  int format_ref_array_item(struct ref_array_item *info,
 			  struct strbuf *error_buf);
 /*  Print the ref using the given format and quote_style */
 void show_ref_array_item(struct ref_array_item *info, const struct ref_format *format);
+/*  Print the refs using the given format and quote_style and maxcount */
+void show_ref_array_items(struct ref_array_item **info,
+			 const struct ref_format *format,
+			 size_t n);
+
 /*  Parse a single sort specifier and add it to the list */
 void parse_ref_sorting(struct ref_sorting **sorting_tail, const char *atom);
 /*  Callback function for parsing the sort option */