diff mbox series

[v8,8/8] ls-tree.c: introduce "--format" option

Message ID e0add802fbbabde7e7b3743127b2d4047f1ce760.1641043500.git.dyroneteng@gmail.com (mailing list archive)
State Superseded
Headers show
Series ls-tree: "--object-only" and "--format" opts | expand

Commit Message

Teng Long Jan. 1, 2022, 1:50 p.m. UTC
Add a --format option to ls-tree. It has an existing default output,
and then --long and --name-only options to emit the default output
along with the objectsize and, or to only emit object paths.

Rather than add --type-only, --object-only etc. we can just support a
--format using a strbuf_expand() similar to "for-each-ref
--format". We might still add such options in the future for
convenience.

The --format implementation is slower than the existing code, but this
change does not cause any performance regressions. We'll leave the
existing show_tree() unchanged, and only run show_tree_fmt() in if
a --format different than the hardcoded built-in ones corresponding to
the existing modes is provided.

I.e. something like the "--long" output would be much slower with
this, mainly due to how we need to allocate various things to do with
quote.c instead of spewing the output directly to stdout.

The new option of '--format' comes from Ævar Arnfjörð Bjarmasonn's
idea and suggestion, this commit makes modifications in terms of the
original discussion on community [1].

Here is the statistics about performance tests:

1. Default format (hitten the builtin formats):

    "git ls-tree <tree-ish>" vs "--format='%(mode) %(type) %(object)%x09%(file)'"

    $hyperfine --warmup=10 "/opt/git/master/bin/git ls-tree -r HEAD"
    Benchmark 1: /opt/git/master/bin/git ls-tree -r HEAD
    Time (mean ± σ):     105.2 ms ±   3.3 ms    [User: 84.3 ms, System: 20.8 ms]
    Range (min … max):    99.2 ms … 113.2 ms    28 runs

    $hyperfine --warmup=10 "/opt/git/ls-tree-oid-only/bin/git ls-tree -r --format='%(mode) %(type) %(object)%x09%(file)'  HEAD"
    Benchmark 1: /opt/git/ls-tree-oid-only/bin/git ls-tree -r --format='%(mode) %(type) %(object)%x09%(file)'  HEAD
    Time (mean ± σ):     106.4 ms ±   2.7 ms    [User: 86.1 ms, System: 20.2 ms]
    Range (min … max):   100.2 ms … 110.5 ms    29 runs

2. Default format includes object size (hitten the builtin formats):

    "git ls-tree -l <tree-ish>" vs "--format='%(mode) %(type) %(object) %(size:padded)%x09%(file)'"

    $hyperfine --warmup=10 "/opt/git/master/bin/git ls-tree -r -l HEAD"
    Benchmark 1: /opt/git/master/bin/git ls-tree -r -l HEAD
    Time (mean ± σ):     335.1 ms ±   6.5 ms    [User: 304.6 ms, System: 30.4 ms]
    Range (min … max):   327.5 ms … 348.4 ms    10 runs

    $hyperfine --warmup=10 "/opt/git/ls-tree-oid-only/bin/git ls-tree -r --format='%(mode) %(type) %(object) %(size:padded)%x09%(file)'  HEAD"
    Benchmark 1: /opt/git/ls-tree-oid-only/bin/git ls-tree -r --format='%(mode) %(type) %(object) %(size:padded)%x09%(file)'  HEAD
    Time (mean ± σ):     337.2 ms ±   8.2 ms    [User: 309.2 ms, System: 27.9 ms]
    Range (min … max):   328.8 ms … 349.4 ms    10 runs

Links:
[1] https://public-inbox.org/git/RFC-patch-6.7-eac299f06ff-20211217T131635Z-avarab@gmail.com/

Signed-off-by: Teng Long <dyroneteng@gmail.com>
---
 Documentation/git-ls-tree.txt |  50 ++++++++-
 builtin/ls-tree.c             | 191 ++++++++++++++++++++++++++++------
 t/t3105-ls-tree-format.sh     |  55 ++++++++++
 3 files changed, 259 insertions(+), 37 deletions(-)
 create mode 100755 t/t3105-ls-tree-format.sh

Comments

Johannes Schindelin Jan. 4, 2022, 2:38 p.m. UTC | #1
Hi Teng,

On Sat, 1 Jan 2022, Teng Long wrote:

> diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> index 009ffeb15d..6e3e5a4d06 100644
> --- a/builtin/ls-tree.c
> +++ b/builtin/ls-tree.c
> @@ -56,23 +56,75 @@ enum {
>
>  static int cmdmode = MODE_UNSPECIFIED;
>
> -static int parse_shown_fields(void)
> +static const char *format;
> +static const char *default_format = "%(mode) %(type) %(object)%x09%(file)";
> +static const char *long_format = "%(mode) %(type) %(object) %(size:padded)%x09%(file)";
> +static const char *name_only_format = "%(file)";
> +static const char *object_only_format = "%(object)";
> +
> +static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
> +			      const enum object_type type, unsigned int padded)
>  {
> -	if (cmdmode == MODE_NAME_ONLY) {
> -		shown_bits = SHOW_FILE_NAME;
> -		return 0;
> +	if (type == OBJ_BLOB) {
> +		unsigned long size;
> +		if (oid_object_info(the_repository, oid, &size) < 0)
> +			die(_("could not get object info about '%s'"),
> +			    oid_to_hex(oid));
> +		if (padded)
> +			strbuf_addf(line, "%7" PRIuMAX, (uintmax_t)size);
> +		else
> +			strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
> +	} else if (padded) {
> +		strbuf_addf(line, "%7s", "-");

This, along with two other similar instances, triggers the
`static-analysis` job in the CI failure of `seen`. The suggested diff is:


-- snip --
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 6e3e5a4d0634..8301d1a15f9a 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -75,7 +75,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
 		else
 			strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
 	} else if (padded) {
-		strbuf_addf(line, "%7s", "-");
+		strbuf_addstr(line, "-");
 	} else {
 		strbuf_addstr(line, "-");
 	}
@@ -110,7 +110,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
 	} else if (skip_prefix(start, "(size)", &p)) {
 		expand_objectsize(line, data->oid, data->type, 0);
 	} else if (skip_prefix(start, "(object)", &p)) {
-		strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
+		strbuf_add_unique_abbrev(line, data->oid, abbrev);
 	} else if (skip_prefix(start, "(file)", &p)) {
 		const char *name = data->base->buf;
 		const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
@@ -119,7 +119,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
 		strbuf_addstr(data->base, data->pathname);
 		name = relative_path(data->base->buf, prefix, &sb);
 		quote_c_style(name, &quoted, NULL, 0);
-		strbuf_addstr(line, quoted.buf);
+		strbuf_addbuf(line, &quoted);
 	} else {
 		errlen = (unsigned long)len;
 		die(_("bad ls-tree format: %%%.*s"), errlen, start);
-- snap --

But I think that the first hunk indicates a deeper issue, as `%7s`
probably meant to pad the dash to seven dashes (which that format won't
accomplish, but `strbuf_addchars()` would)?

Ciao,
Dscho

> +	} else {
> +		strbuf_addstr(line, "-");
>  	}
> -	if (cmdmode == MODE_OBJECT_ONLY) {
> -		shown_bits = SHOW_OBJECT_NAME;
> -		return 0;
> +}
> +
> +static size_t expand_show_tree(struct strbuf *line, const char *start,
> +			       void *context)
> +{
> +	struct shown_data *data = context;
> +	const char *end;
> +	const char *p;
> +	unsigned int errlen;
> +	size_t len;
> +	len = strbuf_expand_literal_cb(line, start, NULL);
> +	if (len)
> +		return len;
> +
> +	if (*start != '(')
> +		die(_("bad ls-tree format: as '%s'"), start);
> +
> +	end = strchr(start + 1, ')');
> +	if (!end)
> +		die(_("bad ls-tree format: element '%s' does not end in ')'"), start);
> +
> +	len = end - start + 1;
> +	if (skip_prefix(start, "(mode)", &p)) {
> +		strbuf_addf(line, "%06o", data->mode);
> +	} else if (skip_prefix(start, "(type)", &p)) {
> +		strbuf_addstr(line, type_name(data->type));
> +	} else if (skip_prefix(start, "(size:padded)", &p)) {
> +		expand_objectsize(line, data->oid, data->type, 1);
> +	} else if (skip_prefix(start, "(size)", &p)) {
> +		expand_objectsize(line, data->oid, data->type, 0);
> +	} else if (skip_prefix(start, "(object)", &p)) {
> +		strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
> +	} else if (skip_prefix(start, "(file)", &p)) {
> +		const char *name = data->base->buf;
> +		const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
> +		struct strbuf quoted = STRBUF_INIT;
> +		struct strbuf sb = STRBUF_INIT;
> +		strbuf_addstr(data->base, data->pathname);
> +		name = relative_path(data->base->buf, prefix, &sb);
> +		quote_c_style(name, &quoted, NULL, 0);
> +		strbuf_addstr(line, quoted.buf);
> +	} else {
> +		errlen = (unsigned long)len;
> +		die(_("bad ls-tree format: %%%.*s"), errlen, start);
>  	}
> -	if (!ls_options || (ls_options & LS_RECURSIVE)
> -	    || (ls_options & LS_SHOW_TREES)
> -	    || (ls_options & LS_TREE_ONLY))
> -		shown_bits = SHOW_DEFAULT;
> -	if (cmdmode == MODE_LONG)
> -		shown_bits = SHOW_DEFAULT | SHOW_SIZE;
> -	return 1;
> +	return len;
>  }
>
>  static int show_recursive(const char *base, size_t baselen,
> @@ -106,6 +158,75 @@ static int show_recursive(const char *base, size_t baselen,
>  	return 0;
>  }
>
> +static int show_tree_init(enum object_type *type, struct strbuf *base,
> +			  const char *pathname, unsigned mode, int *retval)
> +{
> +	if (S_ISGITLINK(mode)) {
> +		*type = OBJ_COMMIT;
> +	} else if (S_ISDIR(mode)) {
> +		if (show_recursive(base->buf, base->len, pathname)) {
> +			*retval = READ_TREE_RECURSIVE;
> +			if (!(ls_options & LS_SHOW_TREES))
> +				return 1;
> +		}
> +		*type = OBJ_TREE;
> +	}
> +	else if (ls_options & LS_TREE_ONLY)
> +		return 1;
> +	return 0;
> +}
> +
> +static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
> +			 const char *pathname, unsigned mode, void *context)
> +{
> +	size_t baselen;
> +	int retval = 0;
> +	struct strbuf line = STRBUF_INIT;
> +	struct shown_data data = {
> +		.mode = mode,
> +		.type = OBJ_BLOB,
> +		.oid = oid,
> +		.pathname = pathname,
> +		.base = base,
> +	};
> +
> +	if (show_tree_init(&data.type, base, pathname, mode, &retval))
> +		return retval;
> +
> +	baselen = base->len;
> +	strbuf_expand(&line, format, expand_show_tree, &data);
> +	strbuf_addch(&line, line_termination);
> +	fwrite(line.buf, line.len, 1, stdout);
> +	strbuf_setlen(base, baselen);
> +	return retval;
> +}
> +
> +static int parse_shown_fields(void)
> +{
> +	if (cmdmode == MODE_NAME_ONLY ||
> +	    (format && !strcmp(format, name_only_format))) {
> +		shown_bits = SHOW_FILE_NAME;
> +		return 1;
> +	}
> +
> +	if (cmdmode == MODE_OBJECT_ONLY ||
> +	    (format && !strcmp(format, object_only_format))) {
> +		shown_bits = SHOW_OBJECT_NAME;
> +		return 1;
> +	}
> +
> +	if (!ls_options || (ls_options & LS_RECURSIVE)
> +	    || (ls_options & LS_SHOW_TREES)
> +	    || (ls_options & LS_TREE_ONLY)
> +		|| (format && !strcmp(format, default_format)))
> +		shown_bits = SHOW_DEFAULT;
> +
> +	if (cmdmode == MODE_LONG ||
> +		(format && !strcmp(format, long_format)))
> +		shown_bits = SHOW_DEFAULT | SHOW_SIZE;
> +	return 1;
> +}
> +
>  static int show_default(struct shown_data *data)
>  {
>  	size_t baselen = data->base->len;
> @@ -137,24 +258,6 @@ static int show_default(struct shown_data *data)
>  	return 1;
>  }
>
> -static int show_tree_init(enum object_type *type, struct strbuf *base,
> -			  const char *pathname, unsigned mode, int *retval)
> -{
> -	if (S_ISGITLINK(mode)) {
> -		*type = OBJ_COMMIT;
> -	} else if (S_ISDIR(mode)) {
> -		if (show_recursive(base->buf, base->len, pathname)) {
> -			*retval = READ_TREE_RECURSIVE;
> -			if (!(ls_options & LS_SHOW_TREES))
> -				return 1;
> -		}
> -		*type = OBJ_TREE;
> -	}
> -	else if (ls_options & LS_TREE_ONLY)
> -		return 1;
> -	return 0;
> -}
> -
>  static int show_tree(const struct object_id *oid, struct strbuf *base,
>  		const char *pathname, unsigned mode, void *context)
>  {
> @@ -196,6 +299,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
>  	struct object_id oid;
>  	struct tree *tree;
>  	int i, full_tree = 0;
> +	read_tree_fn_t fn = show_tree;
>  	const struct option ls_tree_options[] = {
>  		OPT_BIT('d', NULL, &ls_options, N_("only show trees"),
>  			LS_TREE_ONLY),
> @@ -218,6 +322,9 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
>  		OPT_BOOL(0, "full-tree", &full_tree,
>  			 N_("list entire tree; not just current directory "
>  			    "(implies --full-name)")),
> +		OPT_STRING_F(0, "format", &format, N_("format"),
> +			     N_("format to use for the output"),
> +			     PARSE_OPT_NONEG),
>  		OPT__ABBREV(&abbrev),
>  		OPT_END()
>  	};
> @@ -238,6 +345,10 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
>  	    ((LS_TREE_ONLY|LS_RECURSIVE) & ls_options))
>  		ls_options |= LS_SHOW_TREES;
>
> +	if (format && cmdmode)
> +		usage_msg_opt(
> +			_("--format can't be combined with other format-altering options"),
> +			ls_tree_usage, ls_tree_options);
>  	if (argc < 1)
>  		usage_with_options(ls_tree_usage, ls_tree_options);
>  	if (get_oid(argv[0], &oid))
> @@ -261,6 +372,18 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
>  	tree = parse_tree_indirect(&oid);
>  	if (!tree)
>  		die("not a tree object");
> -	return !!read_tree(the_repository, tree,
> -			   &pathspec, show_tree, NULL);
> +
> +	/*
> +	 * The generic show_tree_fmt() is slower than show_tree(), so
> +	 * take the fast path if possible.
> +	 */
> +	if (format && (!strcmp(format, default_format) ||
> +				   !strcmp(format, long_format) ||
> +				   !strcmp(format, name_only_format) ||
> +				   !strcmp(format, object_only_format)))
> +		fn = show_tree;
> +	else if (format)
> +		fn = show_tree_fmt;
> +
> +	return !!read_tree(the_repository, tree, &pathspec, fn, NULL);
>  }
> diff --git a/t/t3105-ls-tree-format.sh b/t/t3105-ls-tree-format.sh
> new file mode 100755
> index 0000000000..92b4d240e8
> --- /dev/null
> +++ b/t/t3105-ls-tree-format.sh
> @@ -0,0 +1,55 @@
> +#!/bin/sh
> +
> +test_description='ls-tree --format'
> +
> +TEST_PASSES_SANITIZE_LEAK=true
> +. ./test-lib.sh
> +
> +test_expect_success 'ls-tree --format usage' '
> +	test_expect_code 129 git ls-tree --format=fmt -l &&
> +	test_expect_code 129 git ls-tree --format=fmt --name-only &&
> +	test_expect_code 129 git ls-tree --format=fmt --name-status &&
> +	test_expect_code 129 git ls-tree --format=fmt --object-only
> +'
> +
> +test_expect_success 'setup' '
> +	mkdir dir &&
> +	test_commit dir/sub-file &&
> +	test_commit top-file
> +'
> +
> +test_ls_tree_format () {
> +	format=$1 &&
> +	opts=$2 &&
> +	shift 2 &&
> +	git ls-tree $opts -r HEAD >expect.raw &&
> +	sed "s/^/> /" >expect <expect.raw &&
> +	git ls-tree --format="> $format" -r HEAD >actual &&
> +	test_cmp expect actual
> +}
> +
> +test_expect_success 'ls-tree --format=<default-like>' '
> +	test_ls_tree_format \
> +		"%(mode) %(type) %(object)%x09%(file)" \
> +		""
> +'
> +
> +test_expect_success 'ls-tree --format=<long-like>' '
> +	test_ls_tree_format \
> +		"%(mode) %(type) %(object) %(size:padded)%x09%(file)" \
> +		"--long"
> +'
> +
> +test_expect_success 'ls-tree --format=<name-only-like>' '
> +	test_ls_tree_format \
> +		"%(file)" \
> +		"--name-only"
> +'
> +
> +test_expect_success 'ls-tree --format=<object-only-like>' '
> +	test_ls_tree_format \
> +		"%(object)" \
> +		"--object-only"
> +'
> +
> +test_done
> --
> 2.33.0.rc1.1802.gbb1c3936fb.dirty
>
>
>
Johannes Schindelin Jan. 4, 2022, 3:17 p.m. UTC | #2
Hi Teng,

On Tue, 4 Jan 2022, Johannes Schindelin wrote:

> -- snip --
> diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> index 6e3e5a4d0634..8301d1a15f9a 100644
> --- a/builtin/ls-tree.c
> +++ b/builtin/ls-tree.c
> @@ -75,7 +75,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
>  		else
>  			strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
>  	} else if (padded) {
> -		strbuf_addf(line, "%7s", "-");
> +		strbuf_addstr(line, "-");
>  	} else {
>  		strbuf_addstr(line, "-");
>  	}
> @@ -110,7 +110,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
>  	} else if (skip_prefix(start, "(size)", &p)) {
>  		expand_objectsize(line, data->oid, data->type, 0);
>  	} else if (skip_prefix(start, "(object)", &p)) {
> -		strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
> +		strbuf_add_unique_abbrev(line, data->oid, abbrev);
>  	} else if (skip_prefix(start, "(file)", &p)) {
>  		const char *name = data->base->buf;
>  		const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
> @@ -119,7 +119,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
>  		strbuf_addstr(data->base, data->pathname);
>  		name = relative_path(data->base->buf, prefix, &sb);
>  		quote_c_style(name, &quoted, NULL, 0);
> -		strbuf_addstr(line, quoted.buf);
> +		strbuf_addbuf(line, &quoted);
>  	} else {
>  		errlen = (unsigned long)len;
>  		die(_("bad ls-tree format: %%%.*s"), errlen, start);
> -- snap --

In addition to that, you need these to quiet down the `linux-leaks` job of
our CI build, which is also failing with your patches:

-- snipsnap --
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 8301d1a15f9a..0dc0327e4785 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -120,6 +120,8 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
 		name = relative_path(data->base->buf, prefix, &sb);
 		quote_c_style(name, &quoted, NULL, 0);
 		strbuf_addbuf(line, &quoted);
+		strbuf_release(&sb);
+		strbuf_release(&quoted);
 	} else {
 		errlen = (unsigned long)len;
 		die(_("bad ls-tree format: %%%.*s"), errlen, start);
@@ -197,6 +199,7 @@ static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
 	strbuf_expand(&line, format, expand_show_tree, &data);
 	strbuf_addch(&line, line_termination);
 	fwrite(line.buf, line.len, 1, stdout);
+	strbuf_release(&line);
 	strbuf_setlen(base, baselen);
 	return retval;
 }
Teng Long Jan. 5, 2022, 9:40 a.m. UTC | #3
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> In addition to that, you need these to quiet down the `linux-leaks` job of
> our CI build, which is also failing with your patches:

Thanks.
I will fix it in next patch.

Johannes Schindelin <Johannes.Schindelin@gmx.de> 于2022年1月4日周二 23:17写道:
>
> Hi Teng,
>
> On Tue, 4 Jan 2022, Johannes Schindelin wrote:
>
> > -- snip --
> > diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> > index 6e3e5a4d0634..8301d1a15f9a 100644
> > --- a/builtin/ls-tree.c
> > +++ b/builtin/ls-tree.c
> > @@ -75,7 +75,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
> >               else
> >                       strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
> >       } else if (padded) {
> > -             strbuf_addf(line, "%7s", "-");
> > +             strbuf_addstr(line, "-");
> >       } else {
> >               strbuf_addstr(line, "-");
> >       }
> > @@ -110,7 +110,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
> >       } else if (skip_prefix(start, "(size)", &p)) {
> >               expand_objectsize(line, data->oid, data->type, 0);
> >       } else if (skip_prefix(start, "(object)", &p)) {
> > -             strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
> > +             strbuf_add_unique_abbrev(line, data->oid, abbrev);
> >       } else if (skip_prefix(start, "(file)", &p)) {
> >               const char *name = data->base->buf;
> >               const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
> > @@ -119,7 +119,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
> >               strbuf_addstr(data->base, data->pathname);
> >               name = relative_path(data->base->buf, prefix, &sb);
> >               quote_c_style(name, &quoted, NULL, 0);
> > -             strbuf_addstr(line, quoted.buf);
> > +             strbuf_addbuf(line, &quoted);
> >       } else {
> >               errlen = (unsigned long)len;
> >               die(_("bad ls-tree format: %%%.*s"), errlen, start);
> > -- snap --
>
> In addition to that, you need these to quiet down the `linux-leaks` job of
> our CI build, which is also failing with your patches:
>
> -- snipsnap --
> diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> index 8301d1a15f9a..0dc0327e4785 100644
> --- a/builtin/ls-tree.c
> +++ b/builtin/ls-tree.c
> @@ -120,6 +120,8 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
>                 name = relative_path(data->base->buf, prefix, &sb);
>                 quote_c_style(name, &quoted, NULL, 0);
>                 strbuf_addbuf(line, &quoted);
> +               strbuf_release(&sb);
> +               strbuf_release(&quoted);
>         } else {
>                 errlen = (unsigned long)len;
>                 die(_("bad ls-tree format: %%%.*s"), errlen, start);
> @@ -197,6 +199,7 @@ static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
>         strbuf_expand(&line, format, expand_show_tree, &data);
>         strbuf_addch(&line, line_termination);
>         fwrite(line.buf, line.len, 1, stdout);
> +       strbuf_release(&line);
>         strbuf_setlen(base, baselen);
>         return retval;
>  }
Teng Long Jan. 5, 2022, 9:58 a.m. UTC | #4
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> This, along with two other similar instances, triggers the
> `static-analysis` job in the CI failure of `seen`. The suggested diff is:

The second and third I will optimize in the next patch.

The first one. Actually I am a little puzzled from this :

> -               strbuf_addf(line, "%7s", "-");
> +               strbuf_addstr(line, "-");

> But I think that the first hunk indicates a deeper issue, as `%7s`
> probably meant to pad the dash to seven dashes (which that format won't
> accomplish, but `strbuf_addchars()` would)?

"strbuf_addf(line, "%7s", "-");" here is used to align the columns
with a width of
seven chars, not repeat one DASH to seven.

A little weird about the fix recommendation of  "strbuf_addstr(line, "-");" ,
because it will only add a single DASH here.

It's the identical result which compares to the "master"[1]  I think with the
current codes and I tested the "strbuf_addf()" simply and it seems to work
fine.

[1] https://github.com/git/git/blob/master/builtin/ls-tree.c#L106

Thanks.

Johannes Schindelin <Johannes.Schindelin@gmx.de> 于2022年1月4日周二 22:38写道:
>
> Hi Teng,
>
> On Sat, 1 Jan 2022, Teng Long wrote:
>
> > diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> > index 009ffeb15d..6e3e5a4d06 100644
> > --- a/builtin/ls-tree.c
> > +++ b/builtin/ls-tree.c
> > @@ -56,23 +56,75 @@ enum {
> >
> >  static int cmdmode = MODE_UNSPECIFIED;
> >
> > -static int parse_shown_fields(void)
> > +static const char *format;
> > +static const char *default_format = "%(mode) %(type) %(object)%x09%(file)";
> > +static const char *long_format = "%(mode) %(type) %(object) %(size:padded)%x09%(file)";
> > +static const char *name_only_format = "%(file)";
> > +static const char *object_only_format = "%(object)";
> > +
> > +static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
> > +                           const enum object_type type, unsigned int padded)
> >  {
> > -     if (cmdmode == MODE_NAME_ONLY) {
> > -             shown_bits = SHOW_FILE_NAME;
> > -             return 0;
> > +     if (type == OBJ_BLOB) {
> > +             unsigned long size;
> > +             if (oid_object_info(the_repository, oid, &size) < 0)
> > +                     die(_("could not get object info about '%s'"),
> > +                         oid_to_hex(oid));
> > +             if (padded)
> > +                     strbuf_addf(line, "%7" PRIuMAX, (uintmax_t)size);
> > +             else
> > +                     strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
> > +     } else if (padded) {
> > +             strbuf_addf(line, "%7s", "-");
>
> This, along with two other similar instances, triggers the
> `static-analysis` job in the CI failure of `seen`. The suggested diff is:
>
>
> -- snip --
> diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
> index 6e3e5a4d0634..8301d1a15f9a 100644
> --- a/builtin/ls-tree.c
> +++ b/builtin/ls-tree.c
> @@ -75,7 +75,7 @@ static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
>                 else
>                         strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
>         } else if (padded) {
> -               strbuf_addf(line, "%7s", "-");
> +               strbuf_addstr(line, "-");
>         } else {
>                 strbuf_addstr(line, "-");
>         }
> @@ -110,7 +110,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
>         } else if (skip_prefix(start, "(size)", &p)) {
>                 expand_objectsize(line, data->oid, data->type, 0);
>         } else if (skip_prefix(start, "(object)", &p)) {
> -               strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
> +               strbuf_add_unique_abbrev(line, data->oid, abbrev);
>         } else if (skip_prefix(start, "(file)", &p)) {
>                 const char *name = data->base->buf;
>                 const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
> @@ -119,7 +119,7 @@ static size_t expand_show_tree(struct strbuf *line, const char *start,
>                 strbuf_addstr(data->base, data->pathname);
>                 name = relative_path(data->base->buf, prefix, &sb);
>                 quote_c_style(name, &quoted, NULL, 0);
> -               strbuf_addstr(line, quoted.buf);
> +               strbuf_addbuf(line, &quoted);
>         } else {
>                 errlen = (unsigned long)len;
>                 die(_("bad ls-tree format: %%%.*s"), errlen, start);
> -- snap --
>
> But I think that the first hunk indicates a deeper issue, as `%7s`
> probably meant to pad the dash to seven dashes (which that format won't
> accomplish, but `strbuf_addchars()` would)?
>
> Ciao,
> Dscho
>
> > +     } else {
> > +             strbuf_addstr(line, "-");
> >       }
> > -     if (cmdmode == MODE_OBJECT_ONLY) {
> > -             shown_bits = SHOW_OBJECT_NAME;
> > -             return 0;
> > +}
> > +
> > +static size_t expand_show_tree(struct strbuf *line, const char *start,
> > +                            void *context)
> > +{
> > +     struct shown_data *data = context;
> > +     const char *end;
> > +     const char *p;
> > +     unsigned int errlen;
> > +     size_t len;
> > +     len = strbuf_expand_literal_cb(line, start, NULL);
> > +     if (len)
> > +             return len;
> > +
> > +     if (*start != '(')
> > +             die(_("bad ls-tree format: as '%s'"), start);
> > +
> > +     end = strchr(start + 1, ')');
> > +     if (!end)
> > +             die(_("bad ls-tree format: element '%s' does not end in ')'"), start);
> > +
> > +     len = end - start + 1;
> > +     if (skip_prefix(start, "(mode)", &p)) {
> > +             strbuf_addf(line, "%06o", data->mode);
> > +     } else if (skip_prefix(start, "(type)", &p)) {
> > +             strbuf_addstr(line, type_name(data->type));
> > +     } else if (skip_prefix(start, "(size:padded)", &p)) {
> > +             expand_objectsize(line, data->oid, data->type, 1);
> > +     } else if (skip_prefix(start, "(size)", &p)) {
> > +             expand_objectsize(line, data->oid, data->type, 0);
> > +     } else if (skip_prefix(start, "(object)", &p)) {
> > +             strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
> > +     } else if (skip_prefix(start, "(file)", &p)) {
> > +             const char *name = data->base->buf;
> > +             const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
> > +             struct strbuf quoted = STRBUF_INIT;
> > +             struct strbuf sb = STRBUF_INIT;
> > +             strbuf_addstr(data->base, data->pathname);
> > +             name = relative_path(data->base->buf, prefix, &sb);
> > +             quote_c_style(name, &quoted, NULL, 0);
> > +             strbuf_addstr(line, quoted.buf);
> > +     } else {
> > +             errlen = (unsigned long)len;
> > +             die(_("bad ls-tree format: %%%.*s"), errlen, start);
> >       }
> > -     if (!ls_options || (ls_options & LS_RECURSIVE)
> > -         || (ls_options & LS_SHOW_TREES)
> > -         || (ls_options & LS_TREE_ONLY))
> > -             shown_bits = SHOW_DEFAULT;
> > -     if (cmdmode == MODE_LONG)
> > -             shown_bits = SHOW_DEFAULT | SHOW_SIZE;
> > -     return 1;
> > +     return len;
> >  }
> >
> >  static int show_recursive(const char *base, size_t baselen,
> > @@ -106,6 +158,75 @@ static int show_recursive(const char *base, size_t baselen,
> >       return 0;
> >  }
> >
> > +static int show_tree_init(enum object_type *type, struct strbuf *base,
> > +                       const char *pathname, unsigned mode, int *retval)
> > +{
> > +     if (S_ISGITLINK(mode)) {
> > +             *type = OBJ_COMMIT;
> > +     } else if (S_ISDIR(mode)) {
> > +             if (show_recursive(base->buf, base->len, pathname)) {
> > +                     *retval = READ_TREE_RECURSIVE;
> > +                     if (!(ls_options & LS_SHOW_TREES))
> > +                             return 1;
> > +             }
> > +             *type = OBJ_TREE;
> > +     }
> > +     else if (ls_options & LS_TREE_ONLY)
> > +             return 1;
> > +     return 0;
> > +}
> > +
> > +static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
> > +                      const char *pathname, unsigned mode, void *context)
> > +{
> > +     size_t baselen;
> > +     int retval = 0;
> > +     struct strbuf line = STRBUF_INIT;
> > +     struct shown_data data = {
> > +             .mode = mode,
> > +             .type = OBJ_BLOB,
> > +             .oid = oid,
> > +             .pathname = pathname,
> > +             .base = base,
> > +     };
> > +
> > +     if (show_tree_init(&data.type, base, pathname, mode, &retval))
> > +             return retval;
> > +
> > +     baselen = base->len;
> > +     strbuf_expand(&line, format, expand_show_tree, &data);
> > +     strbuf_addch(&line, line_termination);
> > +     fwrite(line.buf, line.len, 1, stdout);
> > +     strbuf_setlen(base, baselen);
> > +     return retval;
> > +}
> > +
> > +static int parse_shown_fields(void)
> > +{
> > +     if (cmdmode == MODE_NAME_ONLY ||
> > +         (format && !strcmp(format, name_only_format))) {
> > +             shown_bits = SHOW_FILE_NAME;
> > +             return 1;
> > +     }
> > +
> > +     if (cmdmode == MODE_OBJECT_ONLY ||
> > +         (format && !strcmp(format, object_only_format))) {
> > +             shown_bits = SHOW_OBJECT_NAME;
> > +             return 1;
> > +     }
> > +
> > +     if (!ls_options || (ls_options & LS_RECURSIVE)
> > +         || (ls_options & LS_SHOW_TREES)
> > +         || (ls_options & LS_TREE_ONLY)
> > +             || (format && !strcmp(format, default_format)))
> > +             shown_bits = SHOW_DEFAULT;
> > +
> > +     if (cmdmode == MODE_LONG ||
> > +             (format && !strcmp(format, long_format)))
> > +             shown_bits = SHOW_DEFAULT | SHOW_SIZE;
> > +     return 1;
> > +}
> > +
> >  static int show_default(struct shown_data *data)
> >  {
> >       size_t baselen = data->base->len;
> > @@ -137,24 +258,6 @@ static int show_default(struct shown_data *data)
> >       return 1;
> >  }
> >
> > -static int show_tree_init(enum object_type *type, struct strbuf *base,
> > -                       const char *pathname, unsigned mode, int *retval)
> > -{
> > -     if (S_ISGITLINK(mode)) {
> > -             *type = OBJ_COMMIT;
> > -     } else if (S_ISDIR(mode)) {
> > -             if (show_recursive(base->buf, base->len, pathname)) {
> > -                     *retval = READ_TREE_RECURSIVE;
> > -                     if (!(ls_options & LS_SHOW_TREES))
> > -                             return 1;
> > -             }
> > -             *type = OBJ_TREE;
> > -     }
> > -     else if (ls_options & LS_TREE_ONLY)
> > -             return 1;
> > -     return 0;
> > -}
> > -
> >  static int show_tree(const struct object_id *oid, struct strbuf *base,
> >               const char *pathname, unsigned mode, void *context)
> >  {
> > @@ -196,6 +299,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
> >       struct object_id oid;
> >       struct tree *tree;
> >       int i, full_tree = 0;
> > +     read_tree_fn_t fn = show_tree;
> >       const struct option ls_tree_options[] = {
> >               OPT_BIT('d', NULL, &ls_options, N_("only show trees"),
> >                       LS_TREE_ONLY),
> > @@ -218,6 +322,9 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
> >               OPT_BOOL(0, "full-tree", &full_tree,
> >                        N_("list entire tree; not just current directory "
> >                           "(implies --full-name)")),
> > +             OPT_STRING_F(0, "format", &format, N_("format"),
> > +                          N_("format to use for the output"),
> > +                          PARSE_OPT_NONEG),
> >               OPT__ABBREV(&abbrev),
> >               OPT_END()
> >       };
> > @@ -238,6 +345,10 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
> >           ((LS_TREE_ONLY|LS_RECURSIVE) & ls_options))
> >               ls_options |= LS_SHOW_TREES;
> >
> > +     if (format && cmdmode)
> > +             usage_msg_opt(
> > +                     _("--format can't be combined with other format-altering options"),
> > +                     ls_tree_usage, ls_tree_options);
> >       if (argc < 1)
> >               usage_with_options(ls_tree_usage, ls_tree_options);
> >       if (get_oid(argv[0], &oid))
> > @@ -261,6 +372,18 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
> >       tree = parse_tree_indirect(&oid);
> >       if (!tree)
> >               die("not a tree object");
> > -     return !!read_tree(the_repository, tree,
> > -                        &pathspec, show_tree, NULL);
> > +
> > +     /*
> > +      * The generic show_tree_fmt() is slower than show_tree(), so
> > +      * take the fast path if possible.
> > +      */
> > +     if (format && (!strcmp(format, default_format) ||
> > +                                !strcmp(format, long_format) ||
> > +                                !strcmp(format, name_only_format) ||
> > +                                !strcmp(format, object_only_format)))
> > +             fn = show_tree;
> > +     else if (format)
> > +             fn = show_tree_fmt;
> > +
> > +     return !!read_tree(the_repository, tree, &pathspec, fn, NULL);
> >  }
> > diff --git a/t/t3105-ls-tree-format.sh b/t/t3105-ls-tree-format.sh
> > new file mode 100755
> > index 0000000000..92b4d240e8
> > --- /dev/null
> > +++ b/t/t3105-ls-tree-format.sh
> > @@ -0,0 +1,55 @@
> > +#!/bin/sh
> > +
> > +test_description='ls-tree --format'
> > +
> > +TEST_PASSES_SANITIZE_LEAK=true
> > +. ./test-lib.sh
> > +
> > +test_expect_success 'ls-tree --format usage' '
> > +     test_expect_code 129 git ls-tree --format=fmt -l &&
> > +     test_expect_code 129 git ls-tree --format=fmt --name-only &&
> > +     test_expect_code 129 git ls-tree --format=fmt --name-status &&
> > +     test_expect_code 129 git ls-tree --format=fmt --object-only
> > +'
> > +
> > +test_expect_success 'setup' '
> > +     mkdir dir &&
> > +     test_commit dir/sub-file &&
> > +     test_commit top-file
> > +'
> > +
> > +test_ls_tree_format () {
> > +     format=$1 &&
> > +     opts=$2 &&
> > +     shift 2 &&
> > +     git ls-tree $opts -r HEAD >expect.raw &&
> > +     sed "s/^/> /" >expect <expect.raw &&
> > +     git ls-tree --format="> $format" -r HEAD >actual &&
> > +     test_cmp expect actual
> > +}
> > +
> > +test_expect_success 'ls-tree --format=<default-like>' '
> > +     test_ls_tree_format \
> > +             "%(mode) %(type) %(object)%x09%(file)" \
> > +             ""
> > +'
> > +
> > +test_expect_success 'ls-tree --format=<long-like>' '
> > +     test_ls_tree_format \
> > +             "%(mode) %(type) %(object) %(size:padded)%x09%(file)" \
> > +             "--long"
> > +'
> > +
> > +test_expect_success 'ls-tree --format=<name-only-like>' '
> > +     test_ls_tree_format \
> > +             "%(file)" \
> > +             "--name-only"
> > +'
> > +
> > +test_expect_success 'ls-tree --format=<object-only-like>' '
> > +     test_ls_tree_format \
> > +             "%(object)" \
> > +             "--object-only"
> > +'
> > +
> > +test_done
> > --
> > 2.33.0.rc1.1802.gbb1c3936fb.dirty
> >
> >
> >
Johannes Schindelin Jan. 5, 2022, 1:09 p.m. UTC | #5
Hi Teng,

On Wed, 5 Jan 2022, Teng Long wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > This, along with two other similar instances, triggers the
> > `static-analysis` job in the CI failure of `seen`. The suggested diff is:
>
> The second and third I will optimize in the next patch.
>
> The first one. Actually I am a little puzzled from this :
>
> > -               strbuf_addf(line, "%7s", "-");
> > +               strbuf_addstr(line, "-");
>
> > But I think that the first hunk indicates a deeper issue, as `%7s`
> > probably meant to pad the dash to seven dashes (which that format won't
> > accomplish, but `strbuf_addchars()` would)?
>
> "strbuf_addf(line, "%7s", "-");" here is used to align the columns
> with a width of seven chars, not repeat one DASH to seven.

Ah. I misremembered and thought that `"% 7s"` would do that, but you're
correct. See below for more on this.

But first, I wonder why the test suite passes with the `strbuf_addstr()`
call... Is this line not covered by any test case?

About the `%7s` thing: The most obvious resolution is to use `"      -"`
with `strbuf_addstr()`. And I would argue that this is the best
resolution.

If you disagree (and want to spin up a full `sprintf()` every time, just
to add those six space characters), feel free to integrate the following
into your patch series:

-- snip --
From a390fcf7eec261c7f0e341bda79f2b1f326d151e Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Wed, 5 Jan 2022 14:02:19 +0100
Subject: [PATCH] cocci: allow padding with `strbuf_addf()`

A convenient way to pad strings is to use something like
`strbuf_addf(&buf, "%20s", "Hello, world!")`.

However, the Coccinelle rule that forbids a format `"%s"` with a
constant string argument cast too wide a net, and also forbade such
padding.

Let's be a bit stricter in that Coccinelle rule.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 contrib/coccinelle/strbuf.cocci | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/coccinelle/strbuf.cocci b/contrib/coccinelle/strbuf.cocci
index d9ada69b432..2d6e0f58fc8 100644
--- a/contrib/coccinelle/strbuf.cocci
+++ b/contrib/coccinelle/strbuf.cocci
@@ -44,7 +44,7 @@ struct strbuf *SBP;

 @@
 expression E1, E2;
-format F =~ "s";
+format F =~ "^s$";
 @@
 - strbuf_addf(E1, "%@F@", E2);
 + strbuf_addstr(E1, E2);
--
2.33.0.windows.2
-- snap --

Ciao,
Dscho

>
> A little weird about the fix recommendation of  "strbuf_addstr(line, "-");" ,
> because it will only add a single DASH here.
>
> It's the identical result which compares to the "master"[1]  I think with the
> current codes and I tested the "strbuf_addf()" simply and it seems to work
> fine.
>
> [1] https://github.com/git/git/blob/master/builtin/ls-tree.c#L106
Teng Long Jan. 5, 2022, 4:44 p.m. UTC | #6
On Wed, Jan 5, 2022 at 9:09 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:

> Ah. I misremembered and thought that `"% 7s"` would do that, but you're
> correct. See below for more on this.
>
> But first, I wonder why the test suite passes with the `strbuf_addstr()`
> call... Is this line not covered by any test case?

Definitely, me too.

> About the `%7s` thing: The most obvious resolution is to use `"      -"`
> with `strbuf_addstr()`. And I would argue that this is the best
> resolution.

I agree that's a quick fix in that way.
Can you feed me more info about why you think it's the best
resolution?

> If you disagree (and want to spin up a full `sprintf()` every time, just
> to add those six space characters), feel free to integrate the following
> into your patch series:
>
> -- snip --
> From a390fcf7eec261c7f0e341bda79f2b1f326d151e Mon Sep 17 00:00:00 2001
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> Date: Wed, 5 Jan 2022 14:02:19 +0100
> Subject: [PATCH] cocci: allow padding with `strbuf_addf()`
>
> A convenient way to pad strings is to use something like
> `strbuf_addf(&buf, "%20s", "Hello, world!")`.
>
> However, the Coccinelle rule that forbids a format `"%s"` with a
> constant string argument cast too wide a net, and also forbade such
> padding.
>
> Let's be a bit stricter in that Coccinelle rule.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>  contrib/coccinelle/strbuf.cocci | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/contrib/coccinelle/strbuf.cocci b/contrib/coccinelle/strbuf.cocci
> index d9ada69b432..2d6e0f58fc8 100644
> --- a/contrib/coccinelle/strbuf.cocci
> +++ b/contrib/coccinelle/strbuf.cocci
> @@ -44,7 +44,7 @@ struct strbuf *SBP;
>
>  @@
>  expression E1, E2;
> -format F =~ "s";
> +format F =~ "^s$";
>  @@
>  - strbuf_addf(E1, "%@F@", E2);
>  + strbuf_addstr(E1, E2);
> --
> 2.33.0.windows.2
> -- snap --

I appreciate the input of 'coccinelle' and the commit.

The current relevant rules of 'strbuf' was added in commit [1], the
purpose of it
seems like to forbid some inefficient use cases and chase the performance
profit as much as possible.

I think "<SP*6>-" and "%7s", they both with the same result, the former
benefits in performance, the later benefits in readability. So let's do a simple
performance test under "linux", then think about which is better for this case:

    Benchmark 1: /opt/git/ls-tree-oid-only-addf/bin/git ls-tree -r
--format='> %(mode) %(type) %(object) %(size:padded)%x09%(file)'  HEAD
    Time (mean ± σ):     387.7 ms ±   8.8 ms    [User: 357.6 ms,
System: 30.0 ms]
    Range (min … max):   377.5 ms … 399.5 ms    10 runs

    Benchmark 1: /opt/git/ls-tree-oid-only-addstr/bin/git ls-tree -r
--format='> %(mode) %(type) %(object) %(size:padded)%x09%(file)'  HEAD
    Time (mean ± σ):     388.9 ms ±   9.0 ms    [User: 362.7 ms,
System: 26.1 ms]
    Range (min … max):   373.4 ms … 399.8 ms    10 runs

It's with a slight performance difference between the two.

So, I decided to integrate your patch as a new commit in the current
patchset and
is it ok for me to mention it's from your guidance in the commit message or
a "helped-by" something like this?

Thanks.

[1] https://github.com/git/git/commit/28c23cd4c3902449aff72cb9a4a703220be0d6ac
diff mbox series

Patch

diff --git a/Documentation/git-ls-tree.txt b/Documentation/git-ls-tree.txt
index 729370f235..ef23c0fac1 100644
--- a/Documentation/git-ls-tree.txt
+++ b/Documentation/git-ls-tree.txt
@@ -10,9 +10,9 @@  SYNOPSIS
 --------
 [verse]
 'git ls-tree' [-d] [-r] [-t] [-l] [-z]
-	    [--name-only] [--name-status] [--object-only] [--full-name] [--full-tree] [--abbrev[=<n>]]
-	    <tree-ish> [<path>...]
-
+	    [--name-only] [--name-status] [--object-only]
+	    [--full-name] [--full-tree] [--abbrev[=<n>]]
+	    [--format=<format>] <tree-ish> [<path>...]
 DESCRIPTION
 -----------
 Lists the contents of a given tree object, like what "/bin/ls -a" does
@@ -79,6 +79,16 @@  OPTIONS
 	Do not limit the listing to the current working directory.
 	Implies --full-name.
 
+--format=<format>::
+	A string that interpolates `%(fieldname)` from the result
+	being shown. It also interpolates `%%` to `%`, and
+	`%xx` where `xx`are hex digits interpolates to character
+	with hex code `xx`; for example `%00` interpolates to
+	`\0` (NUL), `%09` to `\t` (TAB) and `%0a` to `\n` (LF).
+	When specified, `--format` cannot be combined with other
+	format-altering options, including `--long`, `--name-only`
+	and `--object-only`.
+
 [<path>...]::
 	When paths are given, show them (note that this isn't really raw
 	pathnames, but rather a list of patterns to match).  Otherwise
@@ -87,6 +97,9 @@  OPTIONS
 
 Output Format
 -------------
+
+Default format:
+
         <mode> SP <type> SP <object> TAB <file>
 
 This output format is compatible with what `--index-info --stdin` of
@@ -105,6 +118,37 @@  quoted as explained for the configuration variable `core.quotePath`
 (see linkgit:git-config[1]).  Using `-z` the filename is output
 verbatim and the line is terminated by a NUL byte.
 
+Customized format:
+
+It's support to print customized format by `%(fieldname)` with `--format` option.
+For example, if you want to only print the <object> and <file> fields with a
+JSON style, executing with a specific "--format" like
+
+		git ls-tree --format='{"object":"%(object)", "file":"%(file)"}' <tree-ish>
+
+The output format changes to:
+
+		{"object":"<object>", "file":"<file>"}
+
+FIELD NAMES
+-----------
+
+Various values from structured fields can be used to interpolate
+into the resulting output. For each outputing line, the following
+names can be used:
+
+mode::
+	The mode of the object.
+type::
+	The type of the object (`blob` or `tree`).
+object::
+	The name of the object.
+size[:padded]::
+	The size of the object ("-" if it's a tree).
+	It also supports a padded format of size with "%(size:padded)".
+file::
+	The filename of the object.
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 009ffeb15d..6e3e5a4d06 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -56,23 +56,75 @@  enum {
 
 static int cmdmode = MODE_UNSPECIFIED;
 
-static int parse_shown_fields(void)
+static const char *format;
+static const char *default_format = "%(mode) %(type) %(object)%x09%(file)";
+static const char *long_format = "%(mode) %(type) %(object) %(size:padded)%x09%(file)";
+static const char *name_only_format = "%(file)";
+static const char *object_only_format = "%(object)";
+
+static void expand_objectsize(struct strbuf *line, const struct object_id *oid,
+			      const enum object_type type, unsigned int padded)
 {
-	if (cmdmode == MODE_NAME_ONLY) {
-		shown_bits = SHOW_FILE_NAME;
-		return 0;
+	if (type == OBJ_BLOB) {
+		unsigned long size;
+		if (oid_object_info(the_repository, oid, &size) < 0)
+			die(_("could not get object info about '%s'"),
+			    oid_to_hex(oid));
+		if (padded)
+			strbuf_addf(line, "%7" PRIuMAX, (uintmax_t)size);
+		else
+			strbuf_addf(line, "%" PRIuMAX, (uintmax_t)size);
+	} else if (padded) {
+		strbuf_addf(line, "%7s", "-");
+	} else {
+		strbuf_addstr(line, "-");
 	}
-	if (cmdmode == MODE_OBJECT_ONLY) {
-		shown_bits = SHOW_OBJECT_NAME;
-		return 0;
+}
+
+static size_t expand_show_tree(struct strbuf *line, const char *start,
+			       void *context)
+{
+	struct shown_data *data = context;
+	const char *end;
+	const char *p;
+	unsigned int errlen;
+	size_t len;
+	len = strbuf_expand_literal_cb(line, start, NULL);
+	if (len)
+		return len;
+
+	if (*start != '(')
+		die(_("bad ls-tree format: as '%s'"), start);
+
+	end = strchr(start + 1, ')');
+	if (!end)
+		die(_("bad ls-tree format: element '%s' does not end in ')'"), start);
+
+	len = end - start + 1;
+	if (skip_prefix(start, "(mode)", &p)) {
+		strbuf_addf(line, "%06o", data->mode);
+	} else if (skip_prefix(start, "(type)", &p)) {
+		strbuf_addstr(line, type_name(data->type));
+	} else if (skip_prefix(start, "(size:padded)", &p)) {
+		expand_objectsize(line, data->oid, data->type, 1);
+	} else if (skip_prefix(start, "(size)", &p)) {
+		expand_objectsize(line, data->oid, data->type, 0);
+	} else if (skip_prefix(start, "(object)", &p)) {
+		strbuf_addstr(line, find_unique_abbrev(data->oid, abbrev));
+	} else if (skip_prefix(start, "(file)", &p)) {
+		const char *name = data->base->buf;
+		const char *prefix = chomp_prefix ? ls_tree_prefix : NULL;
+		struct strbuf quoted = STRBUF_INIT;
+		struct strbuf sb = STRBUF_INIT;
+		strbuf_addstr(data->base, data->pathname);
+		name = relative_path(data->base->buf, prefix, &sb);
+		quote_c_style(name, &quoted, NULL, 0);
+		strbuf_addstr(line, quoted.buf);
+	} else {
+		errlen = (unsigned long)len;
+		die(_("bad ls-tree format: %%%.*s"), errlen, start);
 	}
-	if (!ls_options || (ls_options & LS_RECURSIVE)
-	    || (ls_options & LS_SHOW_TREES)
-	    || (ls_options & LS_TREE_ONLY))
-		shown_bits = SHOW_DEFAULT;
-	if (cmdmode == MODE_LONG)
-		shown_bits = SHOW_DEFAULT | SHOW_SIZE;
-	return 1;
+	return len;
 }
 
 static int show_recursive(const char *base, size_t baselen,
@@ -106,6 +158,75 @@  static int show_recursive(const char *base, size_t baselen,
 	return 0;
 }
 
+static int show_tree_init(enum object_type *type, struct strbuf *base,
+			  const char *pathname, unsigned mode, int *retval)
+{
+	if (S_ISGITLINK(mode)) {
+		*type = OBJ_COMMIT;
+	} else if (S_ISDIR(mode)) {
+		if (show_recursive(base->buf, base->len, pathname)) {
+			*retval = READ_TREE_RECURSIVE;
+			if (!(ls_options & LS_SHOW_TREES))
+				return 1;
+		}
+		*type = OBJ_TREE;
+	}
+	else if (ls_options & LS_TREE_ONLY)
+		return 1;
+	return 0;
+}
+
+static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
+			 const char *pathname, unsigned mode, void *context)
+{
+	size_t baselen;
+	int retval = 0;
+	struct strbuf line = STRBUF_INIT;
+	struct shown_data data = {
+		.mode = mode,
+		.type = OBJ_BLOB,
+		.oid = oid,
+		.pathname = pathname,
+		.base = base,
+	};
+
+	if (show_tree_init(&data.type, base, pathname, mode, &retval))
+		return retval;
+
+	baselen = base->len;
+	strbuf_expand(&line, format, expand_show_tree, &data);
+	strbuf_addch(&line, line_termination);
+	fwrite(line.buf, line.len, 1, stdout);
+	strbuf_setlen(base, baselen);
+	return retval;
+}
+
+static int parse_shown_fields(void)
+{
+	if (cmdmode == MODE_NAME_ONLY ||
+	    (format && !strcmp(format, name_only_format))) {
+		shown_bits = SHOW_FILE_NAME;
+		return 1;
+	}
+
+	if (cmdmode == MODE_OBJECT_ONLY ||
+	    (format && !strcmp(format, object_only_format))) {
+		shown_bits = SHOW_OBJECT_NAME;
+		return 1;
+	}
+
+	if (!ls_options || (ls_options & LS_RECURSIVE)
+	    || (ls_options & LS_SHOW_TREES)
+	    || (ls_options & LS_TREE_ONLY)
+		|| (format && !strcmp(format, default_format)))
+		shown_bits = SHOW_DEFAULT;
+
+	if (cmdmode == MODE_LONG ||
+		(format && !strcmp(format, long_format)))
+		shown_bits = SHOW_DEFAULT | SHOW_SIZE;
+	return 1;
+}
+
 static int show_default(struct shown_data *data)
 {
 	size_t baselen = data->base->len;
@@ -137,24 +258,6 @@  static int show_default(struct shown_data *data)
 	return 1;
 }
 
-static int show_tree_init(enum object_type *type, struct strbuf *base,
-			  const char *pathname, unsigned mode, int *retval)
-{
-	if (S_ISGITLINK(mode)) {
-		*type = OBJ_COMMIT;
-	} else if (S_ISDIR(mode)) {
-		if (show_recursive(base->buf, base->len, pathname)) {
-			*retval = READ_TREE_RECURSIVE;
-			if (!(ls_options & LS_SHOW_TREES))
-				return 1;
-		}
-		*type = OBJ_TREE;
-	}
-	else if (ls_options & LS_TREE_ONLY)
-		return 1;
-	return 0;
-}
-
 static int show_tree(const struct object_id *oid, struct strbuf *base,
 		const char *pathname, unsigned mode, void *context)
 {
@@ -196,6 +299,7 @@  int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 	struct object_id oid;
 	struct tree *tree;
 	int i, full_tree = 0;
+	read_tree_fn_t fn = show_tree;
 	const struct option ls_tree_options[] = {
 		OPT_BIT('d', NULL, &ls_options, N_("only show trees"),
 			LS_TREE_ONLY),
@@ -218,6 +322,9 @@  int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 		OPT_BOOL(0, "full-tree", &full_tree,
 			 N_("list entire tree; not just current directory "
 			    "(implies --full-name)")),
+		OPT_STRING_F(0, "format", &format, N_("format"),
+			     N_("format to use for the output"),
+			     PARSE_OPT_NONEG),
 		OPT__ABBREV(&abbrev),
 		OPT_END()
 	};
@@ -238,6 +345,10 @@  int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 	    ((LS_TREE_ONLY|LS_RECURSIVE) & ls_options))
 		ls_options |= LS_SHOW_TREES;
 
+	if (format && cmdmode)
+		usage_msg_opt(
+			_("--format can't be combined with other format-altering options"),
+			ls_tree_usage, ls_tree_options);
 	if (argc < 1)
 		usage_with_options(ls_tree_usage, ls_tree_options);
 	if (get_oid(argv[0], &oid))
@@ -261,6 +372,18 @@  int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 	tree = parse_tree_indirect(&oid);
 	if (!tree)
 		die("not a tree object");
-	return !!read_tree(the_repository, tree,
-			   &pathspec, show_tree, NULL);
+
+	/*
+	 * The generic show_tree_fmt() is slower than show_tree(), so
+	 * take the fast path if possible.
+	 */
+	if (format && (!strcmp(format, default_format) ||
+				   !strcmp(format, long_format) ||
+				   !strcmp(format, name_only_format) ||
+				   !strcmp(format, object_only_format)))
+		fn = show_tree;
+	else if (format)
+		fn = show_tree_fmt;
+
+	return !!read_tree(the_repository, tree, &pathspec, fn, NULL);
 }
diff --git a/t/t3105-ls-tree-format.sh b/t/t3105-ls-tree-format.sh
new file mode 100755
index 0000000000..92b4d240e8
--- /dev/null
+++ b/t/t3105-ls-tree-format.sh
@@ -0,0 +1,55 @@ 
+#!/bin/sh
+
+test_description='ls-tree --format'
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'ls-tree --format usage' '
+	test_expect_code 129 git ls-tree --format=fmt -l &&
+	test_expect_code 129 git ls-tree --format=fmt --name-only &&
+	test_expect_code 129 git ls-tree --format=fmt --name-status &&
+	test_expect_code 129 git ls-tree --format=fmt --object-only
+'
+
+test_expect_success 'setup' '
+	mkdir dir &&
+	test_commit dir/sub-file &&
+	test_commit top-file
+'
+
+test_ls_tree_format () {
+	format=$1 &&
+	opts=$2 &&
+	shift 2 &&
+	git ls-tree $opts -r HEAD >expect.raw &&
+	sed "s/^/> /" >expect <expect.raw &&
+	git ls-tree --format="> $format" -r HEAD >actual &&
+	test_cmp expect actual
+}
+
+test_expect_success 'ls-tree --format=<default-like>' '
+	test_ls_tree_format \
+		"%(mode) %(type) %(object)%x09%(file)" \
+		""
+'
+
+test_expect_success 'ls-tree --format=<long-like>' '
+	test_ls_tree_format \
+		"%(mode) %(type) %(object) %(size:padded)%x09%(file)" \
+		"--long"
+'
+
+test_expect_success 'ls-tree --format=<name-only-like>' '
+	test_ls_tree_format \
+		"%(file)" \
+		"--name-only"
+'
+
+test_expect_success 'ls-tree --format=<object-only-like>' '
+	test_ls_tree_format \
+		"%(object)" \
+		"--object-only"
+'
+
+test_done