diff mbox series

[v2,1/1] trace2: write to directory targets

Message ID 59d8c6511bc8c5fd25473c282768b38c97df9e6b.1553126984.git.steadmon@google.com (mailing list archive)
State New, archived
Headers show
Series Write trace2 output to directories | expand

Commit Message

Josh Steadmon March 21, 2019, 12:16 a.m. UTC
When the value of a trace2 environment variable is an absolute path
referring to an existing directory, write output to files (one per
process) underneath the given directory. Files will be named according
to the final component of the trace2 SID, followed by a counter to avoid
potential collisions.

This makes it more convenient to collect traces for every git invocation
by unconditionally setting the relevant trace2 envvar to a constant
directory name.

Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 Documentation/technical/api-trace2.txt |  5 +++
 t/t0210-trace2-normal.sh               | 15 +++++++
 trace2/tr2_dst.c                       | 61 +++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 2 deletions(-)

Comments

Junio C Hamano March 21, 2019, 2:04 a.m. UTC | #1
Josh Steadmon <steadmon@google.com> writes:

> diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh
> index 03a0aedb1d..26c9c1b3b8 100755
> --- a/t/t0210-trace2-normal.sh
> +++ b/t/t0210-trace2-normal.sh
> @@ -80,6 +80,21 @@ test_expect_success 'normal stream, return code 1' '
>  	test_cmp expect actual
>  '
>  
> +test_expect_success 'randomized filename' '
> +	test_when_finished "rm -r traces actual expect" &&
> +	mkdir traces &&
> +	GIT_TR2="$(pwd)/traces" test-tool trace2 001return 0 &&
> +	perl "$TEST_DIRECTORY/t0210/scrub_normal.perl" <"$(ls traces/*)" >actual &&

This is cute.

What we want to test for this new feature is that the directory
traces/ that was originally empty now has exactly one readable file,
which was created by producing a trace.

And redirecting from "$(ls traces/*)" would succeed only when there
is exactly one readble file in the directory.  If it has none, or
more than one, the redirection will fail and we'd notice the error.

> +	cat >expect <<-EOF &&
> +		version $V
> +		start _EXE_ trace2 001return 0
> +		cmd_name trace2 (trace2)
> +		exit elapsed:_TIME_ code:0
> +		atexit elapsed:_TIME_ code:0
> +	EOF
> +	test_cmp expect actual
> +'
> +
>  # Verb 002exit
>  #
>  # Explicit exit(code) from within cmd_<verb> propagates <code>.
> diff --git a/trace2/tr2_dst.c b/trace2/tr2_dst.c
> index fd490a43ad..0e752914dc 100644
> --- a/trace2/tr2_dst.c
> +++ b/trace2/tr2_dst.c
> @@ -1,5 +1,6 @@
>  #include "cache.h"
>  #include "trace2/tr2_dst.h"
> +#include "trace2/tr2_sid.h"
>  
>  /*
>   * If a Trace2 target cannot be opened for writing, we should issue a
> @@ -12,6 +13,11 @@
>   */
>  #define TR2_ENVVAR_DST_DEBUG "GIT_TR2_DST_DEBUG"
>  
> +/*
> + * How many attempts we will make at creating a random trace output path.
> + */
> +#define MAX_RANDOM_ATTEMPTS 10

With the updated design, randomness is no longer the primary
property of this new feature.  The fact that the names are
automatically assigned is.  It could be that the source of tr2_sid
may (or may not) be some randomness, but the point is that the
caller in this patch does not care how tr2_sid is computed.

I'd call this max-attempts (or max-autopath-attempts, but that is
rather long, and I do not think inside the scope of "tr2_dst" that
is about "destination", there will be anything but the destination
path we'd "attempt" with a reasonable maximum value to compute, so
the "-autopath" clarification would not buy us much)....

>  static int tr2_dst_want_warning(void)
>  {
>  	static int tr2env_dst_debug = -1;
> @@ -36,6 +42,53 @@ void tr2_dst_trace_disable(struct tr2_dst *dst)
>  	dst->need_close = 0;
>  }
>  
> +static int tr2_dst_try_random_path(struct tr2_dst *dst, const char *tgt_prefix)

.... and I'd call this s/random/auto/ instead, if I were writing
this patch following the updated design.

> +{
> +	int fd;
> +	const char *last_slash, *sid = tr2_sid_get();
> +	struct strbuf base_path = STRBUF_INIT, final_path = STRBUF_INIT;
> +	unsigned attempt_count;
> +
> +	last_slash = strrchr(sid, '/');
> +	if (last_slash)
> +		sid = last_slash + 1;
> +
> +	strbuf_addstr(&base_path, tgt_prefix);
> +	if (!is_dir_sep(base_path.buf[base_path.len - 1]))
> +		strbuf_addch(&base_path, '/');
> +	strbuf_addstr(&base_path, sid);

I do not think it is such a huge deal, but you can remember the
value of base_path.len at this point and then get rid of the other
strbuf (and copying into it).  As that will leave only one path
variable you need to worry about, you can shorten base_path to just
path if you go that route.

    baselen = path.len;

> +	for (attempt_count = 0; attempt_count < MAX_RANDOM_ATTEMPTS; attempt_count++) {
> +		strbuf_reset(&final_path);
> +		strbuf_addbuf(&final_path, &base_path);
> +		strbuf_addf(&final_path, ".%d", attempt_count);
> +		fd = open(final_path.buf, O_WRONLY | O_CREAT | O_EXCL, 0666);

If you follow the "get rid of final_path" route, these would become:

    strbuf_setlen(&path, baselen);
    strbuf_addf(&path, ".%d", count);
    fd = open(path.buf, ..., 0666);

> +		if (fd != -1)
> +			break;
> +	}

And that way, you have one fewer strbuf to _release() at the end and
at early exit points.

> +		if (tr2_dst_want_warning())
> +			warning("trace2: could not open '%s' for '%s' tracing: %s",
> +				base_path.buf, dst->env_var_name, strerror(errno));

This would need to become

		warning("trace2: could not open '%.*s' for '%s' tracing: %s",
			path.buf, baselen, dst->env_var_name, strerror(errno));

if we go that route.
Jeff Hostetler March 21, 2019, 5:43 p.m. UTC | #2
On 3/20/2019 10:04 PM, Junio C Hamano wrote:
> Josh Steadmon <steadmon@google.com> writes:
> 
[...]
>> +/*
>> + * How many attempts we will make at creating a random trace output path.
>> + */
>> +#define MAX_RANDOM_ATTEMPTS 10
> 
> With the updated design, randomness is no longer the primary
> property of this new feature.  The fact that the names are
> automatically assigned is.  It could be that the source of tr2_sid
> may (or may not) be some randomness, but the point is that the
> caller in this patch does not care how tr2_sid is computed.
> 
> I'd call this max-attempts (or max-autopath-attempts, but that is
> rather long, and I do not think inside the scope of "tr2_dst" that
> is about "destination", there will be anything but the destination
> path we'd "attempt" with a reasonable maximum value to compute, so
> the "-autopath" clarification would not buy us much)....
> 
>>   static int tr2_dst_want_warning(void)
>>   {
>>   	static int tr2env_dst_debug = -1;
>> @@ -36,6 +42,53 @@ void tr2_dst_trace_disable(struct tr2_dst *dst)
>>   	dst->need_close = 0;
>>   }
>>   
>> +static int tr2_dst_try_random_path(struct tr2_dst *dst, const char *tgt_prefix)
> 
> .... and I'd call this s/random/auto/ instead, if I were writing
> this patch following the updated design.

I agree with Junio WRT the s/random/auto/ suggestions.

[...]
>> +	for (attempt_count = 0; attempt_count < MAX_RANDOM_ATTEMPTS; attempt_count++) {
>> +		strbuf_reset(&final_path);
>> +		strbuf_addbuf(&final_path, &base_path);
>> +		strbuf_addf(&final_path, ".%d", attempt_count);

Since the last component of the SID is already very unique and
we're unlikely to have collisions, maybe change the above line to:

		if (attempt_count > 0)
			strbuf_addf(&final_path, ".%d", attempt_count);

and in reality expect to never have files with the suffix.

Unless, that is, they turned on more than one of GIT_TR2,
GIT_TR2_PERF, or GIT_TR2_EVENT and pointed them at the same
directory, but I'm not sure if I care about that edge case
or not.

>> +		fd = open(final_path.buf, O_WRONLY | O_CREAT | O_EXCL, 0666);
[...]

Nice.  Thanks for looking into this.
Jeff
Junio C Hamano March 22, 2019, 3:30 a.m. UTC | #3
Jeff Hostetler <git@jeffhostetler.com> writes:

>>> +	for (attempt_count = 0; attempt_count < MAX_RANDOM_ATTEMPTS; attempt_count++) {
>>> +		strbuf_reset(&final_path);
>>> +		strbuf_addbuf(&final_path, &base_path);
>>> +		strbuf_addf(&final_path, ".%d", attempt_count);
>
> Since the last component of the SID is already very unique and
> we're unlikely to have collisions, maybe change the above line to:
>
> 		if (attempt_count > 0)
> 			strbuf_addf(&final_path, ".%d", attempt_count);
>
> and in reality expect to never have files with the suffix.

That's a nice property.

> Unless, that is, they turned on more than one of GIT_TR2,
> GIT_TR2_PERF, or GIT_TR2_EVENT and pointed them at the same
> directory, but I'm not sure if I care about that edge case
> or not.

That actually makes me wonder if the auto generated filenames want
to have a common trait (e.g. suffix) that allows the readers to tell
from which of these environment variables the names came from.  It
would not be very useful if two files with the same sid component
had .1 suffix for GIT_TR2 trace for one session, and the same .1
suffix is used for GIT_TR2_PERF trace for a pair of files from
another session.

But let's not worry about it for now.  If people do not want them
get mixed up and become hard to tell apart, they can always specify
different directories for different traces.

>
>>> +		fd = open(final_path.buf, O_WRONLY | O_CREAT | O_EXCL, 0666);
> [...]
>
> Nice.  Thanks for looking into this.
> Jeff
Jeff Hostetler March 22, 2019, 2:20 p.m. UTC | #4
On 3/21/2019 11:30 PM, Junio C Hamano wrote:
> Jeff Hostetler <git@jeffhostetler.com> writes:
> 
>>>> +	for (attempt_count = 0; attempt_count < MAX_RANDOM_ATTEMPTS; attempt_count++) {
>>>> +		strbuf_reset(&final_path);
>>>> +		strbuf_addbuf(&final_path, &base_path);
>>>> +		strbuf_addf(&final_path, ".%d", attempt_count);
>>
>> Since the last component of the SID is already very unique and
>> we're unlikely to have collisions, maybe change the above line to:
>>
>> 		if (attempt_count > 0)
>> 			strbuf_addf(&final_path, ".%d", attempt_count);
>>
>> and in reality expect to never have files with the suffix.
> 
> That's a nice property.
> 
>> Unless, that is, they turned on more than one of GIT_TR2,
>> GIT_TR2_PERF, or GIT_TR2_EVENT and pointed them at the same
>> directory, but I'm not sure if I care about that edge case
>> or not.
> 
> That actually makes me wonder if the auto generated filenames want
> to have a common trait (e.g. suffix) that allows the readers to tell
> from which of these environment variables the names came from.  It
> would not be very useful if two files with the same sid component
> had .1 suffix for GIT_TR2 trace for one session, and the same .1
> suffix is used for GIT_TR2_PERF trace for a pair of files from
> another session.

I thought about suggesting that, but didn't think it worth the bother.

> But let's not worry about it for now.  If people do not want them
> get mixed up and become hard to tell apart, they can always specify
> different directories for different traces.

agreed. it should be very rare.  the SID is the built from
	"<microseconds-since-the-epoch><dash><pid>"
so something would have to be seriously wrong with their
system to get collision from 2 different git commands.

and yes, they the advise should be to use different directories
for the different streams.

> 
>>
>>>> +		fd = open(final_path.buf, O_WRONLY | O_CREAT | O_EXCL, 0666);
>> [...]
>>
>> Nice.  Thanks for looking into this.
>> Jeff
diff mbox series

Patch

diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt
index 2de565fa3d..318673e8e5 100644
--- a/Documentation/technical/api-trace2.txt
+++ b/Documentation/technical/api-trace2.txt
@@ -109,6 +109,11 @@  values are recognized.
 
 	Enables the target, opens and writes to the file in append mode.
 
+	If the target already exists and is a directory, the traces will be
+	written to files (one per process) underneath the given directory. They
+	will be named according to the last component of the SID followed by a
+	counter to avoid potential collisions.
+
 `af_unix:[<socket_type>:]<absolute-pathname>`::
 
 	Enables the target, opens and writes to a Unix Domain Socket
diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh
index 03a0aedb1d..26c9c1b3b8 100755
--- a/t/t0210-trace2-normal.sh
+++ b/t/t0210-trace2-normal.sh
@@ -80,6 +80,21 @@  test_expect_success 'normal stream, return code 1' '
 	test_cmp expect actual
 '
 
+test_expect_success 'randomized filename' '
+	test_when_finished "rm -r traces actual expect" &&
+	mkdir traces &&
+	GIT_TR2="$(pwd)/traces" test-tool trace2 001return 0 &&
+	perl "$TEST_DIRECTORY/t0210/scrub_normal.perl" <"$(ls traces/*)" >actual &&
+	cat >expect <<-EOF &&
+		version $V
+		start _EXE_ trace2 001return 0
+		cmd_name trace2 (trace2)
+		exit elapsed:_TIME_ code:0
+		atexit elapsed:_TIME_ code:0
+	EOF
+	test_cmp expect actual
+'
+
 # Verb 002exit
 #
 # Explicit exit(code) from within cmd_<verb> propagates <code>.
diff --git a/trace2/tr2_dst.c b/trace2/tr2_dst.c
index fd490a43ad..0e752914dc 100644
--- a/trace2/tr2_dst.c
+++ b/trace2/tr2_dst.c
@@ -1,5 +1,6 @@ 
 #include "cache.h"
 #include "trace2/tr2_dst.h"
+#include "trace2/tr2_sid.h"
 
 /*
  * If a Trace2 target cannot be opened for writing, we should issue a
@@ -12,6 +13,11 @@ 
  */
 #define TR2_ENVVAR_DST_DEBUG "GIT_TR2_DST_DEBUG"
 
+/*
+ * How many attempts we will make at creating a random trace output path.
+ */
+#define MAX_RANDOM_ATTEMPTS 10
+
 static int tr2_dst_want_warning(void)
 {
 	static int tr2env_dst_debug = -1;
@@ -36,6 +42,53 @@  void tr2_dst_trace_disable(struct tr2_dst *dst)
 	dst->need_close = 0;
 }
 
+static int tr2_dst_try_random_path(struct tr2_dst *dst, const char *tgt_prefix)
+{
+	int fd;
+	const char *last_slash, *sid = tr2_sid_get();
+	struct strbuf base_path = STRBUF_INIT, final_path = STRBUF_INIT;
+	unsigned attempt_count;
+
+	last_slash = strrchr(sid, '/');
+	if (last_slash)
+		sid = last_slash + 1;
+
+	strbuf_addstr(&base_path, tgt_prefix);
+	if (!is_dir_sep(base_path.buf[base_path.len - 1]))
+		strbuf_addch(&base_path, '/');
+	strbuf_addstr(&base_path, sid);
+
+	for (attempt_count = 0; attempt_count < MAX_RANDOM_ATTEMPTS; attempt_count++) {
+		strbuf_reset(&final_path);
+		strbuf_addbuf(&final_path, &base_path);
+		strbuf_addf(&final_path, ".%d", attempt_count);
+
+		fd = open(final_path.buf, O_WRONLY | O_CREAT | O_EXCL, 0666);
+		if (fd != -1)
+			break;
+	}
+
+	if (fd == -1) {
+		if (tr2_dst_want_warning())
+			warning("trace2: could not open '%s' for '%s' tracing: %s",
+				base_path.buf, dst->env_var_name, strerror(errno));
+
+		tr2_dst_trace_disable(dst);
+		strbuf_release(&base_path);
+		strbuf_release(&final_path);
+		return 0;
+	}
+
+	strbuf_release(&base_path);
+	strbuf_release(&final_path);
+
+	dst->fd = fd;
+	dst->need_close = 1;
+	dst->initialized = 1;
+
+	return dst->fd;
+}
+
 static int tr2_dst_try_path(struct tr2_dst *dst, const char *tgt_value)
 {
 	int fd = open(tgt_value, O_WRONLY | O_APPEND | O_CREAT, 0666);
@@ -202,8 +255,12 @@  int tr2_dst_get_trace_fd(struct tr2_dst *dst)
 		return dst->fd;
 	}
 
-	if (is_absolute_path(tgt_value))
-		return tr2_dst_try_path(dst, tgt_value);
+	if (is_absolute_path(tgt_value)) {
+		if (is_directory(tgt_value))
+			return tr2_dst_try_random_path(dst, tgt_value);
+		else
+			return tr2_dst_try_path(dst, tgt_value);
+	}
 
 #ifndef NO_UNIX_SOCKETS
 	if (starts_with(tgt_value, PREFIX_AF_UNIX))