diff mbox series

[6/9] trace2: convert ctx.thread_name to flex array

Message ID 6492b6d2b989e08bb539fff3ffe5bdf50fa0a195.1664900407.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Trace2 timers and counters and some cleanup | expand

Commit Message

Jeff Hostetler Oct. 4, 2022, 4:20 p.m. UTC
From: Jeff Hostetler <jeffhost@microsoft.com>

Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
to a "flex array" at the end of the context structure.

The `thread_name` field is a constant string that is constructed when
the context is created.  Using a (non-const) `strbuf` structure for it
caused some confusion in the past because it implied that someone
could rename a thread after it was created.  That usage was not
intended.  Changing it to a "flex array" will hopefully make the
intent more clear.

Also, move the maximum thread_name truncation to tr2_tgt_perf.c
because it is the only target that needs to worry about output column
alignment.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 trace2/tr2_tgt_event.c |  2 +-
 trace2/tr2_tgt_perf.c  |  8 ++++++--
 trace2/tr2_tls.c       | 25 +++++++++++++------------
 trace2/tr2_tls.h       |  9 +--------
 4 files changed, 21 insertions(+), 23 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Oct. 5, 2022, 11:14 a.m. UTC | #1
On Tue, Oct 04 2022, Jeff Hostetler via GitGitGadget wrote:

> From: Jeff Hostetler <jeffhost@microsoft.com>
>
> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
> to a "flex array" at the end of the context structure.
>
> The `thread_name` field is a constant string that is constructed when
> the context is created.  Using a (non-const) `strbuf` structure for it
> caused some confusion in the past because it implied that someone
> could rename a thread after it was created.

I think it's been long enough that we could use a reminder about the
"some confusion", i.e. if it was a bug report or something else.

> That usage was not intended.  Changing it to a "flex array" will
> hopefully make the intent more clear.

I see we had some back & forth back in the original submission, although
honestly I skimmed this this time around, had forgetten about that, and
had this pop out at me, and then found my earlier comments.

I see that exchange didn't end as well as I'd hoped[1], and hopefully we
can avoid that here. So having looked at this with fresh eyes maybe
these comments/questions help:

 * I'm unable to bridge the cap from (paraphrased) "we must change the
   type" to "mak[ing] the [read-only] intent more clear".

   I.e. if you go across the codebase and look at various non-const
   "char name[FLEX_ARRAY]" and add a "const" to them you'll find cases
   where we re-write the "FLEX_ARRAY" string, e.g. the one in archive.c
   is one of those (the first grep hit, I stopped looking for others at
   that point).

   Making it "const" will yield:
   
      archive.c: In function ‘queue_directory’:
   archive.c:206:29: error: passing argument 1 of ‘xsnprintf’ discards ‘const’ qualifier from pointer target type [-Werror=discarded-qualifiers]
     206 |         d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename);

   So aside from anything else (and I may be misunderstanding this) why
   does changing it to a FLEX_ARRAY give us the connotation in the
   confused API user's mind that it shouldn't be messed with that the
   "strbuf" doesn't give us?

 * Now, quoting from the below:

> [...]
> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
> index 39b41fd2487..89437e773f6 100644
> --- a/trace2/tr2_tls.c
> +++ b/trace2/tr2_tls.c
> @@ -34,7 +34,18 @@ void tr2tls_start_process_clock(void)
>  struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
>  					     uint64_t us_thread_start)
>  {
> -	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
> +	struct tr2tls_thread_ctx *ctx;
> +	struct strbuf buf_name = STRBUF_INIT;
> +	int thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
> +
> +	if (thread_id)
> +		strbuf_addf(&buf_name, "th%02d:", thread_id);
> +	strbuf_addstr(&buf_name, name_hint);
> +
> +	FLEX_ALLOC_MEM(ctx, thread_name, buf_name.buf, buf_name.len);
> +	strbuf_release(&buf_name);
> +
> +	ctx->thread_id = thread_id;
>  
>  	/*
>  	 * Implicitly "tr2tls_push_self()" to capture the thread's start
> @@ -45,15 +56,6 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
>  	ctx->array_us_start = (uint64_t *)xcalloc(ctx->alloc, sizeof(uint64_t));
>  	ctx->array_us_start[ctx->nr_open_regions++] = us_thread_start;
>  
> -	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
> -
> -	strbuf_init(&ctx->thread_name, 0);
> -	if (ctx->thread_id)
> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
> -	strbuf_addstr(&ctx->thread_name, name_hint);
> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
> -
>  	pthread_setspecific(tr2tls_key, ctx);
>  
>  	return ctx;

I found this quote hard to follow because there's functional changes
there mixed up with code re-arangement, consider leading with a commit
like:
	
	diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
	index 39b41fd2487..d7952062007 100644
	--- a/trace2/tr2_tls.c
	+++ b/trace2/tr2_tls.c
	@@ -31,10 +31,24 @@ void tr2tls_start_process_clock(void)
	 	tr2tls_us_start_process = getnanotime() / 1000;
	 }
	 
	+static void fill_thread_name(struct strbuf *buf, const char *name_hint,
	+			     int thread_id)
	+{
	+	if (thread_id)
	+		strbuf_addf(buf, "th%02d:", thread_id);
	+	strbuf_addstr(buf, name_hint);
	+	if (buf->len > TR2_MAX_THREAD_NAME)
	+		strbuf_setlen(buf, TR2_MAX_THREAD_NAME);
	+
	+}
	+
	 struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
	 					     uint64_t us_thread_start)
	 {
	-	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
	+	struct tr2tls_thread_ctx *ctx;
	+	int thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
	+
	+	ctx = xcalloc(1, sizeof(*ctx));
	 
	 	/*
	 	 * Implicitly "tr2tls_push_self()" to capture the thread's start
	@@ -45,14 +59,8 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
	 	ctx->array_us_start = (uint64_t *)xcalloc(ctx->alloc, sizeof(uint64_t));
	 	ctx->array_us_start[ctx->nr_open_regions++] = us_thread_start;
	 
	-	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
	-
	 	strbuf_init(&ctx->thread_name, 0);
	-	if (ctx->thread_id)
	-		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
	-	strbuf_addstr(&ctx->thread_name, name_hint);
	-	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
	-		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
	+	fill_thread_name(&ctx->thread_name, name_hint, thread_id);
	 
	 	pthread_setspecific(tr2tls_key, ctx);

I see from [1] that I comment on that before, i.e. that it was
"looks-to-be-unrelated", hopefully the above clarifies that, i.e. that
it's "unrelated" in the sense that we can do it separately with no
functiontal change, making the real change smaller.

If I then rebase your change on top of that I get the below diff, which
IMO makes it much clearer what's going on. Commenting on that:
	
	diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
	index d7952062007..c540027e75d 100644
	--- a/trace2/tr2_tls.c
	+++ b/trace2/tr2_tls.c
	@@ -37,18 +37,21 @@ static void fill_thread_name(struct strbuf *buf, const char *name_hint,
	 	if (thread_id)
	 		strbuf_addf(buf, "th%02d:", thread_id);
	 	strbuf_addstr(buf, name_hint);
	-	if (buf->len > TR2_MAX_THREAD_NAME)
	-		strbuf_setlen(buf, TR2_MAX_THREAD_NAME);
	-
	 }

Okey, so as explained in the commit message we no longer need to worry
about this limit, but I think leading with a change to just change that
first would help. I.e. wouldn't starting with keeping the strbuf and
doing this truncation in tr2_tgt_perf.c give you the functiotnal change
first, without the type change?

Doing it this way means we're changing the type, and also removing the
limit on thread names for non-perf backends.
	 
	 struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
	 					     uint64_t us_thread_start)
	 {
	 	struct tr2tls_thread_ctx *ctx;
	+	struct strbuf buf_name = STRBUF_INIT;

Okey, now our scratch buffer is function local, but:

	 	int thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
	 
	-	ctx = xcalloc(1, sizeof(*ctx));
	+	fill_thread_name(&buf_name, name_hint, thread_id);

We still need to malloc() that "struct strbuf", this is the main thing I
found confusing and why I didn't see the point in the original
series. I.e. we can normally pull compiler tricks with FLEX_ARRAY to
avoid allocations.

But here you need to format this string anyway, so we've already
malloc'd it, you just....

	+
	+	FLEX_ALLOC_MEM(ctx, thread_name, buf_name.buf, buf_name.len);

...memcpy() it to the FLEX_ARRAY here, but then...

	+	strbuf_release(&buf_name);

...we have to release this thing we malloc()'d, which was previously the
pointer in the struct. 

	+
	+	ctx->thread_id = thread_id;
	 
	 	/*
	 	 * Implicitly "tr2tls_push_self()" to capture the thread's start

So, I don't really see the point of this "flex array for implicit const"
per the above, you noted in [1] "I convert the field to a flex-array to
avoid [...] the allocation" but what allocation are we really avoiding
here?

We still have to allocate the strbuf as before, we just now allocate the
struct as before + the length of that strbuf, then we can free the
strbuf.

Is it that the end memory use is lower because while we have a
allocation for the strbuf we release it right away, and the compiler (on
some platforms?) can play tricks with sticking this into padding it was
going to put there anyway, given the length of the string?

I can think of ways this *might* matter, I'm just mainly saying that
you're leaving the reader guessing still.

Aside: I can imagine that we *could* actually avoid an allocation here
by being more sneaky.

I.e. you could FLEX_ALLOC_MEM() before populating the strbuf, as we know
the format is "th%02d:", so the space we need for the string is:

	strlen(name_hint) + strlen("th") + strlen(4) /* %02d: + \0 */;

Then you could memset(&ctx->thread_name, 0, len) and strbuf_attach() the
pointer to the start of that, and voila, you could use strbuf_addf() to
do the %02d format part of that.

But I still don't see how this is an area that justifies that sort of
micro-optimization (or worrying about strbuf v.s. flex array),
i.e. don't we usually just have max ncpu threads anyway (the format
implies max 99), so a few strings like "th01:main" aren't going to cost
us much, are they?

<tries it out>

Anyway, if this area was actually performance critical and we *really
cared* about avoiding allocations wouldn't we want to skip both the
"strbuf" there and the "FLEX_ARRAY", and just save away the
"thread_hint" (which the caller hardcodes) and "thread_nr", and then
append on-the-fly?

I came up with the below to do that, it passes all tests, but contains
micro-optimizations that I don't think we need (e.g. I understood you
wanted to avoid printf, so it does that).

But I think it's a useful point of discussion. What test(s) do you have
where the "master" version, FLEX_ARRAY version, and just not strbuf
formatting the thing at all differ?
	
	diff --git a/json-writer.c b/json-writer.c
	index f1cfd8fa8c6..124ad72d200 100644
	--- a/json-writer.c
	+++ b/json-writer.c
	@@ -161,6 +161,47 @@ void jw_object_string(struct json_writer *jw, const char *key, const char *value
	 	append_quoted_string(&jw->json, value);
	 }
	 
	+void jw_strbuf_add_thread_name(struct strbuf *out, const char *thread_hint,
	+			       int thread_id, int max_len)
	+{
	+	size_t oldlen = out->len;
	+
	+	if (thread_id) {
	+		strbuf_addf(out, "th");
	+		/*
	+		 * We're avoiding printf here when on-the-fly
	+		 * formatting, but why?
	+		 */
	+		if (thread_id < 10) {
	+			strbuf_addch(out, '0');
	+			strbuf_addch(out, thread_id % 10 + '0');
	+		} else {
	+			strbuf_addch(out, thread_id / 10 + '0');
	+			strbuf_addch(out, thread_id % 10 + '0');
	+		}
	+		strbuf_addch(out, ':');
	+	}
	+	if (max_len) {
	+		int added = out->len - oldlen;
	+		int limit = max_len - added;
	+
	+		strbuf_addf(out, "%.*s", limit, thread_hint);
	+	} else {
	+		strbuf_addstr(out, thread_hint);
	+	}
	+}
	+
	+void jw_object_thread(struct json_writer *jw, const char *thread_hint,
	+		      int thread_id)
	+{
	+	struct strbuf *out = &jw->json;
	+
	+	object_common(jw, "thread");
	+	strbuf_addch(out, '"');
	+	jw_strbuf_add_thread_name(out, thread_hint, thread_id, 0);
	+	strbuf_addch(out, '"');
	+}
	+
	 void jw_object_intmax(struct json_writer *jw, const char *key, intmax_t value)
	 {
	 	object_common(jw, key);
	diff --git a/json-writer.h b/json-writer.h
	index 209355e0f12..51b78296f8a 100644
	--- a/json-writer.h
	+++ b/json-writer.h
	@@ -77,6 +77,10 @@ void jw_array_begin(struct json_writer *jw, int pretty);
	 
	 void jw_object_string(struct json_writer *jw, const char *key,
	 		      const char *value);
	+void jw_strbuf_add_thread_name(struct strbuf *out, const char *thread_hint,
	+			       int thread_id, int max_len);
	+void jw_object_thread(struct json_writer *jw, const char *thread_hint,
	+		      const int thread_id);
	 void jw_object_intmax(struct json_writer *jw, const char *key, intmax_t value);
	 void jw_object_double(struct json_writer *jw, const char *key, int precision,
	 		      double value);
	diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c
	index bb0653e0e6f..6e480fce34a 100644
	--- a/trace2/tr2_tgt_event.c
	+++ b/trace2/tr2_tgt_event.c
	@@ -91,7 +91,7 @@ static void event_fmt_prepare(const char *event_name, const char *file,
	 
	 	jw_object_string(jw, "event", event_name);
	 	jw_object_string(jw, "sid", tr2_sid_get());
	-	jw_object_string(jw, "thread", ctx->thread_name);
	+	jw_object_thread(jw, ctx->thread_hint, ctx->thread_id);
	 
	 	/*
	 	 * In brief mode, only emit <time> on these 2 event types.
	diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c
	index cbf8aefd56c..9f310756349 100644
	--- a/trace2/tr2_tgt_perf.c
	+++ b/trace2/tr2_tgt_perf.c
	@@ -71,6 +71,8 @@ static void perf_fmt_prepare(const char *event_name,
	 			     const char *category, struct strbuf *buf)
	 {
	 	int len;
	+	int oldlen;
	+	int thread_pad;
	 
	 	strbuf_setlen(buf, 0);
	 
	@@ -109,11 +111,11 @@ static void perf_fmt_prepare(const char *event_name,
	 	}
	 
	 	strbuf_addf(buf, "d%d | ", tr2_sid_depth());
	-	strbuf_addf(buf, "%-*.*s | %-*s | ",
	-		    TR2FMT_PERF_MAX_THREAD_NAME,
	-		    TR2FMT_PERF_MAX_THREAD_NAME,
	-		    ctx->thread_name,
	-		    TR2FMT_PERF_MAX_EVENT_NAME,
	+	oldlen = buf->len;
	+	jw_strbuf_add_thread_name(buf, ctx->thread_hint, ctx->thread_id,
	+				  TR2FMT_PERF_MAX_THREAD_NAME);
	+	thread_pad = TR2FMT_PERF_MAX_THREAD_NAME - (buf->len - oldlen);
	+	strbuf_addf(buf, "%-*s | %-*s | ", thread_pad, "", TR2FMT_PERF_MAX_EVENT_NAME,
	 		    event_name);
	 
	 	len = buf->len + TR2FMT_PERF_REPO_WIDTH;
	diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
	index 02117f808eb..9959ec9b160 100644
	--- a/trace2/tr2_tls.c
	+++ b/trace2/tr2_tls.c
	@@ -31,26 +31,13 @@ void tr2tls_start_process_clock(void)
	 	tr2tls_us_start_process = getnanotime() / 1000;
	 }
	 
	-static void fill_thread_name(struct strbuf *buf, const char *name_hint,
	-			     int thread_id)
	-{
	-	if (thread_id)
	-		strbuf_addf(buf, "th%02d:", thread_id);
	-	strbuf_addstr(buf, name_hint);
	-}
	-
	 struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
	 					     uint64_t us_thread_start)
	 {
	-	struct tr2tls_thread_ctx *ctx;
	-	struct strbuf buf_name = STRBUF_INIT;
	+	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
	 	int thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
	 
	-	fill_thread_name(&buf_name, name_hint, thread_id);
	-
	-	FLEX_ALLOC_MEM(ctx, thread_name, buf_name.buf, buf_name.len);
	-	strbuf_release(&buf_name);
	-
	+	ctx->thread_hint = name_hint;
	 	ctx->thread_id = thread_id;
	 
	 	/*
	@@ -120,7 +107,8 @@ void tr2tls_pop_self(void)
	 	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
	 
	 	if (!ctx->nr_open_regions)
	-		BUG("no open regions in thread '%s'", ctx->thread_name);
	+		BUG("no open regions in thread '%s' '%d'", ctx->thread_hint,
	+		    ctx->thread_id);
	 
	 	ctx->nr_open_regions--;
	 }
	diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
	index e306c9bf3ec..f873615ebef 100644
	--- a/trace2/tr2_tls.h
	+++ b/trace2/tr2_tls.h
	@@ -21,8 +21,8 @@ struct tr2tls_thread_ctx {
	 	unsigned int used_any_timer:1;
	 	unsigned int used_any_per_thread_timer:1;
	 	unsigned int used_any_counter:1;
	+	const char *thread_hint;
	 	unsigned int used_any_per_thread_counter:1;
	-	char thread_name[FLEX_ARRAY];
	 };
	 
	 /*
	



1. https://lore.kernel.org/git/e3fd64ef-9e26-19da-7327-38ab77ae359a@jeffhostetler.com/



> @@ -95,7 +97,6 @@ void tr2tls_unset_self(void)
>  
>  	pthread_setspecific(tr2tls_key, NULL);
>  
> -	strbuf_release(&ctx->thread_name);
>  	free(ctx->array_us_start);
>  	free(ctx);
>  }
> @@ -113,7 +114,7 @@ void tr2tls_pop_self(void)
>  	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
>  
>  	if (!ctx->nr_open_regions)
> -		BUG("no open regions in thread '%s'", ctx->thread_name.buf);
> +		BUG("no open regions in thread '%s'", ctx->thread_name);
>  
>  	ctx->nr_open_regions--;
>  }
> diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
> index f1ee58305d6..be0bc73d08f 100644
> --- a/trace2/tr2_tls.h
> +++ b/trace2/tr2_tls.h
> @@ -9,17 +9,12 @@
>   * There is NO relation to "transport layer security".
>   */
>  
> -/*
> - * Arbitry limit for thread names for column alignment.
> - */
> -#define TR2_MAX_THREAD_NAME (24)
> -
>  struct tr2tls_thread_ctx {
> -	struct strbuf thread_name;
>  	uint64_t *array_us_start;
>  	size_t alloc;
>  	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
>  	int thread_id;
> +	char thread_name[FLEX_ARRAY];
>  };
>  
>  /*
> @@ -32,8 +27,6 @@ struct tr2tls_thread_ctx {
>   * upon the name of the thread-proc function).  For example:
>   *     { .thread_id=10, .thread_name="th10fsm-listen" }
>   * This helps to identify and distinguish messages from concurrent threads.
> - * The ctx.thread_name field is truncated if necessary to help with column
> - * alignment in printf-style messages.
>   *
>   * In this and all following functions the term "self" refers to the
>   * current thread.
Junio C Hamano Oct. 5, 2022, 6:03 p.m. UTC | #2
"Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Jeff Hostetler <jeffhost@microsoft.com>
>
> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
> to a "flex array" at the end of the context structure.
>
> The `thread_name` field is a constant string that is constructed when
> the context is created.  Using a (non-const) `strbuf` structure for it
> caused some confusion in the past because it implied that someone
> could rename a thread after it was created.  That usage was not
> intended.  Changing it to a "flex array" will hopefully make the
> intent more clear.

Surely, "const struct strbuf name;" member would be an oxymoron, and
I agree that this should follow "use strbuf as an easy-to-work-with
mechanism to come up with a string, and bake the final value into a
struct as a member of type 'const char []'" pattern.

I recall saying why I thought the flex array was overkill, though.

You have been storing an up-to-24-byte human readable name by
embedding a strbuf that has two size_t plus a pointer (i.e. 24-bytes
even on Windows), and as TR2_MAX_THREAD_NAME is capped at 24 bytes
anyway, an embedded fixed-size thread_name[TR2_MAX_THREAD_NAME+1]
member may be the simplest thing to do, I suspect.

If we were to allow arbitrarily long thread_name[], which may not be
a bad thing to do (e.g. we do not have to worry about truncation
making two names ambiguous, for example), then the flex array is the
right direction to go in, though.
Jeff Hostetler Oct. 6, 2022, 4:28 p.m. UTC | #3
On 10/5/22 7:14 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Tue, Oct 04 2022, Jeff Hostetler via GitGitGadget wrote:
> 
>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>
>> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
>> to a "flex array" at the end of the context structure.
>>
>> The `thread_name` field is a constant string that is constructed when
>> the context is created.  Using a (non-const) `strbuf` structure for it
>> caused some confusion in the past because it implied that someone
>> could rename a thread after it was created.
> 
> I think it's been long enough that we could use a reminder about the
> "some confusion", i.e. if it was a bug report or something else.
> 
>> That usage was not intended.  Changing it to a "flex array" will
>> hopefully make the intent more clear.
> 
> I see we had some back & forth back in the original submission, although
> honestly I skimmed this this time around, had forgetten about that, and
> had this pop out at me, and then found my earlier comments.
> 
> I see that exchange didn't end as well as I'd hoped[1], and hopefully we
> can avoid that here. So having looked at this with fresh eyes maybe
> these comments/questions help:

Yeah, those conversations went rather poorly.  And yes, I'd like to
avoid all of that.  There's a lot in your note here and it'll take a
little while to digest and respond.  But I did want to ACK, sooner
rather than later, that we agree on that.

And yes, I could split out the truncation into a separate commit.
And then revisit the storage change.

Thanks
Jeff
Ævar Arnfjörð Bjarmason Oct. 6, 2022, 9:05 p.m. UTC | #4
On Wed, Oct 05 2022, Junio C Hamano wrote:

> "Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>
>> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
>> to a "flex array" at the end of the context structure.
>>
>> The `thread_name` field is a constant string that is constructed when
>> the context is created.  Using a (non-const) `strbuf` structure for it
>> caused some confusion in the past because it implied that someone
>> could rename a thread after it was created.  That usage was not
>> intended.  Changing it to a "flex array" will hopefully make the
>> intent more clear.
>
> Surely, "const struct strbuf name;" member would be an oxymoron, and
> I agree that this should follow "use strbuf as an easy-to-work-with
> mechanism to come up with a string, and bake the final value into a
> struct as a member of type 'const char []'" pattern.
>
> I recall saying why I thought the flex array was overkill, though.
>
> You have been storing an up-to-24-byte human readable name by
> embedding a strbuf that has two size_t plus a pointer (i.e. 24-bytes
> even on Windows), and as TR2_MAX_THREAD_NAME is capped at 24 bytes
> anyway, an embedded fixed-size thread_name[TR2_MAX_THREAD_NAME+1]
> member may be the simplest thing to do, I suspect.
>
> If we were to allow arbitrarily long thread_name[], which may not be
> a bad thing to do (e.g. we do not have to worry about truncation
> making two names ambiguous, for example), then the flex array is the
> right direction to go in, though.

We don't even need that, AFAICT. My reply at [1] is rather long, but the
tl;dr is that the interface for this API is:
	
	$ git grep '^\s+trace2_thread_start'
	Documentation/technical/api-trace2.txt: trace2_thread_start("preload_thread");
	builtin/fsmonitor--daemon.c:    trace2_thread_start("fsm-health");
	builtin/fsmonitor--daemon.c:    trace2_thread_start("fsm-listen");
	compat/simple-ipc/ipc-unix-socket.c:    trace2_thread_start("ipc-worker");
	compat/simple-ipc/ipc-unix-socket.c:    trace2_thread_start("ipc-accept");
	compat/simple-ipc/ipc-win32.c:  trace2_thread_start("ipc-server");
	t/helper/test-fsmonitor-client.c:       trace2_thread_start("hammer");
	t/helper/test-simple-ipc.c:     trace2_thread_start("multiple");
	trace2.h:       trace2_thread_start_fl((thread_hint), __FILE__, __LINE__)

And we are taking e.g. "preload_thread" and turning it into strings like
these, and saving it into "struct tr2tls_thread_ctx".

	"preload_thread", // main thread
	"th01:preload_thread", // 1st thread
	"th02:preload_thread" // 2nd thread
	[...]

So, we don't need to strdup() and store that "preload_thread" anywhere.
It's already a constant string we have hardcoded in the program. We just
need to save a pointer to it.

Then we just format the "%s" or (if ".thread_id" == 0) or "th%02d:%s"
(if ".thread_id" > 0) on-the-fly, the two codepaths that end up using
this are already using strbuf_addf(), so just adding to the format there
is easy.

1. https://lore.kernel.org/git/221005.86y1tus9ps.gmgdl@evledraar.gmail.com/
Junio C Hamano Oct. 6, 2022, 9:50 p.m. UTC | #5
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>> If we were to allow arbitrarily long thread_name[], which may not be
>> a bad thing to do (e.g. we do not have to worry about truncation
>> making two names ambiguous, for example), then the flex array is the
>> right direction to go in, though.
>
> We don't even need that, AFAICT. ...
> ...
> And we are taking e.g. "preload_thread" and turning it into strings like
> these, and saving it into "struct tr2tls_thread_ctx".
>
> 	"preload_thread", // main thread
> 	"th01:preload_thread", // 1st thread
> 	"th02:preload_thread" // 2nd thread
> 	[...]
>
> So, we don't need to strdup() and store that "preload_thread" anywhere.
> It's already a constant string we have hardcoded in the program. We just
> need to save a pointer to it.

That sounds even simpler.
Jeff Hostetler Oct. 10, 2022, 6:31 p.m. UTC | #6
On 10/5/22 7:14 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Tue, Oct 04 2022, Jeff Hostetler via GitGitGadget wrote:
> 
>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>
>> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
>> to a "flex array" at the end of the context structure.
>>
>> The `thread_name` field is a constant string that is constructed when
>> the context is created.  Using a (non-const) `strbuf` structure for it
>> caused some confusion in the past because it implied that someone
>> could rename a thread after it was created.
> 
> I think it's been long enough that we could use a reminder about the
> "some confusion", i.e. if it was a bug report or something else.
> 
>> That usage was not intended.  Changing it to a "flex array" will
>> hopefully make the intent more clear.
> 
> I see we had some back & forth back in the original submission, although
> honestly I skimmed this this time around, had forgetten about that, and
> had this pop out at me, and then found my earlier comments.
> 
> I see that exchange didn't end as well as I'd hoped[1], and hopefully we
> can avoid that here. So having looked at this with fresh eyes maybe
> these comments/questions help:
> 
>   * I'm unable to bridge the cap from (paraphrased) "we must change the
>     type" to "mak[ing] the [read-only] intent more clear".
> 
>     I.e. if you go across the codebase and look at various non-const
>     "char name[FLEX_ARRAY]" and add a "const" to them you'll find cases
>     where we re-write the "FLEX_ARRAY" string, e.g. the one in archive.c
>     is one of those (the first grep hit, I stopped looking for others at
>     that point).
> 
>     Making it "const" will yield:
>     
>        archive.c: In function ‘queue_directory’:
>     archive.c:206:29: error: passing argument 1 of ‘xsnprintf’ discards ‘const’ qualifier from pointer target type [-Werror=discarded-qualifiers]
>       206 |         d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename);
> 
>     So aside from anything else (and I may be misunderstanding this) why
>     does changing it to a FLEX_ARRAY give us the connotation in the
>     confused API user's mind that it shouldn't be messed with that the
>     "strbuf" doesn't give us?
[...]

My change in how we store the thread-name in the thread context was JUST
to clarify that it should be treated as a constant string and that code
should not try to modify it.  There was a comment to that effect last
year -- that having it be a strbuf invited one to modify it, when that
was not the intent.

That was all I was trying to do here.  Just make it "not be a strbuf".
Perhaps I lept too far by making it a flex-array.  I probably could
have just changed the field to a "char *" and detached it from the
(now local) strbuf.  That would give the same impression, right?


[...]
>>   	/*
>>   	 * Implicitly "tr2tls_push_self()" to capture the thread's start
>> @@ -45,15 +56,6 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
>>   	ctx->array_us_start = (uint64_t *)xcalloc(ctx->alloc, sizeof(uint64_t));
>>   	ctx->array_us_start[ctx->nr_open_regions++] = us_thread_start;
>>   
>> -	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>> -
>> -	strbuf_init(&ctx->thread_name, 0);
>> -	if (ctx->thread_id)
>> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
>> -	strbuf_addstr(&ctx->thread_name, name_hint);
>> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
>> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
>> -
>>   	pthread_setspecific(tr2tls_key, ctx);
>>   
>>   	return ctx;
> 
> I found this quote hard to follow because there's functional changes
> there mixed up with code re-arangement, consider leading with a commit
> like:
[...]

sorry about that.  yes, there's a bit of churn here because i
needed to reorder the thread-name construction to be before we
allocated the context so that we'd know the buffer size.

and yes, i accidentally mixed in a function change to move the
truncation to the perf backend.

i'll redo all of this.


[...]
> <tries it out>
> 
> Anyway, if this area was actually performance critical and we *really
> cared* about avoiding allocations wouldn't we want to skip both the
> "strbuf" there and the "FLEX_ARRAY", and just save away the
> "thread_hint" (which the caller hardcodes) and "thread_nr", and then
> append on-the-fly?
> 
> I came up with the below to do that, it passes all tests, but contains
> micro-optimizations that I don't think we need (e.g. I understood you
> wanted to avoid printf, so it does that).
> 
> But I think it's a useful point of discussion. What test(s) do you have
> where the "master" version, FLEX_ARRAY version, and just not strbuf
> formatting the thing at all differ?
[...]

none of this was about micro-optimization.  i was just trying to get
the buffer away from a strbuf.  i still want it pre-formatted once
at thread-start, but that's it.

FWIW, I don't think having it formatted in each event helps anything.
it would have to go thru sprintf on every message.  it's much better
to just format it once in the thread-start.


[...]
> 	diff --git a/json-writer.c b/json-writer.c
[...] 	
> 	+void jw_strbuf_add_thread_name(struct strbuf *out, const char *thread_hint,
> 	+			       int thread_id, int max_len)
> 	+{
[...]
> 	+}
> 	+
> 	+void jw_object_thread(struct json_writer *jw, const char *thread_hint,
> 	+		      int thread_id)
> 	+{
[...]
> 	+}
[...]

We should not do this.  Just format the name in thread-start and
let json-writer print the string as we have been.

Adding thread formatting to json-writer also violates a separation
of concerns.

I'll re-roll this commit completely.

thanks
Jeff
Jeff Hostetler Oct. 10, 2022, 6:39 p.m. UTC | #7
On 10/6/22 5:05 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Oct 05 2022, Junio C Hamano wrote:
> 
>> "Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>>
>>> Convert the `tr2tls_thread_ctx.thread_name` field from a `strbuf`
>>> to a "flex array" at the end of the context structure.
>>>
[...]
> 
> We don't even need that, AFAICT. My reply at [1] is rather long, but the
> tl;dr is that the interface for this API is:
> 	
> 	$ git grep '^\s+trace2_thread_start'
> 	Documentation/technical/api-trace2.txt: trace2_thread_start("preload_thread");
> 	builtin/fsmonitor--daemon.c:    trace2_thread_start("fsm-health");
> 	builtin/fsmonitor--daemon.c:    trace2_thread_start("fsm-listen");
> 	compat/simple-ipc/ipc-unix-socket.c:    trace2_thread_start("ipc-worker");
> 	compat/simple-ipc/ipc-unix-socket.c:    trace2_thread_start("ipc-accept");
> 	compat/simple-ipc/ipc-win32.c:  trace2_thread_start("ipc-server");
> 	t/helper/test-fsmonitor-client.c:       trace2_thread_start("hammer");
> 	t/helper/test-simple-ipc.c:     trace2_thread_start("multiple");
> 	trace2.h:       trace2_thread_start_fl((thread_hint), __FILE__, __LINE__)
> 
> And we are taking e.g. "preload_thread" and turning it into strings like
> these, and saving it into "struct tr2tls_thread_ctx".
> 
> 	"preload_thread", // main thread
> 	"th01:preload_thread", // 1st thread
> 	"th02:preload_thread" // 2nd thread
> 	[...]
> 
> So, we don't need to strdup() and store that "preload_thread" anywhere.
> It's already a constant string we have hardcoded in the program. We just
> need to save a pointer to it.

Current callers tend to pass a string literal.  There's nothing
to say that they will continue to do so in the future.


> Then we just format the "%s" or (if ".thread_id" == 0) or "th%02d:%s"
> (if ".thread_id" > 0) on-the-fly, the two codepaths that end up using
> this are already using strbuf_addf(), so just adding to the format there
> is easy.
[...]

But then you'd be formatting this "th%0d:%s" on every message
printer.  Whereas we can format it once in the thread-start and
save the extra work -- at the expense of a string buffer in the
thread context.

Granted, the event handlers are generating output lines with many
"%" fields, so they are doing non-trivial amounts of work on every
event, but by using a pre-formatted thread-name, we don't need to
increase that workload.

Jeff
diff mbox series

Patch

diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c
index 37a3163be12..52f9356c695 100644
--- a/trace2/tr2_tgt_event.c
+++ b/trace2/tr2_tgt_event.c
@@ -90,7 +90,7 @@  static void event_fmt_prepare(const char *event_name, const char *file,
 
 	jw_object_string(jw, "event", event_name);
 	jw_object_string(jw, "sid", tr2_sid_get());
-	jw_object_string(jw, "thread", ctx->thread_name.buf);
+	jw_object_string(jw, "thread", ctx->thread_name);
 
 	/*
 	 * In brief mode, only emit <time> on these 2 event types.
diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c
index 8cb792488c8..fdeb3292d3a 100644
--- a/trace2/tr2_tgt_perf.c
+++ b/trace2/tr2_tgt_perf.c
@@ -25,6 +25,7 @@  static int tr2env_perf_be_brief;
 
 #define TR2FMT_PERF_FL_WIDTH (28)
 #define TR2FMT_PERF_MAX_EVENT_NAME (12)
+#define TR2FMT_PERF_MAX_THREAD_NAME (24)
 #define TR2FMT_PERF_REPO_WIDTH (3)
 #define TR2FMT_PERF_CATEGORY_WIDTH (12)
 
@@ -107,8 +108,11 @@  static void perf_fmt_prepare(const char *event_name,
 	}
 
 	strbuf_addf(buf, "d%d | ", tr2_sid_depth());
-	strbuf_addf(buf, "%-*s | %-*s | ", TR2_MAX_THREAD_NAME,
-		    ctx->thread_name.buf, TR2FMT_PERF_MAX_EVENT_NAME,
+	strbuf_addf(buf, "%-*.*s | %-*s | ",
+		    TR2FMT_PERF_MAX_THREAD_NAME,
+		    TR2FMT_PERF_MAX_THREAD_NAME,
+		    ctx->thread_name,
+		    TR2FMT_PERF_MAX_EVENT_NAME,
 		    event_name);
 
 	len = buf->len + TR2FMT_PERF_REPO_WIDTH;
diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
index 39b41fd2487..89437e773f6 100644
--- a/trace2/tr2_tls.c
+++ b/trace2/tr2_tls.c
@@ -34,7 +34,18 @@  void tr2tls_start_process_clock(void)
 struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
 					     uint64_t us_thread_start)
 {
-	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
+	struct tr2tls_thread_ctx *ctx;
+	struct strbuf buf_name = STRBUF_INIT;
+	int thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
+
+	if (thread_id)
+		strbuf_addf(&buf_name, "th%02d:", thread_id);
+	strbuf_addstr(&buf_name, name_hint);
+
+	FLEX_ALLOC_MEM(ctx, thread_name, buf_name.buf, buf_name.len);
+	strbuf_release(&buf_name);
+
+	ctx->thread_id = thread_id;
 
 	/*
 	 * Implicitly "tr2tls_push_self()" to capture the thread's start
@@ -45,15 +56,6 @@  struct tr2tls_thread_ctx *tr2tls_create_self(const char *name_hint,
 	ctx->array_us_start = (uint64_t *)xcalloc(ctx->alloc, sizeof(uint64_t));
 	ctx->array_us_start[ctx->nr_open_regions++] = us_thread_start;
 
-	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
-
-	strbuf_init(&ctx->thread_name, 0);
-	if (ctx->thread_id)
-		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
-	strbuf_addstr(&ctx->thread_name, name_hint);
-	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
-		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
-
 	pthread_setspecific(tr2tls_key, ctx);
 
 	return ctx;
@@ -95,7 +97,6 @@  void tr2tls_unset_self(void)
 
 	pthread_setspecific(tr2tls_key, NULL);
 
-	strbuf_release(&ctx->thread_name);
 	free(ctx->array_us_start);
 	free(ctx);
 }
@@ -113,7 +114,7 @@  void tr2tls_pop_self(void)
 	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
 
 	if (!ctx->nr_open_regions)
-		BUG("no open regions in thread '%s'", ctx->thread_name.buf);
+		BUG("no open regions in thread '%s'", ctx->thread_name);
 
 	ctx->nr_open_regions--;
 }
diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
index f1ee58305d6..be0bc73d08f 100644
--- a/trace2/tr2_tls.h
+++ b/trace2/tr2_tls.h
@@ -9,17 +9,12 @@ 
  * There is NO relation to "transport layer security".
  */
 
-/*
- * Arbitry limit for thread names for column alignment.
- */
-#define TR2_MAX_THREAD_NAME (24)
-
 struct tr2tls_thread_ctx {
-	struct strbuf thread_name;
 	uint64_t *array_us_start;
 	size_t alloc;
 	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
 	int thread_id;
+	char thread_name[FLEX_ARRAY];
 };
 
 /*
@@ -32,8 +27,6 @@  struct tr2tls_thread_ctx {
  * upon the name of the thread-proc function).  For example:
  *     { .thread_id=10, .thread_name="th10fsm-listen" }
  * This helps to identify and distinguish messages from concurrent threads.
- * The ctx.thread_name field is truncated if necessary to help with column
- * alignment in printf-style messages.
  *
  * In this and all following functions the term "self" refers to the
  * current thread.