diff mbox series

[2/9] trace2: convert tr2tls_thread_ctx.thread_name from strbuf to char*

Message ID 3a4fe07e40e967622035844ff10ded1ed71d94fc.1640012469.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Trace2 stopwatch timers and global counters | expand

Commit Message

Jeff Hostetler Dec. 20, 2021, 3:01 p.m. UTC
From: Jeff Hostetler <jeffhost@microsoft.com>

Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
The thread name is set when the thread is created and should not be
modified afterwards.  Replace the strbuf with an allocated pointer
to make that more clear.

This was discussed in: https://lore.kernel.org/all/xmqqa6kdwo24.fsf@gitster.g/

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 trace2/tr2_tgt_event.c |  2 +-
 trace2/tr2_tgt_perf.c  |  2 +-
 trace2/tr2_tls.c       | 16 +++++++++-------
 trace2/tr2_tls.h       |  2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Dec. 20, 2021, 4:31 p.m. UTC | #1
On Mon, Dec 20 2021, Jeff Hostetler via GitGitGadget wrote:

> From: Jeff Hostetler <jeffhost@microsoft.com>
>
> Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
> The thread name is set when the thread is created and should not be
> be modified afterwards.  Replace the strbuf with an allocated pointer
> to make that more clear.
>
> This was discussed in: https://lore.kernel.org/all/xmqqa6kdwo24.fsf@gitster.g/
>
> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
> ---
>  trace2/tr2_tgt_event.c |  2 +-
>  trace2/tr2_tgt_perf.c  |  2 +-
>  trace2/tr2_tls.c       | 16 +++++++++-------
>  trace2/tr2_tls.h       |  2 +-
>  4 files changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c
> index 3a0014417cc..ca48d00aebc 100644
> --- a/trace2/tr2_tgt_event.c
> +++ b/trace2/tr2_tgt_event.c
> @@ -88,7 +88,7 @@ static void event_fmt_prepare(const char *event_name, const char *file,
>  
>  	jw_object_string(jw, "event", event_name);
>  	jw_object_string(jw, "sid", tr2_sid_get());
> -	jw_object_string(jw, "thread", ctx->thread_name.buf);
> +	jw_object_string(jw, "thread", ctx->thread_name);
>  
>  	/*
>  	 * In brief mode, only emit <time> on these 2 event types.
> diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c
> index e4acca13d64..c3e57fcb3c0 100644
> --- a/trace2/tr2_tgt_perf.c
> +++ b/trace2/tr2_tgt_perf.c
> @@ -106,7 +106,7 @@ static void perf_fmt_prepare(const char *event_name,
>  
>  	strbuf_addf(buf, "d%d | ", tr2_sid_depth());
>  	strbuf_addf(buf, "%-*s | %-*s | ", TR2_MAX_THREAD_NAME,
> -		    ctx->thread_name.buf, TR2FMT_PERF_MAX_EVENT_NAME,
> +		    ctx->thread_name, TR2FMT_PERF_MAX_EVENT_NAME,
>  		    event_name);
>  
>  	len = buf->len + TR2FMT_PERF_REPO_WIDTH;
> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
> index 7da94aba522..cd8b9f2f0a0 100644
> --- a/trace2/tr2_tls.c
> +++ b/trace2/tr2_tls.c
> @@ -35,6 +35,7 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>  					     uint64_t us_thread_start)
>  {
>  	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
> +	struct strbuf buf_name = STRBUF_INIT;
>  
>  	/*
>  	 * Implicitly "tr2tls_push_self()" to capture the thread's start
> @@ -47,12 +48,13 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>  
>  	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>  
> -	strbuf_init(&ctx->thread_name, 0);
>  	if (ctx->thread_id)
> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
> -	strbuf_addstr(&ctx->thread_name, thread_name);
> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
> +		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
> +	strbuf_addstr(&buf_name, thread_name);
> +	if (buf_name.len > TR2_MAX_THREAD_NAME)
> +		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
> +
> +	ctx->thread_name = strbuf_detach(&buf_name, NULL);
>  
>  	pthread_setspecific(tr2tls_key, ctx);
>  
> @@ -95,7 +97,7 @@ void tr2tls_unset_self(void)
>  
>  	pthread_setspecific(tr2tls_key, NULL);
>  
> -	strbuf_release(&ctx->thread_name);
> +	free(ctx->thread_name);
>  	free(ctx->array_us_start);
>  	free(ctx);
>  }
> @@ -113,7 +115,7 @@ void tr2tls_pop_self(void)
>  	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
>  
>  	if (!ctx->nr_open_regions)
> -		BUG("no open regions in thread '%s'", ctx->thread_name.buf);
> +		BUG("no open regions in thread '%s'", ctx->thread_name);
>  
>  	ctx->nr_open_regions--;
>  }
> diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
> index a90bd639d48..d968da6a679 100644
> --- a/trace2/tr2_tls.h
> +++ b/trace2/tr2_tls.h
> @@ -9,7 +9,7 @@
>  #define TR2_MAX_THREAD_NAME (24)
>  
>  struct tr2tls_thread_ctx {
> -	struct strbuf thread_name;
> +	char *thread_name;
>  	uint64_t *array_us_start;
>  	size_t alloc;
>  	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */

Junio's suggestion in the linked E-Mail was to make this a "const char *".

Narrowly, I don't see why not just add a "const" to the "struct strbuf
*" instead.

But less narrowly if we're not going to change it why malloc a new one
at all? Can't we just use the "const char *" passed into
tr2tls_create_self(), and for the "th%02d:" case have the code that's
formatting it handle that case?

I.e. have the things that use it as a "%s" now call a function that
formats things as a function of the "ctx->thread_id" (which may be 0)
and limit it by TR2_MAX_THREAD_NAME?
Jeff Hostetler Dec. 20, 2021, 7:07 p.m. UTC | #2
On 12/20/21 11:31 AM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Dec 20 2021, Jeff Hostetler via GitGitGadget wrote:
> 
>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>
>> Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
>> The thread name is set when the thread is created and should not be
>> be modified afterwards.  Replace the strbuf with an allocated pointer
>> to make that more clear.
>>
>> This was discussed in: https://lore.kernel.org/all/xmqqa6kdwo24.fsf@gitster.g/
 >>...
>> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
>> index 7da94aba522..cd8b9f2f0a0 100644
>> --- a/trace2/tr2_tls.c
>> +++ b/trace2/tr2_tls.c
>> @@ -35,6 +35,7 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>   					     uint64_t us_thread_start)
>>   {
>>   	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
>> +	struct strbuf buf_name = STRBUF_INIT;
>>   
>>   	/*
>>   	 * Implicitly "tr2tls_push_self()" to capture the thread's start
>> @@ -47,12 +48,13 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>   
>>   	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>>   
>> -	strbuf_init(&ctx->thread_name, 0);
>>   	if (ctx->thread_id)
>> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
>> -	strbuf_addstr(&ctx->thread_name, thread_name);
>> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
>> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
>> +		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
>> +	strbuf_addstr(&buf_name, thread_name);
>> +	if (buf_name.len > TR2_MAX_THREAD_NAME)
>> +		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
>> +
>> +	ctx->thread_name = strbuf_detach(&buf_name, NULL);
>>   
>>   	pthread_setspecific(tr2tls_key, ctx);
>>   
 >>..
>> diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
>> index a90bd639d48..d968da6a679 100644
>> --- a/trace2/tr2_tls.h
>> +++ b/trace2/tr2_tls.h
>> @@ -9,7 +9,7 @@
>>   #define TR2_MAX_THREAD_NAME (24)
>>   
>>   struct tr2tls_thread_ctx {
>> -	struct strbuf thread_name;
>> +	char *thread_name;
>>   	uint64_t *array_us_start;
>>   	size_t alloc;
>>   	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
> 
> Junio's suggestion in the linked E-Mail was to make this a "const char *".

Yes, it was.  To me a "const char *" in a structure means that
the structure does not own the pointer and must not free it.
Whereas a "char *" means that the structure might own it and
should maybe free it when the structure is freed.  My usage here
is that the structure does own it (because it took it from the
temporary strbuf using strbuf_detach()) and so it must free it.
Therefore it should not be "const".  This has nothing to do with
whether or not we allow the thread name to be changed after the
fact.  (We don't, but that is a different issue).

> 
> Narrowly, I don't see why not just add a "const" to the "struct strbuf
> *" instead.

Adding "const" to a strbuf would be wrong in this case, since the
structure owns the strbuf and needs to strbuf_release the contained
buffer and (now) free the strbuf pointer, right?

This also makes things confusing -- all callers of tr2tls_create_self()
would now be responsible for allocating a strbuf to pass in -- and who
would own those.  This would also create opportunities for mistakes if
they pass in the address of a stack-based strbuf, right?

This is being used to initialize thread-based data, so the caller
can't just use a "function local static" or a "global static" strbuf.


> 
> But less narrowly if we're not going to change it why malloc a new one
> at all? Can't we just use the "const char *" passed into
> tr2tls_create_self(), and for the "th%02d:" case have the code that's
> formatting it handle that case?
> 
> I.e. have the things that use it as a "%s" now call a function that
> formats things as a function of the "ctx->thread_id" (which may be 0)
> and limit it by TR2_MAX_THREAD_NAME?
> 

This would be less efficient, right?  That thread name is included in
*EVERY* _perf and _event message emitted.  If we were to change the
design to have basically a callback to get the formatted value based
on the `ctx` or `ctx->thread_id` and dynamically formatting the name,
then we would have to hit that callback once (or twice) for every Trace2
message, right?  That would be much slower than just having a fixed
string (formatted when the thread is created) that we can just use.
And even if we said that the callback could cache the result (like
we do when we lookup env vars), where would it cache it?  It would have
to cache it in the `ctx`, which is where it currently is and without
any of the unnecessary overhead, right?

I think you're assuming that callers of `tr2tls_create_self()` always
pass a literal string such that that string value is always safe to
reference later.  Nothing would prevent a caller from passing the
address of a stack buffer.  It is not safe to assume that that string
pointer will always be valid, such as after the thread exits.  It is
better for _create_self() to copy the given string (whether we format
it immediately or not) than to assume that the pointer will always be
valid, right?


So I don't think we should deviate from the patch that I submitted.

Jeff
Ævar Arnfjörð Bjarmason Dec. 20, 2021, 7:35 p.m. UTC | #3
On Mon, Dec 20 2021, Jeff Hostetler wrote:

> On 12/20/21 11:31 AM, Ævar Arnfjörð Bjarmason wrote:
>> On Mon, Dec 20 2021, Jeff Hostetler via GitGitGadget wrote:
>> 
>>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>>
>>> Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
>>> The thread name is set when the thread is created and should not be
>>> be modified afterwards.  Replace the strbuf with an allocated pointer
>>> to make that more clear.
>>>
>>> This was discussed in: https://lore.kernel.org/all/xmqqa6kdwo24.fsf@gitster.g/
>>>...
>>> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
>>> index 7da94aba522..cd8b9f2f0a0 100644
>>> --- a/trace2/tr2_tls.c
>>> +++ b/trace2/tr2_tls.c
>>> @@ -35,6 +35,7 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>>   					     uint64_t us_thread_start)
>>>   {
>>>   	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
>>> +	struct strbuf buf_name = STRBUF_INIT;
>>>     	/*
>>>   	 * Implicitly "tr2tls_push_self()" to capture the thread's start
>>> @@ -47,12 +48,13 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>>     	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>>>   -	strbuf_init(&ctx->thread_name, 0);
>>>   	if (ctx->thread_id)
>>> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
>>> -	strbuf_addstr(&ctx->thread_name, thread_name);
>>> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
>>> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
>>> +		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
>>> +	strbuf_addstr(&buf_name, thread_name);
>>> +	if (buf_name.len > TR2_MAX_THREAD_NAME)
>>> +		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
>>> +
>>> +	ctx->thread_name = strbuf_detach(&buf_name, NULL);
>>>     	pthread_setspecific(tr2tls_key, ctx);
>>>   
>>>..
>>> diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
>>> index a90bd639d48..d968da6a679 100644
>>> --- a/trace2/tr2_tls.h
>>> +++ b/trace2/tr2_tls.h
>>> @@ -9,7 +9,7 @@
>>>   #define TR2_MAX_THREAD_NAME (24)
>>>     struct tr2tls_thread_ctx {
>>> -	struct strbuf thread_name;
>>> +	char *thread_name;
>>>   	uint64_t *array_us_start;
>>>   	size_t alloc;
>>>   	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
>> Junio's suggestion in the linked E-Mail was to make this a "const
>> char *".
>
> Yes, it was.  To me a "const char *" in a structure means that
> the structure does not own the pointer and must not free it.
> Whereas as "char *" means that the structure might own it and
> should maybe free it when the structure is freed.  My usage here
> is that the structure does own it (because it took it from the
> temporary strbuf using strbuf_detach()) and so it must free it.
> Therefore it should not be "const".  This has nothing to do with
> whether or not we allow the thread name to be changed after the
> fact.  (We don't, but that is a different issue).

We use the pattern of having a "const char *" that's really a "char *"
with a cast to free() in many existing APIs for this scenario.

Maybe the cast for free would be more correct here, see my recent
9081a421a6d (checkout: fix "branch info" memory leaks, 2021-11-16) & the
discussion it references. I.e. in that case we didn't go for the
"free((char *)ptr)" cast as it was a private API.

>> Narrowly, I don't see why not just add a "const" to the "struct
>> strbuf
>> *" instead.
>
> Adding "const" to a strbuf would be wrong in this case, since the
> structure owns the strbuf and needs to strbuf_release the contained
> buffer and (now) free the strbuf pointer, right?
>
> This also makes things confusing -- all callers of tr2tls_create_self()
> would now be responsible for allocating a strbuf to pass in -- and who
> would own those.  This would also create opportunities for mistakes if
> they pass in the address of a stack-based strbuf, right?
>
> This is being used to initialize thread-based data, so the caller
> can't just use a "function local static" or a "global static" strbuf.

Right, I meant that in the context of who/where you'd have your casts.

>> But less narrowly if we're not going to change it why malloc a new
>> one
>> at all? Can't we just use the "const char *" passed into
>> tr2tls_create_self(), and for the "th%02d:" case have the code that's
>> formatting it handle that case?
>> I.e. have the things that use it as a "%s" now call a function that
>> formats things as a function of the "ctx->thread_id" (which may be 0)
>> and limit it by TR2_MAX_THREAD_NAME?
>> 
>
> This would be less efficient, right?  That thread name is included in
> *EVERY* _perf and _event message emitted.  If we were to change the
> design to have basically a callback to get the formatted value based
> on the `ctx` or `cts->thread_id` and dynamically formatting the name,
> then we would have to hit that callback once (or twice) for every Trace2
> message, right?  That would be much slower than just having a fixed
> string (formatted when the thread is created) that we can just use.
> And even if we said that the callback could cache the result (like
> we do when we lookup env vars), where would it cache it?  It would have
> to cache it in the `ctx`, which is where it currently is and without
> any of the unnecessary overhead, right?

Aren't we per
https://lore.kernel.org/git/211220.86czlrurm6.gmgdl@evledraar.gmail.com/
doing a lot of that formatting (and sometimes allocation) anyway in a
way that's easily avoidable for the "perf" backend?

And for tr2_tgt_event.c we'll call jw_object_string(), which calls
append_quoted_string() for each event. That'll be re-quoting (presumably
always needlessly) the thread_name every time.

So just deferring a single strbuf_addf() doesn't seem like it would slow
things down.

> I think you're assuming that callers of `tr2tls_create_self()` always
> pass a literal string such that that string value is always safe to
> reference later.  Nothing would prevent a caller from passing the
> address of a stack buffer.  It is not safe to assume that that string
> pointer will always be valid, such as after the thread exits.  It is
> better for _create_self() to copy the given string (whether we format
> it immediately or not) than to assume that the pointer will always be
> valid, right?

Sure, if that's the API we can xstrdup() it, and/or xstrfmt() it etc. as
we're doing now.

> So I don't think we should deviate from the patch that I submitted.

I'm not saying anything needs to change here, these were really just
read-through suggestions, but I think per the above (about the casts &
optimization) that some of your assumptions here may not hold.
Junio C Hamano Dec. 21, 2021, 7:22 a.m. UTC | #4
"Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Jeff Hostetler <jeffhost@microsoft.com>
>
> Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
> The thread name is set when the thread is created and should not be
> be modified afterwards.  Replace the strbuf with an allocated pointer
> to make that more clear.

Sounds good.  Use of strbuf is perfectly fine while you compute the
final value of the string, but as a more permanent location to store
the result, it often is unsuitable (and strbuf_split_buf() is a prime
example of how *not* to design your API function around the type).

> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
> index 7da94aba522..cd8b9f2f0a0 100644
> --- a/trace2/tr2_tls.c
> +++ b/trace2/tr2_tls.c
> @@ -35,6 +35,7 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>  					     uint64_t us_thread_start)
>  {
>  	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
> +	struct strbuf buf_name = STRBUF_INIT;
>  
>  	/*
>  	 * Implicitly "tr2tls_push_self()" to capture the thread's start
> @@ -47,12 +48,13 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>  
>  	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>  
> -	strbuf_init(&ctx->thread_name, 0);
>  	if (ctx->thread_id)
> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
> -	strbuf_addstr(&ctx->thread_name, thread_name);
> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
> +		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
> +	strbuf_addstr(&buf_name, thread_name);
> +	if (buf_name.len > TR2_MAX_THREAD_NAME)
> +		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
> +
> +	ctx->thread_name = strbuf_detach(&buf_name, NULL);

This is not exactly a new problem, but if we use a mechanism to
allow arbitrary long string (like composing with strbuf and
detaching the resulting string as is), instead of having a fixed
name[] array embedded in the ctx structure, I wonder if applying the
maximum length this early makes sense.  Such a truncation would
allow more than one ctx structures to share the same name, which
somehow feels error prone, inviting a mistake to use .thread_name
member as an identifier, when its only intended use is to give a
human-readable and not necessarily unique label.  Of course, if the
maximum is reasonably low, like a few dozen bytes, it may even make
sense to embed an array of the fixed size and not worry about an
extra pointer.

> diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
> index a90bd639d48..d968da6a679 100644
> --- a/trace2/tr2_tls.h
> +++ b/trace2/tr2_tls.h
> @@ -9,7 +9,7 @@
>  #define TR2_MAX_THREAD_NAME (24)
>  
>  struct tr2tls_thread_ctx {
> -	struct strbuf thread_name;
> +	char *thread_name;

That is, something like

	char thread_name[TR2_MAX_THREAD_NAME + 1];

perhaps with moving it to the end of the struct to avoid padding
waste, would make more sense than the posted patch, if we accept
an early truncation and information loss.

The other extreme would also make equally more sense than the posted
patch.  Just grab strbuf_detach() result without truncation and
point at it with "char *thread_name" here, and if the output layer
wants to limit the names to some reasonable length, deal with the
TR2_MAX_THREAD_NAME at that layer, without losing information too
early.  It might be a much bigger surgery, I am afraid, because the
users of ctx->thread_name (and old ctx->thread_name.buf) all are
relying on the string being shorter than TR2_MAX_THREAD_NAME.

>  	uint64_t *array_us_start;
>  	size_t alloc;
>  	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
Junio C Hamano Dec. 21, 2021, 7:33 a.m. UTC | #5
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>>  struct tr2tls_thread_ctx {
>> -	struct strbuf thread_name;
>> +	char *thread_name;
>>  	uint64_t *array_us_start;
>>  	size_t alloc;
>>  	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */
>
> Junio's suggestion in the linked E-Mail was to make this a "const char *".

Sorry, but in that linked E-Mail, I wasn't picking between "const
char *" and "char *" at all.  What I cared was *not* to keep a
long-term constant string in a member whose type is "struct strbuf".
Jeff Hostetler Dec. 22, 2021, 4:28 p.m. UTC | #6
On 12/21/21 2:22 AM, Junio C Hamano wrote:
> "Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>
>> Use a 'char *' to hold the thread name rather than a 'struct strbuf'.
>> The thread name is set when the thread is created and should not be
>> be modified afterwards.  Replace the strbuf with an allocated pointer
>> to make that more clear.
> 
> Sounds good.  Use of strbuf is perfectly fine while you compute the
> final value of the string, but as a more permanent location to store
> the result, it often is unsuitable (and strbuf_split_buf() is a prime
> example of how *not* to design your API function around the type).
> 
>> diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
>> index 7da94aba522..cd8b9f2f0a0 100644
>> --- a/trace2/tr2_tls.c
>> +++ b/trace2/tr2_tls.c
>> @@ -35,6 +35,7 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>   					     uint64_t us_thread_start)
>>   {
>>   	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
>> +	struct strbuf buf_name = STRBUF_INIT;
>>   
>>   	/*
>>   	 * Implicitly "tr2tls_push_self()" to capture the thread's start
>> @@ -47,12 +48,13 @@ struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
>>   
>>   	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
>>   
>> -	strbuf_init(&ctx->thread_name, 0);
>>   	if (ctx->thread_id)
>> -		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
>> -	strbuf_addstr(&ctx->thread_name, thread_name);
>> -	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
>> -		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
>> +		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
>> +	strbuf_addstr(&buf_name, thread_name);
>> +	if (buf_name.len > TR2_MAX_THREAD_NAME)
>> +		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
>> +
>> +	ctx->thread_name = strbuf_detach(&buf_name, NULL);
> 
> This is not exactly a new problem, but if we use a mechanism to
> allow arbitrary long string (like composing with strbuf and
> detaching the resulting string as is), instead of having a fixed
> name[] array embedded in the ctx structure, I wonder if applying the
> maximum length this early makes sense.  Such a truncation would
> allow more than one ctx structures to share the same name, which
> somehow feels error prone, inviting a mistake to use .thread_name
> member as an identifier, when its only intended use is to give a
> human-readable and not necessarily unique label.  Of course, if the
> maximum is reasonably low, like a few dozen bytes, it may even make
> sense to embed an array of the fixed size and not worry about an
> extra pointer.
> 

I'll convert it to a flex-array at the bottom of the CTX structure
and then defer the truncation to the _perf target (which only does
that to keep the columns lined up).

That will simplify things considerably.

Thanks
Jeff
Jeff Hostetler Dec. 22, 2021, 4:32 p.m. UTC | #7
On 12/20/21 2:35 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Dec 20 2021, Jeff Hostetler wrote:
> 
>> On 12/20/21 11:31 AM, Ævar Arnfjörð Bjarmason wrote:
>>> On Mon, Dec 20 2021, Jeff Hostetler via GitGitGadget wrote:
>>>
>>>> From: Jeff Hostetler <jeffhost@microsoft.com>
>>>>
>>
>> Yes, it was.  To me a "const char *" in a structure means that
>> the structure does not own the pointer and must not free it.
>> Whereas as "char *" means that the structure might own it and
>> should maybe free it when the structure is freed.  My usage here
>> is that the structure does own it (because it took it from the
>> temporary strbuf using strbuf_detach()) and so it must free it.
>> Therefore it should not be "const".  This has nothing to do with
>> whether or not we allow the thread name to be changed after the
>> fact.  (We don't, but that is a different issue).
> 
> We use the pattern of having a "const char *" that's really a "char *"
> with a cast to free() in many existing APIs for this scenario.


As I mention later in this thread, I'm going to convert the
field into a flex-array, so most of the discussion in this
part of the thread no longer applies.
Junio C Hamano Dec. 22, 2021, 7:57 p.m. UTC | #8
Jeff Hostetler <git@jeffhostetler.com> writes:

> I'll convert it to a flex-array at the bottom of the CTX structure
> and then defer the truncation to the _perf target (which only does
> that to keep the columns lined up).
>
> That will simplify things considerably.

I am not sure if the complexity of flex-array is worth it.

You have been storing an up-to-24-byte human readable name by
embedding a strbuf that has two size_t plus a pointer (i.e. 24-bytes
even on Windows), and the posted patch changes it to a pointer plus
an on-heap allocation with malloc() overhead.

An embedded fixed-size thread_name[TR2_MAX_THREAD_NAME+1] member
may be the simplest thing to do, I suspect.
diff mbox series

Patch

diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c
index 3a0014417cc..ca48d00aebc 100644
--- a/trace2/tr2_tgt_event.c
+++ b/trace2/tr2_tgt_event.c
@@ -88,7 +88,7 @@  static void event_fmt_prepare(const char *event_name, const char *file,
 
 	jw_object_string(jw, "event", event_name);
 	jw_object_string(jw, "sid", tr2_sid_get());
-	jw_object_string(jw, "thread", ctx->thread_name.buf);
+	jw_object_string(jw, "thread", ctx->thread_name);
 
 	/*
 	 * In brief mode, only emit <time> on these 2 event types.
diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c
index e4acca13d64..c3e57fcb3c0 100644
--- a/trace2/tr2_tgt_perf.c
+++ b/trace2/tr2_tgt_perf.c
@@ -106,7 +106,7 @@  static void perf_fmt_prepare(const char *event_name,
 
 	strbuf_addf(buf, "d%d | ", tr2_sid_depth());
 	strbuf_addf(buf, "%-*s | %-*s | ", TR2_MAX_THREAD_NAME,
-		    ctx->thread_name.buf, TR2FMT_PERF_MAX_EVENT_NAME,
+		    ctx->thread_name, TR2FMT_PERF_MAX_EVENT_NAME,
 		    event_name);
 
 	len = buf->len + TR2FMT_PERF_REPO_WIDTH;
diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c
index 7da94aba522..cd8b9f2f0a0 100644
--- a/trace2/tr2_tls.c
+++ b/trace2/tr2_tls.c
@@ -35,6 +35,7 @@  struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
 					     uint64_t us_thread_start)
 {
 	struct tr2tls_thread_ctx *ctx = xcalloc(1, sizeof(*ctx));
+	struct strbuf buf_name = STRBUF_INIT;
 
 	/*
 	 * Implicitly "tr2tls_push_self()" to capture the thread's start
@@ -47,12 +48,13 @@  struct tr2tls_thread_ctx *tr2tls_create_self(const char *thread_name,
 
 	ctx->thread_id = tr2tls_locked_increment(&tr2_next_thread_id);
 
-	strbuf_init(&ctx->thread_name, 0);
 	if (ctx->thread_id)
-		strbuf_addf(&ctx->thread_name, "th%02d:", ctx->thread_id);
-	strbuf_addstr(&ctx->thread_name, thread_name);
-	if (ctx->thread_name.len > TR2_MAX_THREAD_NAME)
-		strbuf_setlen(&ctx->thread_name, TR2_MAX_THREAD_NAME);
+		strbuf_addf(&buf_name, "th%02d:", ctx->thread_id);
+	strbuf_addstr(&buf_name, thread_name);
+	if (buf_name.len > TR2_MAX_THREAD_NAME)
+		strbuf_setlen(&buf_name, TR2_MAX_THREAD_NAME);
+
+	ctx->thread_name = strbuf_detach(&buf_name, NULL);
 
 	pthread_setspecific(tr2tls_key, ctx);
 
@@ -95,7 +97,7 @@  void tr2tls_unset_self(void)
 
 	pthread_setspecific(tr2tls_key, NULL);
 
-	strbuf_release(&ctx->thread_name);
+	free(ctx->thread_name);
 	free(ctx->array_us_start);
 	free(ctx);
 }
@@ -113,7 +115,7 @@  void tr2tls_pop_self(void)
 	struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
 
 	if (!ctx->nr_open_regions)
-		BUG("no open regions in thread '%s'", ctx->thread_name.buf);
+		BUG("no open regions in thread '%s'", ctx->thread_name);
 
 	ctx->nr_open_regions--;
 }
diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h
index a90bd639d48..d968da6a679 100644
--- a/trace2/tr2_tls.h
+++ b/trace2/tr2_tls.h
@@ -9,7 +9,7 @@ 
 #define TR2_MAX_THREAD_NAME (24)
 
 struct tr2tls_thread_ctx {
-	struct strbuf thread_name;
+	char *thread_name;
 	uint64_t *array_us_start;
 	size_t alloc;
 	size_t nr_open_regions; /* plays role of "nr" in ALLOC_GROW */