diff mbox series

[1/5] trailer: separate public from internal portion of trailer_iterator

Message ID 0bce4d4b0d5650edf477cbbcc9f4e467b7981426.1691211879.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Trailer readability cleanups | expand

Commit Message

Linus Arver Aug. 5, 2023, 5:04 a.m. UTC
From: Linus Arver <linusa@google.com>

The fields here are not meant to be used by downstream callers, so put
them behind an anonymous struct named as
"__private_to_trailer_c__do_not_use" to warn against their use.

Internally, use a "#define" to keep the code tidy.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Linus Arver <linusa@google.com>
---
 trailer.c | 12 +++++++-----
 trailer.h |  6 ++++--
 2 files changed, 11 insertions(+), 7 deletions(-)

Comments

Glen Choo Aug. 7, 2023, 9:16 p.m. UTC | #1
As someone who isn't that familiar with trailer code, and will have less
time for the ML soon, this is more of a quick drive-by..

"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +#define private __private_to_trailer_c__do_not_use
> +
>  void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>  {
>  	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
>  	strbuf_init(&iter->key, 0);
>  	strbuf_init(&iter->val, 0);
>  	opts.no_divider = 1;
> -	trailer_info_get(&iter->info, msg, &opts);
> -	iter->cur = 0;
> +	trailer_info_get(&iter->private.info, msg, &opts);
> +	iter->private.cur = 0;
>  }
> --- a/trailer.h
> +++ b/trailer.h
> @@ -119,8 +119,10 @@ struct trailer_iterator {
>  	struct strbuf val;
>...
>  	/* private */
> -	struct trailer_info info;
> -	size_t cur;
> +	struct {
> +		struct trailer_info info;
> +		size_t cur;
> +	} __private_to_trailer_c__do_not_use;
>  };

Interesting approach to "private members". I like that it's fairly
lightweight and clear. On the other hand, I think this will fail to
autocomplete on most people's development setups, and I don't think this
is worth the tradeoff.

This is the first instance of this I could find in the codebase. I'm not
really opposed to having a new way of doing things, but it would be nice
for us to be consistent with how we handle private members. Other
approaches I've seen are:

- Using a "larger" struct to hold private members and "downcasting" for
  public users (struct dir_iterator and struct dir_iterator_int). I
  dislike this because I think this enables 'wrong' memory access too
  easily.

  (As an aside, if we really wanted to 'strictly' enforce privateness in
  this patch, shouldn't we move the "#define private" into the .c file,
  the way dir_iterator_int is in the .c file?)

- Prefixing private members with "__" (khash.h and other header-only
  libraries use this at least, not sure if we have this in the 'main
  tree'). I think this works pretty well most of the time.
- Just marking private members with a comment. IMO this is good enough
  the vast majority of the time - if something is private for a good
  reason, it's unlikely to get used accidentally anyway. But properly
  enforcing "privateness" is worthy goal anyway.

Personally, I think a decent tradeoff between enforcement and ergonomics
would be to use an inner struct like you do here, but name it something
autocomplete-friendly and obviously private, like "private" or
"_private". I suspect self-regulation and code review should be enough
to catch nearly all accidental uses of private members.
Phillip Wood Aug. 8, 2023, 12:19 p.m. UTC | #2
On 07/08/2023 22:16, Glen Choo wrote:
> As someone who isn't that familiar with trailer code, and will have less
> time for the ML soon, this is more of a quick drive-by..

This is a bit of a drive-by comment as well ...

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> +#define private __private_to_trailer_c__do_not_use
>> +
>>   void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>>   {
>>   	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
>>   	strbuf_init(&iter->key, 0);
>>   	strbuf_init(&iter->val, 0);
>>   	opts.no_divider = 1;
>> -	trailer_info_get(&iter->info, msg, &opts);
>> -	iter->cur = 0;
>> +	trailer_info_get(&iter->private.info, msg, &opts);
>> +	iter->private.cur = 0;
>>   }
>> --- a/trailer.h
>> +++ b/trailer.h
>> @@ -119,8 +119,10 @@ struct trailer_iterator {
>>   	struct strbuf val;
>> ...
>>   	/* private */
>> -	struct trailer_info info;
>> -	size_t cur;
>> +	struct {
>> +		struct trailer_info info;
>> +		size_t cur;
>> +	} __private_to_trailer_c__do_not_use;
>>   };
> 
> Interesting approach to "private members". I like that it's fairly
> lightweight and clear. On the other hand, I think this will fail to
> autocomplete on most people's development setups, and I don't think this
> is worth the tradeoff.
> 
> This is the first instance of this I could find in the codebase. 

We have something similar in unpack_trees.h see 576de3d9560 
(unpack_trees: start splitting internal fields from public API, 
2023-02-27). That adds an "internal" member to "sturct unpack_trees" of 
type "struct unpack_trees_internal which seems to be a easier naming scheme.

> I'm not
> really opposed to having a new way of doing things, but it would be nice
> for us to be consistent with how we handle private members. Other
> approaches I've seen are:
> 
> - Using a "larger" struct to hold private members and "downcasting" for
>    public users (struct dir_iterator and struct dir_iterator_int). I
>    dislike this because I think this enables 'wrong' memory access too
>    easily.
> 
>    (As an aside, if we really wanted to 'strictly' enforce privateness in
>    this patch, shouldn't we move the "#define private" into the .c file,
>    the way dir_iterator_int is in the .c file?)

That #define is pretty ugly

Another common scheme is to have an opaque pointer to the private struct 
  in the public struct (aka pimpl idiom). The merge machinery uses this 
- see merge-recursive.h. (I'm working on something similar for the 
sequencer so we can change the internals without having to re-compile 
everything that includes "sequencer.h")

> - Prefixing private members with "__" (khash.h and other header-only
>    libraries use this at least, not sure if we have this in the 'main
>    tree'). I think this works pretty well most of the time.

It is common but I think the C standard reserves names beginning with "__"

> - Just marking private members with a comment. IMO this is good enough
>    the vast majority of the time - if something is private for a good
>    reason, it's unlikely to get used accidentally anyway. But properly
>    enforcing "privateness" is worthy goal anyway.
>
> Personally, I think a decent tradeoff between enforcement and ergonomics
> would be to use an inner struct like you do here, but name it something
> autocomplete-friendly and obviously private, like "private" or
> "_private".

I agree, something like that would match the unpack_trees example

> I suspect self-regulation and code review should be enough
> to catch nearly all accidental uses of private members.

Agreed

Best Wishes

Phillip
Linus Arver Aug. 10, 2023, 10:50 p.m. UTC | #3
Glen Choo <chooglen@google.com> writes:

> As someone who isn't that familiar with trailer code, and will have less
> time for the ML soon, this is more of a quick drive-by..

Aren't you also going on vacation soon? ;-)

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> +#define private __private_to_trailer_c__do_not_use
>> +
>>  void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>>  {
>>  	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
>>  	strbuf_init(&iter->key, 0);
>>  	strbuf_init(&iter->val, 0);
>>  	opts.no_divider = 1;
>> -	trailer_info_get(&iter->info, msg, &opts);
>> -	iter->cur = 0;
>> +	trailer_info_get(&iter->private.info, msg, &opts);
>> +	iter->private.cur = 0;
>>  }
>> --- a/trailer.h
>> +++ b/trailer.h
>> @@ -119,8 +119,10 @@ struct trailer_iterator {
>>  	struct strbuf val;
>>...
>>  	/* private */
>> -	struct trailer_info info;
>> -	size_t cur;
>> +	struct {
>> +		struct trailer_info info;
>> +		size_t cur;
>> +	} __private_to_trailer_c__do_not_use;
>>  };
>
> [...]
>
> This is the first instance of this I could find in the codebase. I'm not
> really opposed to having a new way of doing things, but it would be nice
> for us to be consistent with how we handle private members. Other
> approaches I've seen are:
>
> - Using a "larger" struct to hold private members and "downcasting" for
>   public users (struct dir_iterator and struct dir_iterator_int). I
>   dislike this because I think this enables 'wrong' memory access too
>   easily.
>   [...]
> - Prefixing private members with "__" (khash.h and other header-only
>   libraries use this at least, not sure if we have this in the 'main
>   tree'). I think this works pretty well most of the time.
> - Just marking private members with a comment. IMO this is good enough
>   the vast majority of the time - if something is private for a good
>   reason, it's unlikely to get used accidentally anyway. But properly
>   enforcing "privateness" is worthy goal anyway.

Thanks for documenting these other approaches.

I prefer the "larger" struct to hold private members pattern. More
specifically I like the container_of approach pointed out by Jacob [2],
because it is an established pattern in the Linux Kernel and because we
already sort of use the same idea in the list_head type we imported from
the Kernel in 94e99012fc (http-walker: reduce O(n) ops with
doubly-linked list, 2016-07-11). That is, for example for the
new_trailer_item struct we have

    struct new_trailer_item {
        struct list_head list;
        <list item stuff>
    };

and to me this is symmetric to the container_of pattern described by Jacob:

    struct dir_entry_private {
        struct dir_entry entry;
        <private stuff>
    };

Accordingly, we are already doing the "structure pointer math" (which
Jacob described in [2]) for list_head in list.h:

    /* Get typed element from list at a given position. */
    #define list_entry(ptr, type, member) \
        ((type *) ((char *) (ptr) - offsetof(type, member)))

In this patch series though, I decided to just stick with giving the
struct a private-sounding name, because I don't think we reached
consensus on what the preferred approach is for separating
public/private fields.

>   (As an aside, if we really wanted to 'strictly' enforce privateness in
>   this patch, shouldn't we move the "#define private" into the .c file,
>   the way dir_iterator_int is in the .c file?)

I think you meant moving the struct into the .c file (the "#define" is
already in the .c file).

> Personally, I think a decent tradeoff between enforcement and ergonomics
> would be to use an inner struct like you do here, but name it something
> autocomplete-friendly and obviously private, like "private" or
> "_private".

SGTM. I think I'll go with "internal" though, to align with 576de3d956
(unpack_trees: start splitting internal fields from public API,
2023-02-27) which Phillip pointed out. Will reroll.

> I suspect self-regulation and code review should be enough
> to catch nearly all accidental uses of private members.

Ack. In the future, if and when we want compiler-level guarantees to
make it impossible (this was the discussion at [1]), we can revisit this
area.

[1] https://lore.kernel.org/git/16ff5069-0408-21cd-995c-8b47afb9810d@github.com/
[2] https://lore.kernel.org/git/CA+P7+xo02dGkjb5DwJ1Af_hoQ5HiuxASheZxoFz+r6B-6cQMug@mail.gmail.com/
Linus Arver Aug. 10, 2023, 11:15 p.m. UTC | #4
Phillip Wood <phillip.wood123@gmail.com> writes:

> We have something similar in unpack_trees.h see 576de3d9560 
> (unpack_trees: start splitting internal fields from public API, 
> 2023-02-27). That adds an "internal" member to "sturct unpack_trees" of 
> type "struct unpack_trees_internal which seems to be a easier naming scheme.

Ack, I will use "internal" as the member name in the next reroll.

>>> +#define private __private_to_trailer_c__do_not_use
> [...]
> That #define is pretty ugly

Haha, indeed. But I think that's the point though (i.e., the degree of
ugliness matches the strength of our codebase's posture to discourage
its use by external users).

I will drop the #define in the next reroll though, so, I guess it's a
moot point anyway.

> Another common scheme is to have an opaque pointer to the private struct 
>   in the public struct (aka pimpl idiom). The merge machinery uses this 
> - see merge-recursive.h. (I'm working on something similar for the 
> sequencer so we can change the internals without having to re-compile 
> everything that includes "sequencer.h")

Very interesting! I look forward to seeing your work. :)

>> - Prefixing private members with "__" (khash.h and other header-only
>>    libraries use this at least, not sure if we have this in the 'main
>>    tree'). I think this works pretty well most of the time.
>
> It is common but I think the C standard reserves names beginning with "__"

Indeed (see [1]).

[1] https://devblogs.microsoft.com/oldnewthing/20230109-00/?p=107685
diff mbox series

Patch

diff --git a/trailer.c b/trailer.c
index f408f9b058d..dff3fafe865 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1214,20 +1214,22 @@  void format_trailers_from_commit(struct strbuf *out, const char *msg,
 	trailer_info_release(&info);
 }
 
+#define private __private_to_trailer_c__do_not_use
+
 void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 {
 	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
 	opts.no_divider = 1;
-	trailer_info_get(&iter->info, msg, &opts);
-	iter->cur = 0;
+	trailer_info_get(&iter->private.info, msg, &opts);
+	iter->private.cur = 0;
 }
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->cur < iter->info.trailer_nr) {
-		char *trailer = iter->info.trailers[iter->cur++];
+	while (iter->private.cur < iter->private.info.trailer_nr) {
+		char *trailer = iter->private.info.trailers[iter->private.cur++];
 		int separator_pos = find_separator(trailer, separators);
 
 		if (separator_pos < 1)
@@ -1245,7 +1247,7 @@  int trailer_iterator_advance(struct trailer_iterator *iter)
 
 void trailer_iterator_release(struct trailer_iterator *iter)
 {
-	trailer_info_release(&iter->info);
+	trailer_info_release(&iter->private.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
 }
diff --git a/trailer.h b/trailer.h
index 795d2fccfd9..db57e028650 100644
--- a/trailer.h
+++ b/trailer.h
@@ -119,8 +119,10 @@  struct trailer_iterator {
 	struct strbuf val;
 
 	/* private */
-	struct trailer_info info;
-	size_t cur;
+	struct {
+		struct trailer_info info;
+		size_t cur;
+	} __private_to_trailer_c__do_not_use;
 };
 
 /*