diff mbox series

[7/8] pretty: refactor parsing of magic

Message ID 7c96899bb520ab945a650205982f54d65461d5bd.1742367347.git.martin.agren@gmail.com (mailing list archive)
State New
Headers show
Series pretty: minor bugfixing, some refactorings | expand

Commit Message

Martin Ågren March 19, 2025, 7:23 a.m. UTC
Similar to the previous commit, pull out our parsing of initial
placeholder magic into a separate function. This helps make it a bit
easier to get an overview of `format_commit_item()`. It also represents
another small step towards separating the parsing of placeholders from
subsequent usage of the parsed information.

This diff might be a bit easier to read with `-w`.

Signed-off-by: Martin Ågren <martin.agren@gmail.com>
---
 pretty.c | 69 ++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 27 deletions(-)

Comments

Patrick Steinhardt March 20, 2025, 9:18 a.m. UTC | #1
On Wed, Mar 19, 2025 at 08:23:40AM +0100, Martin Ågren wrote:
> Similar to the previous commit, pull out our parsing of initial
> placeholder magic into a separate function. This helps make it a bit
> easier to get an overview of `format_commit_item()`. It also represents
> another small step towards separating the parsing of placeholders from
> subsequent usage of the parsed information.
> 
> This diff might be a bit easier to read with `-w`.
> 
> Signed-off-by: Martin Ågren <martin.agren@gmail.com>
> ---
>  pretty.c | 69 ++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 42 insertions(+), 27 deletions(-)
> 
> diff --git a/pretty.c b/pretty.c
> index c44ff87481..ddc7fd6aab 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -1929,17 +1929,17 @@ static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
>  	return total_consumed;
>  }
>  
> -static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
> -				 const char *placeholder,
> -				 struct format_commit_context *context)
> +enum magic {
> +	NO_MAGIC,
> +	ADD_LF_BEFORE_NON_EMPTY,
> +	DEL_LF_BEFORE_EMPTY,
> +	ADD_SP_BEFORE_NON_EMPTY
> +};
> +

It would be nice to give all of these enums a common prefix, e.g.:

    enum magic {
            MAGIC_NONE,
            MAGIC_ADD_LF_BEFORE_NON_EMPTY,
            MAGIC_DEL_LF_BEFORE_EMPTY,
            MAGIC_ADD_SP_BEFORE_NON_EMPTY
    };

Makes it easier to see that things belong together and it provides
proper namespacing.

> +/* 2 for 'bad magic', otherwise whether we consumed 0 or 1 chars. */
> +static size_t parse_magic(const char *placeholder, enum magic *ret)
>  {
> -	size_t consumed, orig_len;
> -	enum {
> -		NO_MAGIC,
> -		ADD_LF_BEFORE_NON_EMPTY,
> -		DEL_LF_BEFORE_EMPTY,
> -		ADD_SP_BEFORE_NON_EMPTY
> -	} magic = NO_MAGIC;
> +	enum magic magic;
>  
>  	switch (placeholder[0]) {
>  	case '-':

On the other hand you simply retain existing names. I don't insist on
the refactoring, but still thing it would be nice as the enum has wider
scope now.

> @@ -1952,28 +1952,43 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
>  		magic = ADD_SP_BEFORE_NON_EMPTY;
>  		break;
>  	default:
> -		break;
> +		*ret = NO_MAGIC;
> +		return 0;
>  	}
> -	if (magic != NO_MAGIC) {
> -		placeholder++;
>  
> -		switch (placeholder[0]) {
> -		case 'w':
> -			/*
> -			 * `%+w()` cannot ever expand to a non-empty string,
> -			 * and it potentially changes the layout of preceding
> -			 * contents. We're thus not able to handle the magic in
> -			 * this combination and refuse the pattern.
> -			 */
> -			return 0;
> -		};
> -	}
> +	switch (placeholder[1]) {
> +	case 'w':
> +		/*
> +		 * `%+w()` cannot ever expand to a non-empty string,
> +		 * and it potentially changes the layout of preceding
> +		 * contents. We're thus not able to handle the magic in
> +		 * this combination and refuse the pattern.
> +		 */
> +		*ret = NO_MAGIC;
> +		return 2;
> +	};
> +
> +	*ret = magic;
> +	return 1;
> +}
> +
> +static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
> +				 const char *placeholder,
> +				 struct format_commit_context *context)
> +{
> +	size_t consumed, orig_len;
> +	enum magic magic;
> +
> +	consumed = parse_magic(placeholder, &magic);
> +	if (consumed > 1)
> +		return 0;
> +	placeholder += consumed;
>  
>  	orig_len = sb->len;
>  	if (context->pad.flush_type == no_flush)
> -		consumed = format_commit_one(sb, placeholder, context);
> +		consumed += format_commit_one(sb, placeholder, context);
>  	else
> -		consumed = format_and_pad_commit(sb, placeholder, context);
> +		consumed += format_and_pad_commit(sb, placeholder, context);
>  	if (magic == NO_MAGIC)
>  		return consumed;
>  
> @@ -1986,7 +2001,7 @@ static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
>  		else if (magic == ADD_SP_BEFORE_NON_EMPTY)
>  			strbuf_insertstr(sb, orig_len, " ");
>  	}
> -	return consumed + 1;
> +	return consumed;

It took me a bit to figure out why this is equivalent to what we had
before. But:

  - If `parse_magic()` returns bigger than 1 we'd have exited early, so
    this return here is never hit.

  - If it returns `0` we have hit `NO_MAGIC`, and we have another early
    return for this case.

So we only end up here in case `consumed = parse_magic(...)` is 1, and
then we add the result from `format_and_pad_commit()` to that value.
Which means that the refactoring is true to the original spirit.

Patrick
Martin Ågren March 20, 2025, 4:12 p.m. UTC | #2
On Thu, 20 Mar 2025 at 10:18, Patrick Steinhardt <ps@pks.im> wrote:
>
> On Wed, Mar 19, 2025 at 08:23:40AM +0100, Martin Ågren wrote:
> > +enum magic {
> > +     NO_MAGIC,
> > +     ADD_LF_BEFORE_NON_EMPTY,
> > +     DEL_LF_BEFORE_EMPTY,
> > +     ADD_SP_BEFORE_NON_EMPTY
> > +};
> > +
>
> It would be nice to give all of these enums a common prefix, e.g.:
>
>     enum magic {
>             MAGIC_NONE,
>             MAGIC_ADD_LF_BEFORE_NON_EMPTY,
>             MAGIC_DEL_LF_BEFORE_EMPTY,
>             MAGIC_ADD_SP_BEFORE_NON_EMPTY
>     };
>
> Makes it easier to see that things belong together and it provides
> proper namespacing.

Agreed, good point.

> On the other hand you simply retain existing names. I don't insist on
> the refactoring, but still thing it would be nice as the enum has wider
> scope now.

Right. It's only file-scoped, but that's still a bigger scope... I'll
rename them to give them all a common prefix as suggested.

> It took me a bit to figure out why this is equivalent to what we had
> before. But:
>
>   - If `parse_magic()` returns bigger than 1 we'd have exited early, so
>     this return here is never hit.
>
>   - If it returns `0` we have hit `NO_MAGIC`, and we have another early
>     return for this case.
>
> So we only end up here in case `consumed = parse_magic(...)` is 1, and
> then we add the result from `format_and_pad_commit()` to that value.
> Which means that the refactoring is true to the original spirit.

If there's anything in particular you think should be called out in the
commit message to assist future readers, just let me know. I'll take
your points above as inspiration for things to highlight better.

There's also the return value "2", which is a bit, well, magic. Or at
least fairly arbitrary. I kind of preferred it over switching to a
signed type in this one spot though.

Thanks for all your very helpful comments.


Martin
diff mbox series

Patch

diff --git a/pretty.c b/pretty.c
index c44ff87481..ddc7fd6aab 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1929,17 +1929,17 @@  static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
 	return total_consumed;
 }
 
-static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
-				 const char *placeholder,
-				 struct format_commit_context *context)
+enum magic {
+	NO_MAGIC,
+	ADD_LF_BEFORE_NON_EMPTY,
+	DEL_LF_BEFORE_EMPTY,
+	ADD_SP_BEFORE_NON_EMPTY
+};
+
+/* 2 for 'bad magic', otherwise whether we consumed 0 or 1 chars. */
+static size_t parse_magic(const char *placeholder, enum magic *ret)
 {
-	size_t consumed, orig_len;
-	enum {
-		NO_MAGIC,
-		ADD_LF_BEFORE_NON_EMPTY,
-		DEL_LF_BEFORE_EMPTY,
-		ADD_SP_BEFORE_NON_EMPTY
-	} magic = NO_MAGIC;
+	enum magic magic;
 
 	switch (placeholder[0]) {
 	case '-':
@@ -1952,28 +1952,43 @@  static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 		magic = ADD_SP_BEFORE_NON_EMPTY;
 		break;
 	default:
-		break;
+		*ret = NO_MAGIC;
+		return 0;
 	}
-	if (magic != NO_MAGIC) {
-		placeholder++;
 
-		switch (placeholder[0]) {
-		case 'w':
-			/*
-			 * `%+w()` cannot ever expand to a non-empty string,
-			 * and it potentially changes the layout of preceding
-			 * contents. We're thus not able to handle the magic in
-			 * this combination and refuse the pattern.
-			 */
-			return 0;
-		};
-	}
+	switch (placeholder[1]) {
+	case 'w':
+		/*
+		 * `%+w()` cannot ever expand to a non-empty string,
+		 * and it potentially changes the layout of preceding
+		 * contents. We're thus not able to handle the magic in
+		 * this combination and refuse the pattern.
+		 */
+		*ret = NO_MAGIC;
+		return 2;
+	};
+
+	*ret = magic;
+	return 1;
+}
+
+static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
+				 const char *placeholder,
+				 struct format_commit_context *context)
+{
+	size_t consumed, orig_len;
+	enum magic magic;
+
+	consumed = parse_magic(placeholder, &magic);
+	if (consumed > 1)
+		return 0;
+	placeholder += consumed;
 
 	orig_len = sb->len;
 	if (context->pad.flush_type == no_flush)
-		consumed = format_commit_one(sb, placeholder, context);
+		consumed += format_commit_one(sb, placeholder, context);
 	else
-		consumed = format_and_pad_commit(sb, placeholder, context);
+		consumed += format_and_pad_commit(sb, placeholder, context);
 	if (magic == NO_MAGIC)
 		return consumed;
 
@@ -1986,7 +2001,7 @@  static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
 		else if (magic == ADD_SP_BEFORE_NON_EMPTY)
 			strbuf_insertstr(sb, orig_len, " ");
 	}
-	return consumed + 1;
+	return consumed;
 }
 
 void userformat_find_requirements(const char *fmt, struct userformat_want *w)