diff mbox series

[v3,1/1] userdiff: extend Bash pattern to cover more shell function forms

Message ID 20250328200525.4437-2-dhar61595@gmail.com (mailing list archive)
State New
Headers show
Series userdiff: improve Bash function and word regex patterns | expand

Commit Message

Moumita March 28, 2025, 8:05 p.m. UTC
From: Moumita Dhar <dhar61595@gmail.com>

The previous function regex required explicit matching of function
bodies using `{`, `(`, `((`, or `[[`, which caused several issues:

- It failed to capture valid functions where `{` was on the next line
  due to line continuation (`\`).
- It did not recognize functions with a single-command body, such as
  `x () echo hello`.

Replacing the function body matching logic with `.*$` ensures
that everything on the function definition line is captured,
aligning with other userdiff drivers and improving hunk headers in
`git diff`.

Additionally, the word regex is refined to better recognize shell
syntax, including additional parameter expansion operators and
command-line options, improving syntax-aware diffs.

Signed-off-by: Moumita Dhar <dhar61595@gmail.com>
---
 t/t4018/bash-bashism-style-multiline-function |  4 +++
 t/t4018/bash-posix-style-multiline-function   |  4 +++
 .../bash-posix-style-single-command-function  |  3 ++
 t/t4034-diff-words.sh                         |  1 +
 t/t4034/bash/expect                           | 30 +++++++++++++++++++
 t/t4034/bash/post                             | 25 ++++++++++++++++
 t/t4034/bash/pre                              | 25 ++++++++++++++++
 userdiff.c                                    | 24 +++++++++++----
 8 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 t/t4018/bash-bashism-style-multiline-function
 create mode 100644 t/t4018/bash-posix-style-multiline-function
 create mode 100644 t/t4018/bash-posix-style-single-command-function
 create mode 100644 t/t4034/bash/expect
 create mode 100644 t/t4034/bash/post
 create mode 100644 t/t4034/bash/pre
diff mbox series

Patch

diff --git a/t/t4018/bash-bashism-style-multiline-function b/t/t4018/bash-bashism-style-multiline-function
new file mode 100644
index 0000000000..0800daa156
--- /dev/null
+++ b/t/t4018/bash-bashism-style-multiline-function
@@ -0,0 +1,4 @@ 
+function RIGHT \
+{    
+    echo 'ChangeMe'
+}
\ No newline at end of file
diff --git a/t/t4018/bash-posix-style-multiline-function b/t/t4018/bash-posix-style-multiline-function
new file mode 100644
index 0000000000..756f21524b
--- /dev/null
+++ b/t/t4018/bash-posix-style-multiline-function
@@ -0,0 +1,4 @@ 
+RIGHT() \
+{
+    ChangeMe
+}
\ No newline at end of file
diff --git a/t/t4018/bash-posix-style-single-command-function b/t/t4018/bash-posix-style-single-command-function
new file mode 100644
index 0000000000..398ae1c5d2
--- /dev/null
+++ b/t/t4018/bash-posix-style-single-command-function
@@ -0,0 +1,3 @@ 
+RIGHT() echo "hello"
+
+    ChangeMe
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index f51d3557f1..0be647c2fb 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -320,6 +320,7 @@  test_expect_success 'unset default driver' '
 
 test_language_driver ada
 test_language_driver bibtex
+test_language_driver bash
 test_language_driver cpp
 test_language_driver csharp
 test_language_driver css
diff --git a/t/t4034/bash/expect b/t/t4034/bash/expect
new file mode 100644
index 0000000000..a0f7cbd5a3
--- /dev/null
+++ b/t/t4034/bash/expect
@@ -0,0 +1,30 @@ 
+<BOLD>diff --git a/pre b/post<RESET>
+<BOLD>index 09ac008..60ba6a2 100644<RESET>
+<BOLD>--- a/pre<RESET>
+<BOLD>+++ b/post<RESET>
+<CYAN>@@ -1,25 +1,25 @@<RESET>
+<RED>my_var<RESET><GREEN>new_var<RESET>=10
+x=<RED>123<RESET><GREEN>456<RESET>
+y=<RED>3.14<RESET><GREEN>2.71<RESET>
+z=<RED>.5<RESET><GREEN>.75<RESET>
+echo <RED>$USER<RESET><GREEN>$USERNAME<RESET>
+${<RED>HOME<RESET><GREEN>HOMEDIR<RESET>}
+if [ "<RED>$a<RESET><GREEN>$x<RESET>" == "<RED>$b<RESET><GREEN>$y<RESET>" ] || [ "<RED>$c<RESET><GREEN>$x<RESET>" != "<RED>$d<RESET><GREEN>$y<RESET>" ]; then echo "OK"; fi
+((<RED>a<RESET><GREEN>x<RESET>+=<RED>b<RESET><GREEN>y<RESET>))
+((<RED>a<RESET><GREEN>x<RESET>-=<RED>b<RESET><GREEN>y<RESET>))
+$((<RED>a<RESET><GREEN>x<RESET><<<RED>b<RESET><GREEN>y<RESET>))
+$((<RED>a<RESET><GREEN>x<RESET>>><RED>b<RESET><GREEN>y<RESET>))
+${<RED>a<RESET><GREEN>x<RESET>:-<RED>b<RESET><GREEN>y<RESET>}
+${<RED>a<RESET><GREEN>x<RESET>:=<RED>b<RESET><GREEN>y<RESET>}
+${<RED>a<RESET><GREEN>x<RESET>##*/}
+${<RED>a<RESET><GREEN>x<RESET>%.*}
+${<RED>a<RESET><GREEN>x<RESET>%%.*}
+${<RED>a<RESET><GREEN>x<RESET>^^}
+${<RED>a<RESET><GREEN>x<RESET>,}
+${<RED>a<RESET><GREEN>x<RESET>,,}
+${!<RED>a<RESET><GREEN>x<RESET>}
+${<RED>a<RESET><GREEN>x<RESET>[@]}
+${<RED>a<RESET><GREEN>x<RESET>:?error message}
+${<RED>a<RESET><GREEN>x<RESET>:2:3}
+ls <RED>-a<RESET><GREEN>-x<RESET>
+ls <RED>--a<RESET><GREEN>--x<RESET>
diff --git a/t/t4034/bash/post b/t/t4034/bash/post
new file mode 100644
index 0000000000..60ba6a2e75
--- /dev/null
+++ b/t/t4034/bash/post
@@ -0,0 +1,25 @@ 
+new_var=10
+x=456
+y=2.71
+z=.75
+echo $USERNAME
+${HOMEDIR}
+if [ "$x" == "$y" ] || [ "$x" != "$y" ]; then echo "OK"; fi
+((x+=y))
+((x-=y))
+$((x<<y))
+$((x>>y))
+${x:-y}
+${x:=y}
+${x##*/}
+${x%.*}
+${x%%.*}
+${x^^}
+${x,}
+${x,,}
+${!x}
+${x[@]}
+${x:?error message}
+${x:2:3}
+ls -x
+ls --x
diff --git a/t/t4034/bash/pre b/t/t4034/bash/pre
new file mode 100644
index 0000000000..09ac008a83
--- /dev/null
+++ b/t/t4034/bash/pre
@@ -0,0 +1,25 @@ 
+my_var=10
+x=123
+y=3.14
+z=.5
+echo $USER
+${HOME}
+if [ "$a" == "$b" ] || [ "$c" != "$d" ]; then echo "OK"; fi
+((a+=b))
+((a-=b))
+$((a << b))
+$((a >> b))
+${a:-b}
+${a:=b}
+${a##*/}
+${a%.*}
+${a%%.*}
+${a^^}
+${a,}
+${a,,}
+${!a}
+${a[@]}
+${a:?error message}
+${a:2:3}
+ls -a
+ls --a
diff --git a/userdiff.c b/userdiff.c
index 340c4eb4f7..4c77c7e0f6 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -64,15 +64,27 @@  PATTERNS("bash",
 	     /* Bashism identifier with optional parentheses */
 	     "(function[ \t]+[a-zA-Z_][a-zA-Z0-9_]*(([ \t]*\\([ \t]*\\))|([ \t]+))"
 	 ")"
-	 /* Optional whitespace */
-	 "[ \t]*"
-	 /* Compound command starting with `{`, `(`, `((` or `[[` */
-	 "(\\{|\\(\\(?|\\[\\[)"
+	 /* Everything after the function header is captured  */
+	 ".*$"
 	 /* End of captured text */
 	 ")",
 	 /* -- */
-	 /* Characters not in the default $IFS value */
-	 "[^ \t]+"),
+	 /* Identifiers: variable and function names */
+	  "[a-zA-Z_][a-zA-Z0-9_]*"
+	 /* Numeric constants: integers and decimals */
+	  "|[0-9]+(\\.[0-9]*)?|[-+]?\\.[0-9]+"
+	 /* Shell variables: $VAR, ${VAR} */
+	  "|\\$[a-zA-Z_][a-zA-Z0-9_]*|\\$\\{"
+	  /* Logical and comparison operators */
+	 "|\\|\\||&&|<<|>>|==|!=|<=|>="
+	 /* Assignment and arithmetic operators */
+	 "|[-+*/%&|^!=<>]=?"
+	 /* Additional parameter expansion operators */
+	 "|:?=|:-|:\\+|:\\?|:|#|##|%|%%|/[a-zA-Z0-9_-]+|\\^\\^?|,|,,?|!|@|:[0-9]+(:[0-9]+)?"
+	 /* Command-line options (to avoid splitting -option) */
+	 "|--?[a-zA-Z0-9_-]+"
+	 /* Brackets and grouping symbols */
+	 "|\\(|\\)|\\{|\\}|\\[|\\]"),
 PATTERNS("bibtex",
 	 "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 	 /* -- */