[3/4] chainlint: latch start/end position of each token

Message ID	fa56128c37280ccc87f8f8f6fd586db221b13ea4.1667934510.git.gitgitgadget@gmail.com (mailing list archive)
State	Accepted
Commit	5f0321a9f25bd1366a6f13c2e2b51d6e1e2a5ded
Headers	show Return-Path: <git-owner@kernel.org> Message-Id: <fa56128c37280ccc87f8f8f6fd586db221b13ea4.1667934510.git.gitgitgadget@gmail.com> In-Reply-To: <pull.1375.git.git.1667934510.gitgitgadget@gmail.com> References: <pull.1375.git.git.1667934510.gitgitgadget@gmail.com> Date: Tue, 08 Nov 2022 19:08:29 +0000 Subject: [PATCH 3/4] chainlint: latch start/end position of each token Fcc: Sent Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIME-Version: 1.0 To: git@vger.kernel.org Cc: Jeff King <peff@peff.net>, =?utf-8?b?w4Z2YXIgQXJuZmrDtnLDsA==?= Bjarmason <avarab@gmail.com>, Eric Sunshine <sunshine@sunshineco.com>, Eric Sunshine <sunshine@sunshineco.com> Precedence: bulk From: Eric Sunshine <sunshine@sunshineco.com>
Series	chainlint: improve annotated output \| expand [0/4] chainlint: improve annotated output [1/4] chainlint: add explanatory comments [2/4] chainlint: tighten accuracy when consuming input stream [3/4] chainlint: latch start/end position of each token [4/4] chainlint: annotate original test definition rather than token stream

diff --git a/t/chainlint.pl b/t/chainlint.pl index 1f66c03c593..59aa79babc2 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -75,7 +75,9 @@ sub scan_heredoc_tag { my $self = shift @_; ${$self->{buff}} =~ /\G(-?)/gc; my $indented = $1; - my $tag = $self->scan_token(); + my $token = $self->scan_token(); + return "<<$indented" unless $token; + my $tag = $token->[0]; $tag =~ s/['"\\]//g; push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag"); return "<<$indented$tag"; @@ -149,7 +151,7 @@ sub scan_dollar { my $self = shift @_; my $b = $self->{buff}; return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) - return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) + return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} return $1 if $$b =~ /\G(\w+)/gc; # $var return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. @@ -170,9 +172,11 @@ sub scan_token { my $self = shift @_; my $b = $self->{buff}; my $token = ''; + my $start; RESTART: $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) - return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment + $start = pos($$b) || 0; + return ["\n", $start, pos($$b)] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment while (1) { # slurp up non-special characters $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; @@ -197,7 +201,7 @@ RESTART: } die("internal error scanning character '$c'\n"); } - return length($token) ? $token : undef; + return length($token) ? [$token, $start, pos($$b)] : undef; } # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It @@ -239,14 +243,14 @@ sub stop_at { my ($self, $token) = @_; return 1 unless defined($token); my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; - return defined($stop) && $token =~ $stop; + return defined($stop) && $token->[0] =~ $stop; } sub expect { my ($self, $expect) = @_; my $token = $self->next_token(); - return $token if defined($token) && $token eq $expect; - push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n"); + return $token if defined($token) && $token->[0] eq $expect; + push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n"); $self->untoken($token) if defined($token); return (); } @@ -255,7 +259,7 @@ sub optional_newlines { my $self = shift @_; my @tokens; while (my $token = $self->peek()) { - last unless $token eq "\n"; + last unless $token->[0] eq "\n"; push(@tokens, $self->next_token()); } return @tokens; @@ -278,7 +282,7 @@ sub parse_case_pattern { my @tokens; while (defined(my $token = $self->next_token())) { push(@tokens, $token); - last if $token eq ')'; + last if $token->[0] eq ')'; } return @tokens; } @@ -293,13 +297,13 @@ sub parse_case { $self->optional_newlines()); while (1) { my $token = $self->peek(); - last unless defined($token) && $token ne 'esac'; + last unless defined($token) && $token->[0] ne 'esac'; push(@tokens, $self->parse_case_pattern(), $self->optional_newlines(), $self->parse(qr/^(?:;;|esac)$/)); # item body $token = $self->peek(); - last unless defined($token) && $token ne 'esac'; + last unless defined($token) && $token->[0] ne 'esac'; push(@tokens, $self->expect(';;'), $self->optional_newlines()); @@ -315,7 +319,7 @@ sub parse_for { $self->next_token(), # variable $self->optional_newlines()); my $token = $self->peek(); - if (defined($token) && $token eq 'in') { + if (defined($token) && $token->[0] eq 'in') { push(@tokens, $self->expect('in'), $self->optional_newlines()); @@ -339,11 +343,11 @@ sub parse_if { $self->optional_newlines(), $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body my $token = $self->peek(); - last unless defined($token) && $token eq 'elif'; + last unless defined($token) && $token->[0] eq 'elif'; push(@tokens, $self->expect('elif')); } my $token = $self->peek(); - if (defined($token) && $token eq 'else') { + if (defined($token) && $token->[0] eq 'else') { push(@tokens, $self->expect('else'), $self->optional_newlines(), @@ -380,7 +384,7 @@ sub parse_bash_array_assignment { my @tokens = $self->expect('('); while (defined(my $token = $self->next_token())) { push(@tokens, $token); - last if $token eq ')'; + last if $token->[0] eq ')'; } return @tokens; } @@ -398,29 +402,31 @@ sub parse_cmd { my $self = shift @_; my $cmd = $self->next_token(); return () unless defined($cmd); - return $cmd if $cmd eq "\n"; + return $cmd if $cmd->[0] eq "\n"; my $token; my @tokens = $cmd; - if ($cmd eq '!') { + if ($cmd->[0] eq '!') { push(@tokens, $self->parse_cmd()); return @tokens; - } elsif (my $f = $compound{$cmd}) { + } elsif (my $f = $compound{$cmd->[0]}) { push(@tokens, $self->$f()); - } elsif (defined($token = $self->peek()) && $token eq '(') { - if ($cmd !~ /\w=$/) { + } elsif (defined($token = $self->peek()) && $token->[0] eq '(') { + if ($cmd->[0] !~ /\w=$/) { push(@tokens, $self->parse_func()); return @tokens; } - $tokens[-1] .= join(' ', $self->parse_bash_array_assignment()); + my @array = $self->parse_bash_array_assignment(); + $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array); + $tokens[-1]->[2] = $array[$#array][2] if @array; } while (defined(my $token = $self->next_token())) { $self->untoken($token), last if $self->stop_at($token); push(@tokens, $token); - last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/; + last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; } - push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n"; + push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n"; return @tokens; } @@ -457,7 +463,7 @@ sub find_non_nl { my $tokens = shift @_; my $n = shift @_; $n = $#$tokens if !defined($n); - $n-- while $n >= 0 && $$tokens[$n] eq "\n"; + $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n"; return $n; } @@ -467,7 +473,7 @@ sub ends_with { for my $needle (reverse(@$needles)) { return undef if $n < 0; $n = find_non_nl($tokens, $n), next if $needle eq "\n"; - return undef if $$tokens[$n] !~ $needle; + return undef if $$tokens[$n]->[0] !~ $needle; $n--; } return 1; @@ -486,13 +492,13 @@ sub parse_loop_body { my $self = shift @_; my @tokens = $self->SUPER::parse_loop_body(@_); # did loop signal failure via "|| return" or "|| exit"? - return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens; # did loop upstream of a pipe signal failure via "|| echo 'impossible # text'" as the final command in the loop body? return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); # flag missing "return/exit" handling explicit failure in loop body my $n = find_non_nl(\@tokens); - splice(@tokens, $n + 1, 0, '?!LOOP?!'); + splice(@tokens, $n + 1, 0, ['?!LOOP?!', $tokens[$n]->[1], $tokens[$n]->[2]]); return @tokens; } @@ -510,7 +516,7 @@ sub accumulate { goto DONE unless @$tokens; # new command is empty line; can't yet check if previous is missing "&&" - goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; + goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n"; # did previous command end with "&&", "|", "|| return" or similar? goto DONE if match_ending($tokens, \@safe_endings); @@ -518,20 +524,20 @@ sub accumulate { # if this command handles "$?" specially, then okay for previous # command to be missing "&&" for my $token (@$cmd) { - goto DONE if $token =~ /\$\?/; + goto DONE if $token->[0] =~ /\$\?/; } # if this command is "false", "return 1", or "exit 1" (which signal # failure explicitly), then okay for all preceding commands to be # missing "&&" - if ($$cmd[0] =~ /^(?:false|return|exit)$/) { - @$tokens = grep(!/^\?!AMP\?!$/, @$tokens); + if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) { + @$tokens = grep {$_->[0] !~ /^\?!AMP\?!$/} @$tokens; goto DONE; } # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); - splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; + splice(@$tokens, $n + 1, 0, ['?!AMP?!', $$tokens[$n]->[1], $$tokens[$n]->[2]]) unless $n < 0; DONE: $self->SUPER::accumulate($tokens, $cmd); @@ -557,7 +563,7 @@ sub new { # composition of multiple strings and non-string character runs; for instance, # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` sub unwrap { - my $token = @_ ? shift @_ : $_; + my $token = (@_ ? shift @_ : $_)->[0]; # simple case: 'sqstring' or "dqstring" return $token if $token =~ s/^'([^']*)'$/$1/; return $token if $token =~ s/^"([^"]*)"$/$1/; @@ -588,9 +594,9 @@ sub check_test { $self->{ntests}++; my $parser = TestParser->new(\$body); my @tokens = $parser->parse(); - return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + return unless $emit_all || grep {$_->[0] =~ /\?![^?]+\?!/} @tokens; my $c = main::fd_colors(1); - my $checked = join(' ', @tokens); + my $checked = join(' ', map {$_->[0]} @tokens); $checked =~ s/^\n//; $checked =~ s/^ //mg; $checked =~ s/ $//mg; @@ -602,9 +608,9 @@ sub check_test { sub parse_cmd { my $self = shift @_; my @tokens = $self->SUPER::parse_cmd(); - return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/; + return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/; my $n = $#tokens; - $n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/; + $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body return @tokens;

[3/4] chainlint: latch start/end position of each token

Commit Message

Patch