@@ -198,6 +198,249 @@ RESTART:
return length($token) ? $token : undef;
}
+# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
+# is a recursive descent parser very roughly modeled after section 2.10 "Shell
+# Grammar" of POSIX chapter 2 "Shell Command Language".
+package ShellParser;
+
+sub new {
+ my ($class, $s) = @_;
+ my $self = bless {
+ buff => [],
+ stop => [],
+ output => []
+ } => $class;
+ $self->{lexer} = Lexer->new($self, $s);
+ return $self;
+}
+
+sub next_token {
+ my $self = shift @_;
+ return pop(@{$self->{buff}}) if @{$self->{buff}};
+ return $self->{lexer}->scan_token();
+}
+
+sub untoken {
+ my $self = shift @_;
+ push(@{$self->{buff}}, @_);
+}
+
+sub peek {
+ my $self = shift @_;
+ my $token = $self->next_token();
+ return undef unless defined($token);
+ $self->untoken($token);
+ return $token;
+}
+
+sub stop_at {
+ my ($self, $token) = @_;
+ return 1 unless defined($token);
+ my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
+ return defined($stop) && $token =~ $stop;
+}
+
+sub expect {
+ my ($self, $expect) = @_;
+ my $token = $self->next_token();
+ return $token if defined($token) && $token eq $expect;
+ push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n");
+ $self->untoken($token) if defined($token);
+ return ();
+}
+
+sub optional_newlines {
+ my $self = shift @_;
+ my @tokens;
+ while (my $token = $self->peek()) {
+ last unless $token eq "\n";
+ push(@tokens, $self->next_token());
+ }
+ return @tokens;
+}
+
+sub parse_group {
+ my $self = shift @_;
+ return ($self->parse(qr/^}$/),
+ $self->expect('}'));
+}
+
+sub parse_subshell {
+ my $self = shift @_;
+ return ($self->parse(qr/^\)$/),
+ $self->expect(')'));
+}
+
+sub parse_case_pattern {
+ my $self = shift @_;
+ my @tokens;
+ while (defined(my $token = $self->next_token())) {
+ push(@tokens, $token);
+ last if $token eq ')';
+ }
+ return @tokens;
+}
+
+sub parse_case {
+ my $self = shift @_;
+ my @tokens;
+ push(@tokens,
+ $self->next_token(), # subject
+ $self->optional_newlines(),
+ $self->expect('in'),
+ $self->optional_newlines());
+ while (1) {
+ my $token = $self->peek();
+ last unless defined($token) && $token ne 'esac';
+ push(@tokens,
+ $self->parse_case_pattern(),
+ $self->optional_newlines(),
+ $self->parse(qr/^(?:;;|esac)$/)); # item body
+ $token = $self->peek();
+ last unless defined($token) && $token ne 'esac';
+ push(@tokens,
+ $self->expect(';;'),
+ $self->optional_newlines());
+ }
+ push(@tokens, $self->expect('esac'));
+ return @tokens;
+}
+
+sub parse_for {
+ my $self = shift @_;
+ my @tokens;
+ push(@tokens,
+ $self->next_token(), # variable
+ $self->optional_newlines());
+ my $token = $self->peek();
+ if (defined($token) && $token eq 'in') {
+ push(@tokens,
+ $self->expect('in'),
+ $self->optional_newlines());
+ }
+ push(@tokens,
+ $self->parse(qr/^do$/), # items
+ $self->expect('do'),
+ $self->optional_newlines(),
+ $self->parse_loop_body(),
+ $self->expect('done'));
+ return @tokens;
+}
+
+sub parse_if {
+ my $self = shift @_;
+ my @tokens;
+ while (1) {
+ push(@tokens,
+ $self->parse(qr/^then$/), # if/elif condition
+ $self->expect('then'),
+ $self->optional_newlines(),
+ $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
+ my $token = $self->peek();
+ last unless defined($token) && $token eq 'elif';
+ push(@tokens, $self->expect('elif'));
+ }
+ my $token = $self->peek();
+ if (defined($token) && $token eq 'else') {
+ push(@tokens,
+ $self->expect('else'),
+ $self->optional_newlines(),
+ $self->parse(qr/^fi$/)); # else body
+ }
+ push(@tokens, $self->expect('fi'));
+ return @tokens;
+}
+
+sub parse_loop_body {
+ my $self = shift @_;
+ return $self->parse(qr/^done$/);
+}
+
+sub parse_loop {
+ my $self = shift @_;
+ return ($self->parse(qr/^do$/), # condition
+ $self->expect('do'),
+ $self->optional_newlines(),
+ $self->parse_loop_body(),
+ $self->expect('done'));
+}
+
+sub parse_func {
+ my $self = shift @_;
+ return ($self->expect('('),
+ $self->expect(')'),
+ $self->optional_newlines(),
+ $self->parse_cmd()); # body
+}
+
+sub parse_bash_array_assignment {
+ my $self = shift @_;
+ my @tokens = $self->expect('(');
+ while (defined(my $token = $self->next_token())) {
+ push(@tokens, $token);
+ last if $token eq ')';
+ }
+ return @tokens;
+}
+
+my %compound = (
+ '{' => \&parse_group,
+ '(' => \&parse_subshell,
+ 'case' => \&parse_case,
+ 'for' => \&parse_for,
+ 'if' => \&parse_if,
+ 'until' => \&parse_loop,
+ 'while' => \&parse_loop);
+
+sub parse_cmd {
+ my $self = shift @_;
+ my $cmd = $self->next_token();
+ return () unless defined($cmd);
+ return $cmd if $cmd eq "\n";
+
+ my $token;
+ my @tokens = $cmd;
+ if ($cmd eq '!') {
+ push(@tokens, $self->parse_cmd());
+ return @tokens;
+ } elsif (my $f = $compound{$cmd}) {
+ push(@tokens, $self->$f());
+ } elsif (defined($token = $self->peek()) && $token eq '(') {
+ if ($cmd !~ /\w=$/) {
+ push(@tokens, $self->parse_func());
+ return @tokens;
+ }
+ $tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
+ }
+
+ while (defined(my $token = $self->next_token())) {
+ $self->untoken($token), last if $self->stop_at($token);
+ push(@tokens, $token);
+ last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
+ }
+ push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n";
+ return @tokens;
+}
+
+sub accumulate {
+ my ($self, $tokens, $cmd) = @_;
+ push(@$tokens, @$cmd);
+}
+
+sub parse {
+ my ($self, $stop) = @_;
+ push(@{$self->{stop}}, $stop);
+ goto DONE if $self->stop_at($self->peek());
+ my @tokens;
+ while (my @cmd = $self->parse_cmd()) {
+ $self->accumulate(\@tokens, \@cmd);
+ last if $self->stop_at($self->peek());
+ }
+DONE:
+ pop(@{$self->{stop}});
+ return @tokens;
+}
+
package ScriptParser;
sub new {