From patchwork Sat Mar 10 02:04:00 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Harald van Dijk X-Patchwork-Id: 10272897 X-Patchwork-Delegate: herbert@gondor.apana.org.au Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 06D216016D for ; Sat, 10 Mar 2018 02:03:10 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C5D932A0CD for ; Sat, 10 Mar 2018 02:03:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id B9F832A0D3; Sat, 10 Mar 2018 02:03:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.0 required=2.0 tests=BAYES_00,DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, RCVD_IN_DNSWL_HI, T_TVD_MIME_EPI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 37A5D2A0CD for ; Sat, 10 Mar 2018 02:03:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751349AbeCJCDC (ORCPT ); Fri, 9 Mar 2018 21:03:02 -0500 Received: from home.gigawatt.nl ([83.163.3.213]:55670 "EHLO home.gigawatt.nl" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751347AbeCJCDB (ORCPT ); Fri, 9 Mar 2018 21:03:01 -0500 Received: from [IPv6:2001:980:4809:1:e045:1301:c405:78bf] (unknown [IPv6:2001:980:4809:1:e045:1301:c405:78bf]) by home.gigawatt.nl (Postfix) with ESMTPSA id 08DD6540079D; Sat, 10 Mar 2018 02:02:57 +0000 (UTC) DKIM-Filter: OpenDKIM Filter v2.11.0 home.gigawatt.nl 08DD6540079D DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gigawatt.nl; s=default; t=1520647378; bh=ldhCowJxDN4R411/U7J3AmVbednjuaPcieAnIFXXkcw=; l=25658; h=Subject:From:To:Cc:References:Date:In-Reply-To:From; b=ZL3SA+/AxSjvryHKlYKyJxwASYtuP9mVjgRTR8Ru+GG3kWHyjD5kCyLEvecr1vyBL 2ZHevvVcdsYXA+vKhuIWfmSDbJoiDpGqs8iMUJd91CqrvtWsBdrCbzgdghjOhSsuRT IwohIOK2gMyyMx4JJaCYOOa2LAJdxFZ3VPHCroF0= Subject: Re: dash bug: double-quoted "\" breaks glob protection for next char From: Harald van Dijk To: Herbert Xu Cc: Martijn Dekker , Denys Vlasenko , dash@vger.kernel.org References: <86692fea-c33f-d26d-3b26-6e43bc22a0ee@gigawatt.nl> <20180302074922.GA19418@gondor.apana.org.au> <4242819b-4aee-1238-203f-ec08d001be05@gigawatt.nl> <7dac7df9-4093-095e-dd71-2d7383edd8c3@inlv.org> <041881f9-9084-4083-345a-8f85792b48ef@gigawatt.nl> <20180307162944.GA4960@gondor.apana.org.au> <066e53c4-ad05-35bb-2da2-a377ce8f4629@gigawatt.nl> Message-ID: <2192f69d-29ec-461c-d09e-cb63e309e8de@gigawatt.nl> Date: Sat, 10 Mar 2018 03:04:00 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:58.0) Gecko/20100101 Thunderbird/58.0 MIME-Version: 1.0 In-Reply-To: <066e53c4-ad05-35bb-2da2-a377ce8f4629@gigawatt.nl> Content-Language: en-US Sender: dash-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: dash@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP On 3/8/18 1:40 AM, Harald van Dijk wrote: > If the syntax stack is to be stored on the actual stack, then real > recursion could be used instead, as attached. Even though it won't be accepted in dash, I continued with this approach for my own use. I've now got it to about 1800 bytes smaller (at -Os -s). After the other changes I'd done, it became apparent to me that the syntax tables were unnecessary, and that they'd become fairly easy to get rid of. This was a big space saver that may be possible to apply to your version as well. Cheers, Harald van Dijk diff --git a/src/expand.c b/src/expand.c index 2a50830..acd5fdf 100644 --- a/src/expand.c +++ b/src/expand.c @@ -83,7 +83,7 @@ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ /* Add CTLESC when necessary. */ -#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT) +#define QUOTES_ESC (EXP_FULL | EXP_CASE) /* Do not skip NUL characters. */ #define QUOTES_KEEPNUL EXP_TILDE @@ -115,8 +115,8 @@ STATIC char *exptilde(char *, char *, int); STATIC void expbackq(union node *, int); STATIC const char *subevalvar(char *, char *, int, int, int, int, int); STATIC char *evalvar(char *, int); -STATIC size_t strtodest(const char *, const char *, int); -STATIC void memtodest(const char *, size_t, const char *, int); +STATIC size_t strtodest(const char *, int); +STATIC void memtodest(const char *, size_t, int); STATIC ssize_t varvalue(char *, int, int, int *); STATIC void expandmeta(struct strlist *, int); #ifdef HAVE_GLOB @@ -333,16 +333,6 @@ addquote: case CTLESC: startloc++; length++; - - /* - * Quoted parameter expansion pattern: remove quote - * unless inside inner quotes or we have a literal - * backslash. - */ - if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) == - EXP_QPAT && *p != '\\') - break; - goto addquote; case CTLVAR: p = evalvar(p, flag | inquotes); @@ -396,7 +386,7 @@ done: if (!home || !*home) goto lose; *p = c; - strtodest(home, SQSYNTAX, quotes); + strtodest(home, quotes | EXP_QUOTED); return (p); lose: *p = c; @@ -521,7 +511,6 @@ expbackq(union node *cmd, int flag) char *p; char *dest; int startloc; - char const *syntax = flag & EXP_QUOTED ? DQSYNTAX : BASESYNTAX; struct stackmark smark; INTOFF; @@ -535,7 +524,7 @@ expbackq(union node *cmd, int flag) if (i == 0) goto read; for (;;) { - memtodest(p, i, syntax, flag & QUOTES_ESC); + memtodest(p, i, flag & (QUOTES_ESC | EXP_QUOTED)); read: if (in.fd < 0) break; @@ -651,8 +640,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla char *(*scan)(char *, char *, char *, char *, int , int); argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ? - (flag & (EXP_QUOTED | EXP_QPAT) ? - EXP_QPAT : EXP_CASE) : 0)); + EXP_CASE : 0)); STPUTC('\0', expdest); argbackq = saveargbackq; startp = stackblock() + startloc; @@ -844,7 +832,7 @@ end: */ STATIC void -memtodest(const char *p, size_t len, const char *syntax, int quotes) { +memtodest(const char *p, size_t len, int quotes) { char *q; if (unlikely(!len)) @@ -855,11 +843,17 @@ memtodest(const char *p, size_t len, const char *syntax, int quotes) { do { int c = (signed char)*p++; if (c) { - if ((quotes & QUOTES_ESC) && - ((syntax[c] == CCTL) || - (((quotes & EXP_FULL) || syntax != BASESYNTAX) && - syntax[c] == CBACK))) - USTPUTC(CTLESC, q); + if (quotes & QUOTES_ESC) { + switch (c) { + case '\\': + case '!': case '*': case '?': case '[': case '=': + case '~': case ':': case '/': case '-': case ']': + if (quotes & EXP_QUOTED) + case CTLVARS: + USTPUTC(CTLESC, q); + break; + } + } } else if (!(quotes & QUOTES_KEEPNUL)) continue; USTPUTC(c, q); @@ -870,13 +864,10 @@ memtodest(const char *p, size_t len, const char *syntax, int quotes) { STATIC size_t -strtodest(p, syntax, quotes) - const char *p; - const char *syntax; - int quotes; +strtodest(const char *p, int quotes) { size_t len = strlen(p); - memtodest(p, len, syntax, quotes); + memtodest(p, len, quotes); return len; } @@ -895,15 +886,13 @@ varvalue(char *name, int varflags, int flags, int *quotedp) int sep; char sepc; char **ap; - char const *syntax; int quoted = *quotedp; int subtype = varflags & VSTYPE; int discard = subtype == VSPLUS || subtype == VSLENGTH; - int quotes = (discard ? 0 : (flags & QUOTES_ESC)) | QUOTES_KEEPNUL; + int quotes = quoted | (discard ? 0 : (flags & QUOTES_ESC)) | QUOTES_KEEPNUL; ssize_t len = 0; sep = (flags & EXP_FULL) << CHAR_BIT; - syntax = quoted ? DQSYNTAX : BASESYNTAX; switch (*name) { case '$': @@ -946,11 +935,11 @@ param: if (!(ap = shellparam.p)) return -1; while ((p = *ap++)) { - len += strtodest(p, syntax, quotes); + len += strtodest(p, quotes); if (*ap && sep) { len++; - memtodest(&sepc, 1, syntax, quotes); + memtodest(&sepc, 1, quotes); } } break; @@ -975,7 +964,7 @@ value: if (!p) return -1; - len = strtodest(p, syntax, quotes); + len = strtodest(p, quotes); break; } @@ -1644,7 +1633,6 @@ char * _rmescapes(char *str, int flag) { char *p, *q, *r; - unsigned inquotes; int notescaped; int globbing; @@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } - inquotes = 0; globbing = flag & RMESCAPE_GLOB; notescaped = globbing; while (*p) { if (*p == (char)CTLQUOTEMARK) { - inquotes = ~inquotes; p++; notescaped = globbing; continue; } + if (*p == '\\') { + /* naked back slash */ + notescaped = 0; + goto copy; + } if (*p == (char)CTLESC) { p++; if (notescaped) *q++ = '\\'; - } else if (*p == '\\' && !inquotes) { - /* naked back slash */ - notescaped = 0; - goto copy; } notescaped = globbing; copy: diff --git a/src/expand.h b/src/expand.h index 26dc5b4..90f5328 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,7 +55,6 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ -#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ diff --git a/src/mksyntax.c b/src/mksyntax.c index a23c18c..4753423 100644 --- a/src/mksyntax.c +++ b/src/mksyntax.c @@ -48,27 +48,6 @@ struct synclass { char *comment; }; -/* Syntax classes */ -struct synclass synclass[] = { - { "CWORD", "character is nothing special" }, - { "CNL", "newline character" }, - { "CBACK", "a backslash character" }, - { "CSQUOTE", "single quote" }, - { "CDQUOTE", "double quote" }, - { "CENDQUOTE", "a terminating quote" }, - { "CBQUOTE", "backwards single quote" }, - { "CVAR", "a dollar sign" }, - { "CENDVAR", "a '}' character" }, - { "CLP", "a left paren in arithmetic" }, - { "CRP", "a right paren in arithmetic" }, - { "CEOF", "end of file" }, - { "CCTL", "like CWORD, except it must be escaped" }, - { "CSPCL", "these terminate a word" }, - { "CIGN", "character should be ignored" }, - { NULL, NULL } -}; - - /* * Syntax classes for is_ functions. Warning: if you add new classes * you may have to change the definition of the is_in_name macro. @@ -94,7 +73,6 @@ static FILE *hfile; static char *syntax[513]; static void filltable(char *); -static void init(void); static void add(char *, char *); static void print(char *); static void output_type_macros(void); @@ -127,15 +105,6 @@ main(int argc, char **argv) fputs("\n", hfile); /* Generate the #define statements in the header file */ - fputs("/* Syntax classes */\n", hfile); - for (i = 0 ; synclass[i].name ; i++) { - sprintf(buf, "#define %s %d", synclass[i].name, i); - fputs(buf, hfile); - for (pos = strlen(buf) ; pos < 32 ; pos = (pos + 8) & ~07) - putc('\t', hfile); - fprintf(hfile, "/* %s */\n", synclass[i].comment); - } - putc('\n', hfile); fputs("/* Syntax classes for is_ functions */\n", hfile); for (i = 0 ; is_entry[i].name ; i++) { sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i); @@ -149,57 +118,12 @@ main(int argc, char **argv) fprintf(hfile, "#define PEOF %d\n\n", -130); fprintf(hfile, "#define PEOA %d\n\n", -129); putc('\n', hfile); - fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile); - fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile); - fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile); - fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile); - putc('\n', hfile); output_type_macros(); /* is_digit, etc. */ putc('\n', hfile); /* Generate the syntax tables. */ fputs("#include \"shell.h\"\n", cfile); fputs("#include \"syntax.h\"\n\n", cfile); - init(); - fputs("/* syntax table used when not in quotes */\n", cfile); - add("\n", "CNL"); - add("\\", "CBACK"); - add("'", "CSQUOTE"); - add("\"", "CDQUOTE"); - add("`", "CBQUOTE"); - add("$", "CVAR"); - add("}", "CENDVAR"); - add("<>();&| \t", "CSPCL"); - syntax[1] = "CSPCL"; - print("basesyntax"); - init(); - fputs("\n/* syntax table used when in double quotes */\n", cfile); - add("\n", "CNL"); - add("\\", "CBACK"); - add("\"", "CENDQUOTE"); - add("`", "CBQUOTE"); - add("$", "CVAR"); - add("}", "CENDVAR"); - /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]", "CCTL"); - print("dqsyntax"); - init(); - fputs("\n/* syntax table used when in single quotes */\n", cfile); - add("\n", "CNL"); - add("'", "CENDQUOTE"); - /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]\\", "CCTL"); - print("sqsyntax"); - init(); - fputs("\n/* syntax table used when in arithmetic */\n", cfile); - add("\n", "CNL"); - add("\\", "CBACK"); - add("`", "CBQUOTE"); - add("$", "CVAR"); - add("}", "CENDVAR"); - add("(", "CLP"); - add(")", "CRP"); - print("arisyntax"); filltable("0"); fputs("\n/* character classification table */\n", cfile); add("0123456789", "ISDIGIT"); @@ -228,23 +152,6 @@ filltable(char *dftval) } -/* - * Initialize the syntax table with default values. - */ - -static void -init(void) -{ - int ctl; - - filltable("CWORD"); - syntax[0] = "CEOF"; - syntax[1] = "CIGN"; - for (ctl = CTL_FIRST; ctl <= CTL_LAST; ctl++ ) - syntax[130 + ctl] = "CCTL"; -} - - /* * Add entries to the syntax table. */ diff --git a/src/parser.c b/src/parser.c index 3024ab1..42155f9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -68,8 +68,15 @@ -/* Used by expandstr to get here-doc like behaviour. */ -#define FAKEEOFMARK (char *)1 +/* Flags for readtoken1(). */ +#define RT_SQSYNTAX 0x01 +#define RT_DQSYNTAX 0x02 +#define RT_HEREDOC 0x04 +#define RT_STRING 0x08 +#define RT_VARSUBST 0x10 +#define RT_ARISUBST 0x20 +#define RT_ARIPAREN 0x40 +#define RT_STRIPTABS 0x80 @@ -106,7 +113,7 @@ STATIC void parseheredoc(void); STATIC int peektoken(void); STATIC int readtoken(void); STATIC int xxreadtoken(void); -STATIC int readtoken1(int, char const *, char *, int); +STATIC int readtoken1(int, char *, int); STATIC void synexpect(int) __attribute__((__noreturn__)); STATIC void synerror(const char *) __attribute__((__noreturn__)); STATIC void setprompt(int); @@ -121,11 +128,6 @@ isassignment(const char *p) return *q == '='; } -static inline int realeofmark(const char *eofmark) -{ - return eofmark && eofmark != FAKEEOFMARK; -} - /* * Read and parse a command. Returns NEOF on end of file. (NULL is a @@ -657,8 +659,7 @@ parseheredoc(void) if (needprompt) { setprompt(2); } - readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, - here->eofmark, here->striptabs); + readtoken1(pgetc(), here->eofmark, (here->here->type == NHERE ? RT_SQSYNTAX : RT_DQSYNTAX) | RT_HEREDOC | here->striptabs); n = (union node *)stalloc(sizeof (struct narg)); n->narg.type = NARG; n->narg.next = NULL; @@ -828,7 +829,8 @@ xxreadtoken(void) } } breakloop: - return readtoken1(c, BASESYNTAX, (char *)NULL, 0); + readtoken1(c, NULL, 0); + return lasttoken; #undef RETURN } @@ -856,69 +858,59 @@ static int pgetc_eatbnl(void) * word which marks the end of the document and striptabs is true if * leading tabs should be stripped from the document. The argument firstc * is the first character of the input token or document. - * - * Because C does not have internal subroutines, I have simulated them - * using goto's to implement the subroutine linkage. The following macros - * will run code that appears at the end of readtoken1. */ -#define CHECKEND() {goto checkend; checkend_return:;} -#define PARSEREDIR() {goto parseredir; parseredir_return:;} -#define PARSESUB() {goto parsesub; parsesub_return:;} -#define PARSEBACKQOLD() {oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;} -#define PARSEBACKQNEW() {oldstyle = 0; goto parsebackq; parsebackq_newreturn:;} -#define PARSEARITH() {goto parsearith; parsearith_return:;} +STATIC char *readtoken1_loop(char *, int, char *, int); +STATIC int readtoken1_endword(char *, char *); +STATIC char *readtoken1_checkend(char *, int *, char *, int); +STATIC int readtoken1_parseredir(char *, int); +STATIC char *readtoken1_parsesub(char *, char *, int); +STATIC char *readtoken1_parsebackq(char *, int, int); +STATIC char *readtoken1_parsearith(char *, char *, int); STATIC int -readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) +readtoken1(int firstc, char *eofmark, int flags) { - int c = firstc; char *out; - size_t len; - struct nodelist *bqlist; - int quotef; - int dblquote; - int varnest; /* levels of variables expansion */ - int arinest; /* levels of arithmetic expansion */ - int parenlevel; /* levels of parens in arithmetic */ - int dqvarnest; /* levels of variables expansion within double quotes */ - int oldstyle; - /* syntax before arithmetic */ - char const *uninitialized_var(prevsyntax); - - dblquote = 0; - if (syntax == DQSYNTAX) - dblquote = 1; - quotef = 0; - bqlist = NULL; - varnest = 0; - arinest = 0; - parenlevel = 0; - dqvarnest = 0; + quoteflag = 0; + backquotelist = NULL; STARTSTACKSTR(out); + out = readtoken1_loop(out, firstc, eofmark, flags); + return readtoken1_endword(out, eofmark); +} + +STATIC char * +readtoken1_loop(char *out, int c, char *eofmark, int flags) +{ + int qsyntax; + loop: { /* for each line, until end of word */ - CHECKEND(); /* set c to PEOF if at end of here document */ + out = readtoken1_checkend(out, &c, eofmark, flags); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ - switch(syntax[c]) { - case CNL: /* '\n' */ - if (syntax == BASESYNTAX) + switch(c) { + case '\n': + if (!flags) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); c = pgetc(); goto loop; /* continue outer loop */ - case CWORD: +word: + default: USTPUTC(c, out); break; - case CCTL: - if (eofmark == NULL || dblquote) +control: + case '!': case '*': case '?': case '[': case '=': + case '~': case ':': case '/': case '-': case ']': + if (flags & (RT_SQSYNTAX | RT_DQSYNTAX) && !(flags & RT_HEREDOC)) USTPUTC(CTLESC, out); USTPUTC(c, out); break; - /* backslash */ - case CBACK: + case '\\': + if (flags & RT_SQSYNTAX) + goto control; c = pgetc2(); if (c == PEOF) { USTPUTC(CTLESC, out); @@ -928,128 +920,132 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) nlprompt(); } else { if ( - dblquote && + (flags & RT_DQSYNTAX) && c != '\\' && c != '`' && c != '$' && ( c != '"' || - eofmark != NULL + !(flags & RT_STRING) + ) && ( + c != '}' || + !(flags & RT_VARSUBST) ) ) { + USTPUTC(CTLESC, out); USTPUTC('\\', out); } USTPUTC(CTLESC, out); USTPUTC(c, out); - quotef++; - } - break; - case CSQUOTE: - syntax = SQSYNTAX; -quotemark: - if (eofmark == NULL) { - USTPUTC(CTLQUOTEMARK, out); + quoteflag++; } break; - case CDQUOTE: - syntax = DQSYNTAX; - dblquote = 1; - goto quotemark; - case CENDQUOTE: - if (eofmark && !varnest) - USTPUTC(c, out); - else { - if (dqvarnest == 0) { - syntax = BASESYNTAX; - dblquote = 0; - } - quotef++; - goto quotemark; + do { + case '\'': + qsyntax = RT_SQSYNTAX; + break; + case '"': + qsyntax = RT_DQSYNTAX; + break; + } while(0); + if (flags & (RT_HEREDOC | RT_SQSYNTAX | RT_DQSYNTAX) & ~qsyntax) + goto word; + if ((flags & (qsyntax | RT_VARSUBST)) == qsyntax) { + quoteflag++; + if (!(flags & RT_HEREDOC)) + USTPUTC(CTLQUOTEMARK, out); + return out; } + USTPUTC(CTLQUOTEMARK, out); + out = readtoken1_loop(out, pgetc(), eofmark, RT_STRING | qsyntax | (flags & RT_STRIPTABS)); break; - case CVAR: /* '$' */ - PARSESUB(); /* parse substitution */ - break; - case CENDVAR: /* '}' */ - if (varnest > 0) { - varnest--; - if (dqvarnest > 0) { - dqvarnest--; - } - USTPUTC(CTLENDVAR, out); - } else { - USTPUTC(c, out); - } + case '$': + if (flags & RT_SQSYNTAX) + goto word; + out = readtoken1_parsesub(out, eofmark, flags); /* parse substitution */ break; - case CLP: /* '(' in arithmetic */ - parenlevel++; + case '}': + if (!(flags & RT_VARSUBST)) + goto word; + USTPUTC(CTLENDVAR, out); + return out; + case '(': + if (!(flags & RT_ARISUBST)) + goto special; USTPUTC(c, out); + out = readtoken1_loop(out, pgetc(), eofmark, flags | RT_ARIPAREN); break; - case CRP: /* ')' in arithmetic */ - if (parenlevel > 0) { - USTPUTC(c, out); - --parenlevel; - } else { + case ')': + if (!(flags & (RT_ARISUBST | RT_ARIPAREN))) + goto special; + if (!(flags & RT_ARIPAREN)) { if (pgetc() == ')') { USTPUTC(CTLENDARI, out); - if (!--arinest) - syntax = prevsyntax; - } else { - /* - * unbalanced parens - * (don't 2nd guess - no error) - */ - pungetc(); - USTPUTC(')', out); + return out; } + pungetc(); + USTPUTC(')', out); + } else { + USTPUTC(')', out); + return out; } + case '`': + if (flags & RT_SQSYNTAX) + goto word; + out = readtoken1_parsebackq(out, flags, 1); break; - case CBQUOTE: /* '`' */ - PARSEBACKQOLD(); - break; - case CEOF: + case PEOF: goto endword; /* exit outer loop */ - case CIGN: - break; - default: - if (varnest == 0) - goto endword; /* exit outer loop */ - if (c != PEOA) { +special: + case PEOA: + case '<': case '>': // case '(': case ')': + case ';': case '&': case '|': case ' ': case '\t': + case CTLVARS: + if (!flags) + goto endword; + if (c != PEOA) USTPUTC(c, out); - } + break; } c = pgetc(); } } endword: - if (syntax == ARISYNTAX) + if (flags & RT_ARISUBST) synerror("Missing '))'"); - if (syntax != BASESYNTAX && eofmark == NULL) + if (flags & RT_STRING) synerror("Unterminated quoted string"); - if (varnest != 0) { - /* { */ + if (flags & RT_VARSUBST) { + /* "{" */ synerror("Missing '}'"); } + pungetc(); + return out; +} + +STATIC int +readtoken1_endword(char *out, char *eofmark) +{ + size_t len; + int c; + USTPUTC('\0', out); len = out - (char *)stackblock(); out = stackblock(); + if (eofmark == NULL) { + c = pgetc(); if ((c == '>' || c == '<') - && quotef == 0 + && !quoteflag && len <= 2 && (*out == '\0' || is_digit(*out))) { - PARSEREDIR(); - return lasttoken = TREDIR; + return readtoken1_parseredir(out, c); } else { pungetc(); } } - quoteflag = quotef; - backquotelist = bqlist; grabstackblock(len); wordtext = out; return lasttoken = TWORD; -/* end of readtoken routine */ - - +} /* * Check to see whether we are at the end of the here document. When this @@ -1057,30 +1053,32 @@ endword: * we are at the end of the here document, this routine sets the c to PEOF. */ -checkend: { - if (realeofmark(eofmark)) { +STATIC char * +readtoken1_checkend(char *out, int *c, char *eofmark, int flags) +{ + if (eofmark) { int markloc; char *p; - if (c == PEOA) { - c = pgetc2(); + if (*c == PEOA) { + *c = pgetc2(); } - if (striptabs) { - while (c == '\t') { - c = pgetc2(); + if (flags & RT_STRIPTABS) { + while (*c == '\t') { + *c = pgetc2(); } } markloc = out - (char *)stackblock(); - for (p = eofmark; STPUTC(c, out), *p; p++) { - if (c != *p) + for (p = eofmark; STPUTC(*c, out), *p; p++) { + if (*c != *p) goto more_heredoc; - c = pgetc2(); + *c = pgetc2(); } - if (c == '\n' || c == PEOF) { - c = PEOF; + if (*c == '\n' || *c == PEOF) { + *c = PEOF; nlnoprompt(); } else { int len; @@ -1090,8 +1088,8 @@ more_heredoc: len = out - p; if (len) { - len -= c < 0; - c = p[-1]; + len -= *c < 0; + *c = p[-1]; if (len) { char *str; @@ -1106,7 +1104,7 @@ more_heredoc: STADJUST((char *)stackblock() + markloc - out, out); } - goto checkend_return; + return out; } @@ -1116,7 +1114,9 @@ more_heredoc: * first character of the redirection operator. */ -parseredir: { +STATIC int +readtoken1_parseredir(char *out, int c) +{ char fd = *out; union node *np; @@ -1146,7 +1146,7 @@ parseredir: { heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); heredoc->here = np; if ((c = pgetc()) == '-') { - heredoc->striptabs = 1; + heredoc->striptabs = RT_STRIPTABS; } else { heredoc->striptabs = 0; pungetc(); @@ -1170,7 +1170,7 @@ parseredir: { if (fd != '\0') np->nfile.fd = digit_val(fd); redirnode = np; - goto parseredir_return; + return lasttoken = TREDIR; } @@ -1179,7 +1179,10 @@ parseredir: { * and nothing else. */ -parsesub: { +STATIC char * +readtoken1_parsesub(char *out, char *eofmark, int flags) +{ + int c; int subtype; int typeloc; char *p; @@ -1195,12 +1198,14 @@ parsesub: { pungetc(); } else if (c == '(') { /* $(command) or $((arith)) */ if (pgetc_eatbnl() == '(') { - PARSEARITH(); + out = readtoken1_parsearith(out, eofmark, flags); } else { pungetc(); - PARSEBACKQNEW(); + out = readtoken1_parsebackq(out, flags, 0); } } else { + int newflags = (flags & (RT_DQSYNTAX | RT_STRIPTABS)) | RT_VARSUBST; + USTPUTC(CTLVAR, out); typeloc = out - (char *)stackblock(); STADJUST(1, out); @@ -1276,6 +1281,7 @@ varname: subtype++; else pungetc(); + newflags &= ~RT_DQSYNTAX; break; } } @@ -1284,14 +1290,11 @@ badsub: pungetc(); } *((char *)stackblock() + typeloc) = subtype; - if (subtype != VSNORMAL) { - varnest++; - if (dblquote) - dqvarnest++; - } STPUTC('=', out); + if (subtype != VSNORMAL) + out = readtoken1_loop(out, pgetc(), eofmark, newflags); } - goto parsesub_return; + return out; } @@ -1302,7 +1305,9 @@ badsub: * characters on the top of the stack which must be preserved. */ -parsebackq: { +STATIC char * +readtoken1_parsebackq(char *out, int flags, int oldstyle) +{ struct nodelist **nlpp; union node *n; char *str; @@ -1346,7 +1351,7 @@ parsebackq: { continue; } if (pc != '\\' && pc != '`' && pc != '$' - && (!dblquote || pc != '"')) + && (!(flags & RT_DQSYNTAX) || pc != '"')) STPUTC('\\', pout); if (pc > PEOA) { break; @@ -1374,7 +1379,7 @@ done: setinputstring(pstr); } } - nlpp = &bqlist; + nlpp = &backquotelist; while (*nlpp) nlpp = &(*nlpp)->next; *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); @@ -1385,7 +1390,9 @@ done: doprompt = 0; } + struct nodelist *savebqlist = backquotelist; n = list(2); + backquotelist = savebqlist; if (oldstyle) doprompt = saveprompt; @@ -1411,27 +1418,22 @@ done: STADJUST(savelen, out); } USTPUTC(CTLBACKQ, out); - if (oldstyle) - goto parsebackq_oldreturn; - else - goto parsebackq_newreturn; + return out; } + + /* * Parse an arithmetic expansion (indicate start of one and set state) */ -parsearith: { - if (++arinest == 1) { - prevsyntax = syntax; - syntax = ARISYNTAX; - } +STATIC char * +readtoken1_parsearith(char *out, char *eofmark, int flags) +{ USTPUTC(CTLARI, out); - goto parsearith_return; + return readtoken1_loop(out, pgetc(), eofmark, RT_DQSYNTAX | RT_ARISUBST | (flags & RT_STRIPTABS)); } -} /* end of readtoken */ - #ifdef mkinit @@ -1523,7 +1525,7 @@ expandstr(const char *ps) saveprompt = doprompt; doprompt = 0; - readtoken1(pgetc(), DQSYNTAX, FAKEEOFMARK, 0); + readtoken1(pgetc(), NULL, RT_HEREDOC | RT_DQSYNTAX); doprompt = saveprompt; diff --git a/src/parser.h b/src/parser.h index 2875cce..cecd9aa 100644 --- a/src/parser.h +++ b/src/parser.h @@ -47,6 +47,15 @@ #define CTLQUOTEMARK -120 #define CTL_LAST -120 /* last 'special' character */ +#define CTLVARS \ + CTLESC: \ + case CTLVAR: \ + case CTLENDVAR: \ + case CTLBACKQ: \ + case CTLARI: \ + case CTLENDARI: \ + case CTLQUOTEMARK + /* variable substitution byte (follows CTLVAR) */ #define VSTYPE 0x0f /* type of variable substitution */ #define VSNUL 0x10 /* colon--treat the empty string as unset */