From patchwork Sat Mar 10 02:04:00 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Harald van Dijk <harald@gigawatt.nl>
X-Patchwork-Id: 10272897
X-Patchwork-Delegate: herbert@gondor.apana.org.au
Return-Path: <dash-owner@kernel.org>
Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org
	[172.30.200.125])
	by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id
	06D216016D for <patchwork-dash@patchwork.kernel.org>;
	Sat, 10 Mar 2018 02:03:10 +0000 (UTC)
Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C5D932A0CD
	for <patchwork-dash@patchwork.kernel.org>;
	Sat, 10 Mar 2018 02:03:09 +0000 (UTC)
Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486)
	id B9F832A0D3; Sat, 10 Mar 2018 02:03:09 +0000 (UTC)
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on
	pdx-wl-mail.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-7.0 required=2.0 tests=BAYES_00,DKIM_SIGNED,
	DKIM_VALID, DKIM_VALID_AU, RCVD_IN_DNSWL_HI,
	T_TVD_MIME_EPI autolearn=ham version=3.3.1
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 37A5D2A0CD
	for <patchwork-dash@patchwork.kernel.org>;
	Sat, 10 Mar 2018 02:03:04 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1751349AbeCJCDC (ORCPT
	<rfc822;patchwork-dash@patchwork.kernel.org>);
	Fri, 9 Mar 2018 21:03:02 -0500
Received: from home.gigawatt.nl ([83.163.3.213]:55670 "EHLO home.gigawatt.nl"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751347AbeCJCDB (ORCPT <rfc822;dash@vger.kernel.org>);
	Fri, 9 Mar 2018 21:03:01 -0500
Received: from [IPv6:2001:980:4809:1:e045:1301:c405:78bf] (unknown
	[IPv6:2001:980:4809:1:e045:1301:c405:78bf])
	by home.gigawatt.nl (Postfix) with ESMTPSA id 08DD6540079D;
	Sat, 10 Mar 2018 02:02:57 +0000 (UTC)
DKIM-Filter: OpenDKIM Filter v2.11.0 home.gigawatt.nl 08DD6540079D
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gigawatt.nl;
	s=default; t=1520647378;
	bh=ldhCowJxDN4R411/U7J3AmVbednjuaPcieAnIFXXkcw=; l=25658;
	h=Subject:From:To:Cc:References:Date:In-Reply-To:From;
	b=ZL3SA+/AxSjvryHKlYKyJxwASYtuP9mVjgRTR8Ru+GG3kWHyjD5kCyLEvecr1vyBL
	2ZHevvVcdsYXA+vKhuIWfmSDbJoiDpGqs8iMUJd91CqrvtWsBdrCbzgdghjOhSsuRT
	IwohIOK2gMyyMx4JJaCYOOa2LAJdxFZ3VPHCroF0=
Subject: Re: dash bug: double-quoted "\" breaks glob protection for next char
From: Harald van Dijk <harald@gigawatt.nl>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Martijn Dekker <martijn@inlv.org>,
	Denys Vlasenko <vda.linux@googlemail.com>, dash@vger.kernel.org
References: 
 <CAK1hOcOha+c0CkK+5QVs3Vttcrw1z7kKy+pZ_fX5fSN-+W_VKA@mail.gmail.com>
	<a8ec6fb3-2882-0057-c9f8-96b8f01abba7@gigawatt.nl>
	<86692fea-c33f-d26d-3b26-6e43bc22a0ee@gigawatt.nl>
	<20180302074922.GA19418@gondor.apana.org.au>
	<4242819b-4aee-1238-203f-ec08d001be05@gigawatt.nl>
	<e8d17ec3-8bd1-a502-816b-638996dca9ab@gigawatt.nl>
	<ae008292-cbe3-969d-9002-6ab74acacf81@inlv.org>
	<b06a377a-4467-5b30-f648-12305ab8f714@gigawatt.nl>
	<7dac7df9-4093-095e-dd71-2d7383edd8c3@inlv.org>
	<041881f9-9084-4083-345a-8f85792b48ef@gigawatt.nl>
	<20180307162944.GA4960@gondor.apana.org.au>
	<a3a1d035-0903-47f1-1322-bbea8dceaa00@gigawatt.nl>
	<066e53c4-ad05-35bb-2da2-a377ce8f4629@gigawatt.nl>
Message-ID: <2192f69d-29ec-461c-d09e-cb63e309e8de@gigawatt.nl>
Date: Sat, 10 Mar 2018 03:04:00 +0100
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:58.0) Gecko/20100101
	Thunderbird/58.0
MIME-Version: 1.0
In-Reply-To: <066e53c4-ad05-35bb-2da2-a377ce8f4629@gigawatt.nl>
Content-Language: en-US
Sender: dash-owner@vger.kernel.org
Precedence: bulk
List-ID: <dash.vger.kernel.org>
X-Mailing-List: dash@vger.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

On 3/8/18 1:40 AM, Harald van Dijk wrote:
> If the syntax stack is to be stored on the actual stack, then real 
> recursion could be used instead, as attached.

Even though it won't be accepted in dash, I continued with this approach 
for my own use. The result is now about 1800 bytes smaller (at -Os -s).

After the other changes I'd made, it became apparent to me that the 
syntax tables were unnecessary, and that they'd become fairly easy to 
get rid of. This was a big space saver, and it may be possible to apply 
it to your version as well.

Cheers,
Harald van Dijk

diff --git a/src/expand.c b/src/expand.c
index 2a50830..acd5fdf 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -83,7 +83,7 @@
 #define RMESCAPE_HEAP	0x10	/* Malloc strings instead of stalloc */
 
 /* Add CTLESC when necessary. */
-#define QUOTES_ESC	(EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC	(EXP_FULL | EXP_CASE)
 /* Do not skip NUL characters. */
 #define QUOTES_KEEPNUL	EXP_TILDE
 
@@ -115,8 +115,8 @@ STATIC char *exptilde(char *, char *, int);
 STATIC void expbackq(union node *, int);
 STATIC const char *subevalvar(char *, char *, int, int, int, int, int);
 STATIC char *evalvar(char *, int);
-STATIC size_t strtodest(const char *, const char *, int);
-STATIC void memtodest(const char *, size_t, const char *, int);
+STATIC size_t strtodest(const char *, int);
+STATIC void memtodest(const char *, size_t, int);
 STATIC ssize_t varvalue(char *, int, int, int *);
 STATIC void expandmeta(struct strlist *, int);
 #ifdef HAVE_GLOB
@@ -333,16 +333,6 @@ addquote:
 		case CTLESC:
 			startloc++;
 			length++;
-
-			/*
-			 * Quoted parameter expansion pattern: remove quote
-			 * unless inside inner quotes or we have a literal
-			 * backslash.
-			 */
-			if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
-			    EXP_QPAT && *p != '\\')
-				break;
-
 			goto addquote;
 		case CTLVAR:
 			p = evalvar(p, flag | inquotes);
@@ -396,7 +386,7 @@ done:
 	if (!home || !*home)
 		goto lose;
 	*p = c;
-	strtodest(home, SQSYNTAX, quotes);
+	strtodest(home, quotes | EXP_QUOTED);
 	return (p);
 lose:
 	*p = c;
@@ -521,7 +511,6 @@ expbackq(union node *cmd, int flag)
 	char *p;
 	char *dest;
 	int startloc;
-	char const *syntax = flag & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
 	struct stackmark smark;
 
 	INTOFF;
@@ -535,7 +524,7 @@ expbackq(union node *cmd, int flag)
 	if (i == 0)
 		goto read;
 	for (;;) {
-		memtodest(p, i, syntax, flag & QUOTES_ESC);
+		memtodest(p, i, flag & (QUOTES_ESC | EXP_QUOTED));
 read:
 		if (in.fd < 0)
 			break;
@@ -651,8 +640,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla
 	char *(*scan)(char *, char *, char *, char *, int , int);
 
 	argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
-			       (flag & (EXP_QUOTED | EXP_QPAT) ?
-			        EXP_QPAT : EXP_CASE) : 0));
+			       EXP_CASE : 0));
 	STPUTC('\0', expdest);
 	argbackq = saveargbackq;
 	startp = stackblock() + startloc;
@@ -844,7 +832,7 @@ end:
  */
 
 STATIC void
-memtodest(const char *p, size_t len, const char *syntax, int quotes) {
+memtodest(const char *p, size_t len, int quotes) {
 	char *q;
 
 	if (unlikely(!len))
@@ -855,11 +843,17 @@ memtodest(const char *p, size_t len, const char *syntax, int quotes) {
 	do {
 		int c = (signed char)*p++;
 		if (c) {
-			if ((quotes & QUOTES_ESC) &&
-			    ((syntax[c] == CCTL) ||
-			     (((quotes & EXP_FULL) || syntax != BASESYNTAX) &&
-			      syntax[c] == CBACK)))
-				USTPUTC(CTLESC, q);
+			if (quotes & QUOTES_ESC) {
+				switch (c) {
+					case '\\':
+					case '!': case '*': case '?': case '[': case '=':
+					case '~': case ':': case '/': case '-': case ']':
+						if (quotes & EXP_QUOTED)
+					case CTLVARS:
+							USTPUTC(CTLESC, q);
+						break;
+				}
+			}
 		} else if (!(quotes & QUOTES_KEEPNUL))
 			continue;
 		USTPUTC(c, q);
@@ -870,13 +864,10 @@ memtodest(const char *p, size_t len, const char *syntax, int quotes) {
 
 
 STATIC size_t
-strtodest(p, syntax, quotes)
-	const char *p;
-	const char *syntax;
-	int quotes;
+strtodest(const char *p, int quotes)
 {
 	size_t len = strlen(p);
-	memtodest(p, len, syntax, quotes);
+	memtodest(p, len, quotes);
 	return len;
 }
 
@@ -895,15 +886,13 @@ varvalue(char *name, int varflags, int flags, int *quotedp)
 	int sep;
 	char sepc;
 	char **ap;
-	char const *syntax;
 	int quoted = *quotedp;
 	int subtype = varflags & VSTYPE;
 	int discard = subtype == VSPLUS || subtype == VSLENGTH;
-	int quotes = (discard ? 0 : (flags & QUOTES_ESC)) | QUOTES_KEEPNUL;
+	int quotes = quoted | (discard ? 0 : (flags & QUOTES_ESC)) | QUOTES_KEEPNUL;
 	ssize_t len = 0;
 
 	sep = (flags & EXP_FULL) << CHAR_BIT;
-	syntax = quoted ? DQSYNTAX : BASESYNTAX;
 
 	switch (*name) {
 	case '$':
@@ -946,11 +935,11 @@ param:
 		if (!(ap = shellparam.p))
 			return -1;
 		while ((p = *ap++)) {
-			len += strtodest(p, syntax, quotes);
+			len += strtodest(p, quotes);
 
 			if (*ap && sep) {
 				len++;
-				memtodest(&sepc, 1, syntax, quotes);
+				memtodest(&sepc, 1, quotes);
 			}
 		}
 		break;
@@ -975,7 +964,7 @@ value:
 		if (!p)
 			return -1;
 
-		len = strtodest(p, syntax, quotes);
+		len = strtodest(p, quotes);
 		break;
 	}
 
@@ -1644,7 +1633,6 @@ char *
 _rmescapes(char *str, int flag)
 {
 	char *p, *q, *r;
-	unsigned inquotes;
 	int notescaped;
 	int globbing;
 
@@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag)
 			q = mempcpy(q, str, len);
 		}
 	}
-	inquotes = 0;
 	globbing = flag & RMESCAPE_GLOB;
 	notescaped = globbing;
 	while (*p) {
 		if (*p == (char)CTLQUOTEMARK) {
-			inquotes = ~inquotes;
 			p++;
 			notescaped = globbing;
 			continue;
 		}
+		if (*p == '\\') {
+			/* naked back slash */
+			notescaped = 0;
+			goto copy;
+		}
 		if (*p == (char)CTLESC) {
 			p++;
 			if (notescaped)
 				*q++ = '\\';
-		} else if (*p == '\\' && !inquotes) {
-			/* naked back slash */
-			notescaped = 0;
-			goto copy;
 		}
 		notescaped = globbing;
 copy:
diff --git a/src/expand.h b/src/expand.h
index 26dc5b4..90f5328 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -55,7 +55,6 @@ struct arglist {
 #define	EXP_VARTILDE	0x4	/* expand tildes in an assignment */
 #define	EXP_REDIR	0x8	/* file glob for a redirection (1 match only) */
 #define EXP_CASE	0x10	/* keeps quotes around for CASE pattern */
-#define EXP_QPAT	0x20	/* pattern in quoted parameter expansion */
 #define EXP_VARTILDE2	0x40	/* expand tildes after colons only */
 #define EXP_WORD	0x80	/* expand word in parameter expansion */
 #define EXP_QUOTED	0x100	/* expand word in double quotes */
diff --git a/src/mksyntax.c b/src/mksyntax.c
index a23c18c..4753423 100644
--- a/src/mksyntax.c
+++ b/src/mksyntax.c
@@ -48,27 +48,6 @@ struct synclass {
 	char *comment;
 };
 
-/* Syntax classes */
-struct synclass synclass[] = {
-	{ "CWORD",	"character is nothing special" },
-	{ "CNL",	"newline character" },
-	{ "CBACK",	"a backslash character" },
-	{ "CSQUOTE",	"single quote" },
-	{ "CDQUOTE",	"double quote" },
-	{ "CENDQUOTE",	"a terminating quote" },
-	{ "CBQUOTE",	"backwards single quote" },
-	{ "CVAR",	"a dollar sign" },
-	{ "CENDVAR",	"a '}' character" },
-	{ "CLP",	"a left paren in arithmetic" },
-	{ "CRP",	"a right paren in arithmetic" },
-	{ "CEOF",	"end of file" },
-	{ "CCTL",	"like CWORD, except it must be escaped" },
-	{ "CSPCL",	"these terminate a word" },
-	{ "CIGN",	"character should be ignored" },
-	{ NULL,		NULL }
-};
-
-
 /*
  * Syntax classes for is_ functions.  Warning:  if you add new classes
  * you may have to change the definition of the is_in_name macro.
@@ -94,7 +73,6 @@ static FILE *hfile;
 static char *syntax[513];
 
 static void filltable(char *);
-static void init(void);
 static void add(char *, char *);
 static void print(char *);
 static void output_type_macros(void);
@@ -127,15 +105,6 @@ main(int argc, char **argv)
 	fputs("\n", hfile);
 
 	/* Generate the #define statements in the header file */
-	fputs("/* Syntax classes */\n", hfile);
-	for (i = 0 ; synclass[i].name ; i++) {
-		sprintf(buf, "#define %s %d", synclass[i].name, i);
-		fputs(buf, hfile);
-		for (pos = strlen(buf) ; pos < 32 ; pos = (pos + 8) & ~07)
-			putc('\t', hfile);
-		fprintf(hfile, "/* %s */\n", synclass[i].comment);
-	}
-	putc('\n', hfile);
 	fputs("/* Syntax classes for is_ functions */\n", hfile);
 	for (i = 0 ; is_entry[i].name ; i++) {
 		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
@@ -149,57 +118,12 @@ main(int argc, char **argv)
 	fprintf(hfile, "#define PEOF %d\n\n", -130);
 	fprintf(hfile, "#define PEOA %d\n\n", -129);
 	putc('\n', hfile);
-	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
-	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
-	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
-	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
-	putc('\n', hfile);
 	output_type_macros();		/* is_digit, etc. */
 	putc('\n', hfile);
 
 	/* Generate the syntax tables. */
 	fputs("#include \"shell.h\"\n", cfile);
 	fputs("#include \"syntax.h\"\n\n", cfile);
-	init();
-	fputs("/* syntax table used when not in quotes */\n", cfile);
-	add("\n", "CNL");
-	add("\\", "CBACK");
-	add("'", "CSQUOTE");
-	add("\"", "CDQUOTE");
-	add("`", "CBQUOTE");
-	add("$", "CVAR");
-	add("}", "CENDVAR");
-	add("<>();&| \t", "CSPCL");
-	syntax[1] = "CSPCL";
-	print("basesyntax");
-	init();
-	fputs("\n/* syntax table used when in double quotes */\n", cfile);
-	add("\n", "CNL");
-	add("\\", "CBACK");
-	add("\"", "CENDQUOTE");
-	add("`", "CBQUOTE");
-	add("$", "CVAR");
-	add("}", "CENDVAR");
-	/* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
-	add("!*?[=~:/-]", "CCTL");
-	print("dqsyntax");
-	init();
-	fputs("\n/* syntax table used when in single quotes */\n", cfile);
-	add("\n", "CNL");
-	add("'", "CENDQUOTE");
-	/* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
-	add("!*?[=~:/-]\\", "CCTL");
-	print("sqsyntax");
-	init();
-	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
-	add("\n", "CNL");
-	add("\\", "CBACK");
-	add("`", "CBQUOTE");
-	add("$", "CVAR");
-	add("}", "CENDVAR");
-	add("(", "CLP");
-	add(")", "CRP");
-	print("arisyntax");
 	filltable("0");
 	fputs("\n/* character classification table */\n", cfile);
 	add("0123456789", "ISDIGIT");
@@ -228,23 +152,6 @@ filltable(char *dftval)
 }
 
 
-/*
- * Initialize the syntax table with default values.
- */
-
-static void
-init(void)
-{
-	int ctl;
-
-	filltable("CWORD");
-	syntax[0] = "CEOF";
-	syntax[1] = "CIGN";
-	for (ctl = CTL_FIRST; ctl <= CTL_LAST; ctl++ )
-		syntax[130 + ctl] = "CCTL";
-}
-
-
 /*
  * Add entries to the syntax table.
  */
diff --git a/src/parser.c b/src/parser.c
index 3024ab1..42155f9 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -68,8 +68,15 @@
 
 
 
-/* Used by expandstr to get here-doc like behaviour. */
-#define FAKEEOFMARK (char *)1
+/* Flags for readtoken1(). */
+#define RT_SQSYNTAX  0x01
+#define RT_DQSYNTAX  0x02
+#define RT_HEREDOC   0x04
+#define RT_STRING    0x08
+#define RT_VARSUBST  0x10
+#define RT_ARISUBST  0x20
+#define RT_ARIPAREN  0x40
+#define RT_STRIPTABS 0x80
 
 
 
@@ -106,7 +113,7 @@ STATIC void parseheredoc(void);
 STATIC int peektoken(void);
 STATIC int readtoken(void);
 STATIC int xxreadtoken(void);
-STATIC int readtoken1(int, char const *, char *, int);
+STATIC int readtoken1(int, char *, int);
 STATIC void synexpect(int) __attribute__((__noreturn__));
 STATIC void synerror(const char *) __attribute__((__noreturn__));
 STATIC void setprompt(int);
@@ -121,11 +128,6 @@ isassignment(const char *p)
 	return *q == '=';
 }
 
-static inline int realeofmark(const char *eofmark)
-{
-	return eofmark && eofmark != FAKEEOFMARK;
-}
-
 
 /*
  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
@@ -657,8 +659,7 @@ parseheredoc(void)
 		if (needprompt) {
 			setprompt(2);
 		}
-		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
-				here->eofmark, here->striptabs);
+		readtoken1(pgetc(), here->eofmark, (here->here->type == NHERE ? RT_SQSYNTAX : RT_DQSYNTAX) | RT_HEREDOC | here->striptabs);
 		n = (union node *)stalloc(sizeof (struct narg));
 		n->narg.type = NARG;
 		n->narg.next = NULL;
@@ -828,7 +829,8 @@ xxreadtoken(void)
 		}
 	}
 breakloop:
-	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
+	readtoken1(c, NULL, 0);
+	return lasttoken;
 #undef RETURN
 }
 
@@ -856,69 +858,59 @@ static int pgetc_eatbnl(void)
  * word which marks the end of the document and striptabs is true if
  * leading tabs should be stripped from the document.  The argument firstc
  * is the first character of the input token or document.
- *
- * Because C does not have internal subroutines, I have simulated them
- * using goto's to implement the subroutine linkage.  The following macros
- * will run code that appears at the end of readtoken1.
  */
 
-#define CHECKEND()	{goto checkend; checkend_return:;}
-#define PARSEREDIR()	{goto parseredir; parseredir_return:;}
-#define PARSESUB()	{goto parsesub; parsesub_return:;}
-#define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
-#define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
-#define	PARSEARITH()	{goto parsearith; parsearith_return:;}
+STATIC char *readtoken1_loop(char *, int, char *, int);
+STATIC int readtoken1_endword(char *, char *);
+STATIC char *readtoken1_checkend(char *, int *, char *, int);
+STATIC int readtoken1_parseredir(char *, int);
+STATIC char *readtoken1_parsesub(char *, char *, int);
+STATIC char *readtoken1_parsebackq(char *, int, int);
+STATIC char *readtoken1_parsearith(char *, char *, int);
 
 STATIC int
-readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
+readtoken1(int firstc, char *eofmark, int flags)
 {
-	int c = firstc;
 	char *out;
-	size_t len;
-	struct nodelist *bqlist;
-	int quotef;
-	int dblquote;
-	int varnest;	/* levels of variables expansion */
-	int arinest;	/* levels of arithmetic expansion */
-	int parenlevel;	/* levels of parens in arithmetic */
-	int dqvarnest;	/* levels of variables expansion within double quotes */
-	int oldstyle;
-	/* syntax before arithmetic */
-	char const *uninitialized_var(prevsyntax);
-
-	dblquote = 0;
-	if (syntax == DQSYNTAX)
-		dblquote = 1;
-	quotef = 0;
-	bqlist = NULL;
-	varnest = 0;
-	arinest = 0;
-	parenlevel = 0;
-	dqvarnest = 0;
 
+	quoteflag = 0;
+	backquotelist = NULL;
 	STARTSTACKSTR(out);
+	out = readtoken1_loop(out, firstc, eofmark, flags);
+	return readtoken1_endword(out, eofmark);
+}
+
+STATIC char *
+readtoken1_loop(char *out, int c, char *eofmark, int flags)
+{
+	int qsyntax;
+
 	loop: {	/* for each line, until end of word */
-		CHECKEND();	/* set c to PEOF if at end of here document */
+		out = readtoken1_checkend(out, &c, eofmark, flags);	/* set c to PEOF if at end of here document */
 		for (;;) {	/* until end of line or end of word */
 			CHECKSTRSPACE(4, out);	/* permit 4 calls to USTPUTC */
-			switch(syntax[c]) {
-			case CNL:	/* '\n' */
-				if (syntax == BASESYNTAX)
+			switch(c) {
+			case '\n':
+				if (!flags)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 				nlprompt();
 				c = pgetc();
 				goto loop;		/* continue outer loop */
-			case CWORD:
+word:
+			default:
 				USTPUTC(c, out);
 				break;
-			case CCTL:
-				if (eofmark == NULL || dblquote)
+control:
+			case '!': case '*': case '?': case '[': case '=':
+			case '~': case ':': case '/': case '-': case ']':
+				if (flags & (RT_SQSYNTAX | RT_DQSYNTAX) && !(flags & RT_HEREDOC))
 					USTPUTC(CTLESC, out);
 				USTPUTC(c, out);
 				break;
-			/* backslash */
-			case CBACK:
+			case '\\':
+				if (flags & RT_SQSYNTAX)
+					goto control;
 				c = pgetc2();
 				if (c == PEOF) {
 					USTPUTC(CTLESC, out);
@@ -928,128 +920,132 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 					nlprompt();
 				} else {
 					if (
-						dblquote &&
+						(flags & RT_DQSYNTAX) &&
 						c != '\\' && c != '`' &&
 						c != '$' && (
 							c != '"' ||
-							eofmark != NULL
+							!(flags & RT_STRING)
+						) && (
+							c != '}' ||
+							!(flags & RT_VARSUBST)
 						)
 					) {
+						USTPUTC(CTLESC, out);
 						USTPUTC('\\', out);
 					}
 					USTPUTC(CTLESC, out);
 					USTPUTC(c, out);
-					quotef++;
-				}
-				break;
-			case CSQUOTE:
-				syntax = SQSYNTAX;
-quotemark:
-				if (eofmark == NULL) {
-					USTPUTC(CTLQUOTEMARK, out);
+					quoteflag++;
 				}
 				break;
-			case CDQUOTE:
-				syntax = DQSYNTAX;
-				dblquote = 1;
-				goto quotemark;
-			case CENDQUOTE:
-				if (eofmark && !varnest)
-					USTPUTC(c, out);
-				else {
-					if (dqvarnest == 0) {
-						syntax = BASESYNTAX;
-						dblquote = 0;
-					}
-					quotef++;
-					goto quotemark;
+				do {
+			case '\'':
+					qsyntax = RT_SQSYNTAX;
+					break;
+			case '"':
+					qsyntax = RT_DQSYNTAX;
+					break;
+				} while(0);
+				if (flags & (RT_HEREDOC | RT_SQSYNTAX | RT_DQSYNTAX) & ~qsyntax)
+					goto word;
+				if ((flags & (qsyntax | RT_VARSUBST)) == qsyntax) {
+					quoteflag++;
+					if (!(flags & RT_HEREDOC))
+						USTPUTC(CTLQUOTEMARK, out);
+					return out;
 				}
+				USTPUTC(CTLQUOTEMARK, out);
+				out = readtoken1_loop(out, pgetc(), eofmark, RT_STRING | qsyntax | (flags & RT_STRIPTABS));
 				break;
-			case CVAR:	/* '$' */
-				PARSESUB();		/* parse substitution */
-				break;
-			case CENDVAR:	/* '}' */
-				if (varnest > 0) {
-					varnest--;
-					if (dqvarnest > 0) {
-						dqvarnest--;
-					}
-					USTPUTC(CTLENDVAR, out);
-				} else {
-					USTPUTC(c, out);
-				}
+			case '$':
+				if (flags & RT_SQSYNTAX)
+					goto word;
+				out = readtoken1_parsesub(out, eofmark, flags);		/* parse substitution */
 				break;
-			case CLP:	/* '(' in arithmetic */
-				parenlevel++;
+			case '}':
+				if (!(flags & RT_VARSUBST))
+					goto word;
+				USTPUTC(CTLENDVAR, out);
+				return out;
+			case '(':
+				if (!(flags & RT_ARISUBST))
+					goto special;
 				USTPUTC(c, out);
+				out = readtoken1_loop(out, pgetc(), eofmark, flags | RT_ARIPAREN);
 				break;
-			case CRP:	/* ')' in arithmetic */
-				if (parenlevel > 0) {
-					USTPUTC(c, out);
-					--parenlevel;
-				} else {
+			case ')':
+				if (!(flags & (RT_ARISUBST | RT_ARIPAREN)))
+					goto special;
+				if (!(flags & RT_ARIPAREN)) {
 					if (pgetc() == ')') {
 						USTPUTC(CTLENDARI, out);
-						if (!--arinest)
-							syntax = prevsyntax;
-					} else {
-						/*
-						 * unbalanced parens
-						 *  (don't 2nd guess - no error)
-						 */
-						pungetc();
-						USTPUTC(')', out);
+						return out;
 					}
+					pungetc();
+					USTPUTC(')', out);
+				} else {
+					USTPUTC(')', out);
+					return out;
 				}
+			case '`':
+				if (flags & RT_SQSYNTAX)
+					goto word;
+				out = readtoken1_parsebackq(out, flags, 1);
 				break;
-			case CBQUOTE:	/* '`' */
-				PARSEBACKQOLD();
-				break;
-			case CEOF:
+			case PEOF:
 				goto endword;		/* exit outer loop */
-			case CIGN:
-				break;
-			default:
-				if (varnest == 0)
-					goto endword;	/* exit outer loop */
-				if (c != PEOA) {
+special:
+			case PEOA:
+			case '<': case '>': // case '(': case ')':
+			case ';': case '&': case '|': case ' ': case '\t':
+			case CTLVARS:
+				if (!flags)
+					goto endword;
+				if (c != PEOA)
 					USTPUTC(c, out);
-				}
+				break;
 			}
 			c = pgetc();
 		}
 	}
 endword:
-	if (syntax == ARISYNTAX)
+	if (flags & RT_ARISUBST)
 		synerror("Missing '))'");
-	if (syntax != BASESYNTAX && eofmark == NULL)
+	if (flags & RT_STRING)
 		synerror("Unterminated quoted string");
-	if (varnest != 0) {
-		/* { */
+	if (flags & RT_VARSUBST) {
+		/* "{" */
 		synerror("Missing '}'");
 	}
+	pungetc();
+	return out;
+}
+
+STATIC int
+readtoken1_endword(char *out, char *eofmark)
+{
+	size_t len;
+	int c;
+
 	USTPUTC('\0', out);
 	len = out - (char *)stackblock();
 	out = stackblock();
+
 	if (eofmark == NULL) {
+		c = pgetc();
 		if ((c == '>' || c == '<')
-		 && quotef == 0
+		 && !quoteflag
 		 && len <= 2
 		 && (*out == '\0' || is_digit(*out))) {
-			PARSEREDIR();
-			return lasttoken = TREDIR;
+			return readtoken1_parseredir(out, c);
 		} else {
 			pungetc();
 		}
 	}
-	quoteflag = quotef;
-	backquotelist = bqlist;
 	grabstackblock(len);
 	wordtext = out;
 	return lasttoken = TWORD;
-/* end of readtoken routine */
-
-
+}
 
 /*
  * Check to see whether we are at the end of the here document.  When this
@@ -1057,30 +1053,32 @@ endword:
  * we are at the end of the here document, this routine sets the c to PEOF.
  */
 
-checkend: {
-	if (realeofmark(eofmark)) {
+STATIC char *
+readtoken1_checkend(char *out, int *c, char *eofmark, int flags)
+{
+	if (eofmark) {
 		int markloc;
 		char *p;
 
-		if (c == PEOA) {
-			c = pgetc2();
+		if (*c == PEOA) {
+			*c = pgetc2();
 		}
-		if (striptabs) {
-			while (c == '\t') {
-				c = pgetc2();
+		if (flags & RT_STRIPTABS) {
+			while (*c == '\t') {
+				*c = pgetc2();
 			}
 		}
 
 		markloc = out - (char *)stackblock();
-		for (p = eofmark; STPUTC(c, out), *p; p++) {
-			if (c != *p)
+		for (p = eofmark; STPUTC(*c, out), *p; p++) {
+			if (*c != *p)
 				goto more_heredoc;
 
-			c = pgetc2();
+			*c = pgetc2();
 		}
 
-		if (c == '\n' || c == PEOF) {
-			c = PEOF;
+		if (*c == '\n' || *c == PEOF) {
+			*c = PEOF;
 			nlnoprompt();
 		} else {
 			int len;
@@ -1090,8 +1088,8 @@ more_heredoc:
 			len = out - p;
 
 			if (len) {
-				len -= c < 0;
-				c = p[-1];
+				len -= *c < 0;
+				*c = p[-1];
 
 				if (len) {
 					char *str;
@@ -1106,7 +1104,7 @@ more_heredoc:
 
 		STADJUST((char *)stackblock() + markloc - out, out);
 	}
-	goto checkend_return;
+	return out;
 }
 
 
@@ -1116,7 +1114,9 @@ more_heredoc:
  * first character of the redirection operator.
  */
 
-parseredir: {
+STATIC int
+readtoken1_parseredir(char *out, int c)
+{
 	char fd = *out;
 	union node *np;
 
@@ -1146,7 +1146,7 @@ parseredir: {
 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
 			heredoc->here = np;
 			if ((c = pgetc()) == '-') {
-				heredoc->striptabs = 1;
+				heredoc->striptabs = RT_STRIPTABS;
 			} else {
 				heredoc->striptabs = 0;
 				pungetc();
@@ -1170,7 +1170,7 @@ parseredir: {
 	if (fd != '\0')
 		np->nfile.fd = digit_val(fd);
 	redirnode = np;
-	goto parseredir_return;
+	return lasttoken = TREDIR;
 }
 
 
@@ -1179,7 +1179,10 @@ parseredir: {
  * and nothing else.
  */
 
-parsesub: {
+STATIC char *
+readtoken1_parsesub(char *out, char *eofmark, int flags)
+{
+	int c;
 	int subtype;
 	int typeloc;
 	char *p;
@@ -1195,12 +1198,14 @@ parsesub: {
 		pungetc();
 	} else if (c == '(') {	/* $(command) or $((arith)) */
 		if (pgetc_eatbnl() == '(') {
-			PARSEARITH();
+			out = readtoken1_parsearith(out, eofmark, flags);
 		} else {
 			pungetc();
-			PARSEBACKQNEW();
+			out = readtoken1_parsebackq(out, flags, 0);
 		}
 	} else {
+		int newflags = (flags & (RT_DQSYNTAX | RT_STRIPTABS)) | RT_VARSUBST;
+
 		USTPUTC(CTLVAR, out);
 		typeloc = out - (char *)stackblock();
 		STADJUST(1, out);
@@ -1276,6 +1281,7 @@ varname:
 						subtype++;
 					else
 						pungetc();
+					newflags &= ~RT_DQSYNTAX;
 					break;
 				}
 			}
@@ -1284,14 +1290,11 @@ badsub:
 			pungetc();
 		}
 		*((char *)stackblock() + typeloc) = subtype;
-		if (subtype != VSNORMAL) {
-			varnest++;
-			if (dblquote)
-				dqvarnest++;
-		}
 		STPUTC('=', out);
+		if (subtype != VSNORMAL)
+			out = readtoken1_loop(out, pgetc(), eofmark, newflags);
 	}
-	goto parsesub_return;
+	return out;
 }
 
 
@@ -1302,7 +1305,9 @@ badsub:
  * characters on the top of the stack which must be preserved.
  */
 
-parsebackq: {
+STATIC char *
+readtoken1_parsebackq(char *out, int flags, int oldstyle)
+{
 	struct nodelist **nlpp;
 	union node *n;
 	char *str;
@@ -1346,7 +1351,7 @@ parsebackq: {
 					continue;
 				}
                                 if (pc != '\\' && pc != '`' && pc != '$'
-                                    && (!dblquote || pc != '"'))
+                                    && (!(flags & RT_DQSYNTAX) || pc != '"'))
                                         STPUTC('\\', pout);
 				if (pc > PEOA) {
 					break;
@@ -1374,7 +1379,7 @@ done:
 			setinputstring(pstr);
                 }
         }
-	nlpp = &bqlist;
+	nlpp = &backquotelist;
 	while (*nlpp)
 		nlpp = &(*nlpp)->next;
 	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
@@ -1385,7 +1390,9 @@ done:
 		doprompt = 0;
 	}
 
+	struct nodelist *savebqlist = backquotelist;
 	n = list(2);
+	backquotelist = savebqlist;
 
 	if (oldstyle)
 		doprompt = saveprompt;
@@ -1411,27 +1418,22 @@ done:
 		STADJUST(savelen, out);
 	}
 	USTPUTC(CTLBACKQ, out);
-	if (oldstyle)
-		goto parsebackq_oldreturn;
-	else
-		goto parsebackq_newreturn;
+	return out;
 }
 
+
+
 /*
  * Parse an arithmetic expansion (indicate start of one and set state)
  */
-parsearith: {
 
-	if (++arinest == 1) {
-		prevsyntax = syntax;
-		syntax = ARISYNTAX;
-	}
+STATIC char *
+readtoken1_parsearith(char *out, char *eofmark, int flags)
+{
 	USTPUTC(CTLARI, out);
-	goto parsearith_return;
+	return readtoken1_loop(out, pgetc(), eofmark, RT_DQSYNTAX | RT_ARISUBST | (flags & RT_STRIPTABS));
 }
 
-} /* end of readtoken */
-
 
 
 #ifdef mkinit
@@ -1523,7 +1525,7 @@ expandstr(const char *ps)
 	saveprompt = doprompt;
 	doprompt = 0;
 
-	readtoken1(pgetc(), DQSYNTAX, FAKEEOFMARK, 0);
+	readtoken1(pgetc(), NULL, RT_HEREDOC | RT_DQSYNTAX);
 
 	doprompt = saveprompt;
 
diff --git a/src/parser.h b/src/parser.h
index 2875cce..cecd9aa 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -47,6 +47,15 @@
 #define	CTLQUOTEMARK -120
 #define	CTL_LAST -120		/* last 'special' character */
 
+#define CTLVARS \
+	     CTLESC:      \
+	case CTLVAR:      \
+	case CTLENDVAR:   \
+	case CTLBACKQ:    \
+	case CTLARI:      \
+	case CTLENDARI:   \
+	case CTLQUOTEMARK
+
 /* variable substitution byte (follows CTLVAR) */
 #define VSTYPE	0x0f		/* type of variable substitution */
 #define VSNUL	0x10		/* colon--treat the empty string as unset */