@@ -32,27 +32,27 @@
* SUCH DAMAGE.
*/
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/stat.h>
+#include <ctype.h>
#include <dirent.h>
-#include <unistd.h>
-#ifdef HAVE_GETPWNAM
-#include <pwd.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <string.h>
#ifdef HAVE_FNMATCH
#include <fnmatch.h>
#endif
#ifdef HAVE_GLOB
#include <glob.h>
#endif
-#include <ctype.h>
+#include <inttypes.h>
+#include <limits.h>
+#ifdef HAVE_GETPWNAM
+#include <pwd.h>
+#endif
+#include <string.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include <wchar.h>
/*
@@ -550,8 +550,10 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend,
loc = startp;
loc2 = rmesc;
do {
- int match;
const char *s = loc2;
+ unsigned ml;
+ int match;
+
c = *loc2;
if (zero) {
*loc2 = '\0';
@@ -560,12 +562,26 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend,
match = pmatch(str, s);
*loc2 = c;
if (match)
- return loc;
- if (quotes && *loc == (char)CTLESC)
+ return quotes ? loc : loc2;
+
+ if (!c)
+ break;
+
+ if (*loc != (char)CTLMBCHAR) {
+ if (*loc == (char)CTLESC)
+ loc++;
loc++;
- loc++;
- loc2++;
- } while (c);
+ loc2++;
+ continue;
+ }
+
+ if (*++loc == (char)CTLESC)
+ loc++;
+
+ ml = (unsigned char)*loc;
+ loc += ml + 3;
+ loc2 += ml;
+ } while (1);
return 0;
}
@@ -573,14 +589,16 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend,
static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend,
char *str, int quotes, int zero
) {
- int esc = 0;
+ size_t esc = 0;
char *loc;
char *loc2;
for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) {
- int match;
- char c = *loc2;
const char *s = loc2;
+ char c = *loc2;
+ unsigned ml;
+ int match;
+
if (zero) {
*loc2 = '\0';
s = rmesc;
@@ -588,17 +606,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend,
match = pmatch(str, s);
*loc2 = c;
if (match)
- return loc;
+ return quotes ? loc : loc2;
loc--;
- if (quotes) {
- if (--esc < 0) {
- esc = esclen(startp, loc);
- }
- if (esc % 2) {
- esc--;
- loc--;
- }
+ if (!esc--)
+ esc = esclen(startp, loc);
+ if (esc % 2) {
+ esc--;
+ loc--;
+ continue;
}
+ if (*loc != (char)CTLMBCHAR)
+ continue;
+
+ ml = (unsigned char)*--loc;
+ loc -= ml + 2;
+ if (*loc == (char)CTLESC)
+ loc--;
+ loc2 -= ml - 1;
}
return 0;
}
@@ -652,14 +676,11 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc,
nstrloc = str - (char *)stackblock();
}
- rmesc = startp;
- if (quotes) {
- rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW);
- if (rmesc != startp)
- rmescend = expdest;
- startp = stackblock() + startloc;
- str = stackblock() + nstrloc;
- }
+ rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW);
+ if (rmesc != startp)
+ rmescend = expdest;
+ startp = stackblock() + startloc;
+ str = stackblock() + nstrloc;
rmescend--;
/* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */
@@ -669,16 +690,29 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc,
endp = stackblock() + strloc - 1;
loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero);
- if (loc) {
- if (zero) {
- memmove(startp, loc, endp - loc);
- loc = startp + (endp - loc);
+ if (!loc) {
+ if (quotes) {
+ rmesc = startp;
+ rmescend = endp;
}
- *loc = '\0';
- } else
- loc = endp;
+ } else if (!quotes) {
+ if (zero)
+ rmesc = loc;
+ else
+ rmescend = loc;
+ } else if (zero) {
+ rmesc = loc;
+ rmescend = endp;
+ } else {
+ rmesc = startp;
+ rmescend = loc;
+ }
+
+ memmove(startp, rmesc, rmescend - rmesc);
+ loc = startp + (rmescend - rmesc);
out:
+ *loc = '\0';
amount = loc - expdest;
STADJUST(amount, expdest);
@@ -704,6 +738,7 @@ evalvar(char *p, int flag)
ssize_t varlen;
int discard;
int quoted;
+ int mbchar;
varflags = *p++ & ~VSBIT;
subtype = varflags & VSTYPE;
@@ -713,8 +748,18 @@ evalvar(char *p, int flag)
startloc = expdest - (char *)stackblock();
p = strchr(p, '=') + 1;
+ mbchar = 0;
+ switch (subtype) {
+ case VSTRIMLEFT:
+ case VSTRIMLEFTMAX:
+ case VSTRIMRIGHT:
+ case VSTRIMRIGHTMAX:
+ mbchar = EXP_MBCHAR;
+ break;
+ }
+
again:
- varlen = varvalue(var, varflags, flag, quoted);
+ varlen = varvalue(var, varflags, flag | mbchar, quoted);
if (varflags & VSNUL)
varlen--;
@@ -801,7 +846,7 @@ static char *chtodest(int c, int flags, char *out)
{
const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
- if ((flags & QUOTES_ESC) &&
+ if ((flags & (QUOTES_ESC | EXP_MBCHAR)) &&
((syntax[c] == CCTL) ||
(flags & EXP_QUOTED && syntax[c] == CBACK)))
USTPUTC(CTLESC, out);
@@ -823,9 +868,13 @@ static size_t memtodest(const char *p, size_t len, int flags)
if (unlikely(!len))
return 0;
- q = makestrspace(len * 2, expdest);
+ /* CTLMBCHAR, 2, c, c, 2, CTLMBCHAR */
+ q = makestrspace(len * 3, expdest);
do {
+ mbstate_t mbs = {};
+ size_t ml;
+
c = (signed char)*p++;
if (c)
@@ -833,19 +882,30 @@ static size_t memtodest(const char *p, size_t len, int flags)
else if (!(flags & EXP_KEEPNUL))
continue;
- if (c < 0) {
- mbstate_t mbs = {};
+ if (c >= 0)
+ goto copy;
- p--;
- do {
- q = chtodest(c, flags, q);
- } while (mbrlen(p++, 1, &mbs) == -2 &&
- (c = *p, --len));
- if (!len)
- break;
- continue;
+ ml = mbrlen(p - 1, len, &mbs);
+ if (ml == -1 || ml == -2 || ml < 2 || ml > MB_LEN_MAX)
+ goto copy;
+
+ if ((flags & (QUOTES_ESC | EXP_MBCHAR))) {
+ USTPUTC(CTLMBCHAR, q);
+ USTPUTC(ml, q);
}
+ q = mempcpy(q, p - 1, ml);
+
+ if ((flags & (QUOTES_ESC | EXP_MBCHAR))) {
+ USTPUTC(ml, q);
+ USTPUTC(CTLMBCHAR, q);
+ }
+
+ p += ml - 1;
+ len -= ml - 1;
+ continue;
+
+copy:
q = chtodest(c, flags, q);
} while (--len);
@@ -1720,6 +1780,8 @@ _rmescapes(char *str, int flag)
inquotes = 0;
notescaped = globbing;
while (*p) {
+ unsigned ml;
+
if (*p == (char)CTLQUOTEMARK) {
p++;
inquotes ^= globbing;
@@ -1743,6 +1805,18 @@ add_escape:
}
}
notescaped = globbing;
+
+ if (*p != (char)CTLMBCHAR)
+ goto copy;
+
+ if (*++p == (char)CTLESC)
+ p++;
+
+ ml = (unsigned char)*p++;
+ q = mempcpy(q, p, ml);
+ p += ml + 2;
+ continue;
+
copy:
*q++ = *p++;
}
@@ -60,6 +60,7 @@ struct arglist {
#define EXP_QUOTED 0x100 /* expand word in double quotes */
#define EXP_KEEPNUL 0x200 /* do not skip NUL characters */
#define EXP_DISCARD 0x400 /* discard result of expansion */
+#define EXP_MBCHAR 0x800 /* mark multi-byte characters */
struct jmploc;
@@ -67,7 +67,7 @@ const char cqchars[] = {
#ifdef HAVE_FNMATCH
'^',
#endif
- CTLESC, CTLQUOTEMARK, 0
+ CTLESC, CTLMBCHAR, CTLQUOTEMARK, 0
};
const char illnum[] = "Illegal number: %s";
const char homestr[] = "HOME";
@@ -44,6 +44,7 @@ union node;
#define CTLVAR -126 /* variable defn */
#define CTLENDVAR -125
#define CTLBACKQ -124
+#define CTLMBCHAR -123
#define CTLARI -122 /* arithmetic expression */
#define CTLENDARI -121
#define CTLQUOTEMARK -120
When trimming variables in subevalvar, process multi-byte characters
as one unit instead of their constituent bytes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c   | 192 ++++++++++++++++++++++++++++++++++---------------
 src/expand.h   |   1 +
 src/mystring.c |   2 +-
 src/parser.h   |   1 +
 4 files changed, 136 insertions(+), 60 deletions(-)