@@ -54,6 +54,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <wchar.h>
+#include <wctype.h>
/*
* Routines to expand arguments to commands. We have to deal with
@@ -164,6 +165,30 @@ esclen(const char *start, const char *p) {
return esc;
}
+static __attribute__((noinline)) unsigned mbnext(const char *p) /* size the character at p; result packs two byte counts, see return */
+{
+ unsigned start = 0; /* bytes callers skip before the character data (low 8 bits of the result) */
+ unsigned end = 0; /* first used as read index into p, then as the count stored from bit 8 up */
+ unsigned ml; /* value of the length byte read in the CTLMBCHAR case */
+ int c;
+
+ c = p[end++]; /* leading byte; end is 1 from here on unless the CTLMBCHAR case overrides it */
+
+ switch (c) {
+ case CTLMBCHAR:
+ if (p[end] == CTLESC) /* a CTLESC may sit between the marker and the length byte */
+ end++;
+ ml = (unsigned char)p[end++];
+ start = end; /* 2, or 3 when the CTLESC was present */
+ end = ml + 2; /* ml bytes plus two more; > 3 whenever ml >= 2, which callers test to detect this case */
+ break;
+ case CTLESC:
+ start++; /* skip the escape itself; end stays 1 for the byte after it */
+ break;
+ }
+
+ return start | end << 8; /* callers split this with (mb & 0xff) and (mb >> 8) */
+}
static inline const char *getpwhome(const char *name)
{
@@ -552,6 +577,7 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend,
loc2 = rmesc;
do {
const char *s = loc2;
+ unsigned mb;
unsigned ml;
int match;
@@ -568,19 +594,9 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend,
if (!c)
break;
- if (*loc != (char)CTLMBCHAR) {
- if (*loc == (char)CTLESC)
- loc++;
- loc++;
- loc2++;
- continue;
- }
-
- if (*++loc == (char)CTLESC)
- loc++;
-
- ml = (unsigned char)*loc;
- loc += ml + 3;
+ mb = mbnext(loc);
+ loc += (mb & 0xff) + (mb >> 8);
+ ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1;
loc2 += ml;
} while (1);
return 0;
@@ -930,18 +946,22 @@ static size_t strtodest(const char *p, int flags)
STATIC ssize_t
varvalue(char *name, int varflags, int flags, int quoted)
{
+ int subtype = varflags & VSTYPE;
+ const char *seps;
+ ssize_t len = 0;
+ unsigned seplen;
+ size_t start;
+ int discard;
+ char sepc;
+ char **ap;
+ int sep;
int num;
char *p;
int i;
- int sep;
- char sepc;
- char **ap;
- int subtype = varflags & VSTYPE;
- int discard = (subtype == VSPLUS || subtype == VSLENGTH) |
- (flags & EXP_DISCARD);
- ssize_t len = 0;
- size_t start;
- char c;
+ int c;
+
+ discard = (subtype == VSPLUS || subtype == VSLENGTH) |
+ (flags & EXP_DISCARD);
if (!subtype) {
if (discard)
@@ -1004,15 +1024,27 @@ numvar:
sep &= ~quoted;
sep |= ifsset() ? (unsigned char)(c & ifsval()[0]) : ' ';
param:
- sepc = sep;
if (!(ap = shellparam.p))
return -1;
+ sepc = sep;
+ seps = &sepc;
+ seplen = 1;
+ if (sepc < 0) {
+ mbstate_t mbs = {};
+ size_t ml;
+
+ ml = mbrlen(ifsval(), strlen(ifsval()), &mbs);
+ if (ml != -1 && ml != -2 && ml > 1) {
+ seps = ifsval();
+ seplen = ml;
+ }
+ }
while ((p = *ap++)) {
len += strtodest(p, flags);
if (*ap && sep) {
len++;
- memtodest(&sepc, 1, flags | EXP_KEEPNUL);
+ memtodest(seps, seplen, flags | EXP_KEEPNUL);
}
}
break;
@@ -1074,7 +1106,54 @@ recordregion(int start, int end, int nulonly)
ifslastp->nulonly = nulonly;
}
+static __attribute__((noinline)) unsigned ifsisifs( /* is the character at p one of the separators listed in ifs? */
+ const char *p, unsigned ml, const char *ifs, size_t ifslen) /* ml: byte length compared against multi-byte entries of ifs */
+{
+ bool isdefifs = false;
+ size_t slen = ifslen; /* bytes of ifs still to scan */
+ const char *s = ifs;
+ wchar_t c = *p;
+ bool isifs;
+ isifs = !c; /* a NUL byte at p counts as a match without scanning */
+ if (isifs) {
+ p = ifs;
+ c = *p; /* the whitespace test below then looks at the first byte of ifs */
+ slen = 0; /* disables the loop */
+ }
+
+ while (slen) {
+ mbstate_t mbst = {}; /* fresh conversion state for each entry */
+ size_t ifsml;
+ wchar_t c2;
+
+ if ((signed char)*s > 0 || /* positive byte value: treat as a single-byte entry */
+ (ifsml = mbrtowc(&c2, s, slen, &mbst),
+ ifsml == -2 || ifsml == -1 || ifsml < 2)) { /* decode failed or consumed fewer than 2 bytes: also compare byte-wise */
+ if (c == *s) {
+ isifs = true;
+ break;
+ }
+ s++;
+ slen--;
+ continue;
+ }
+
+ if (ifsml == ml && !memcmp(p, s, ifsml)) { /* multi-byte entry: same length and identical bytes */
+ isifs = true;
+ c = c2; /* classify the decoded wide character rather than the first byte */
+ break;
+ }
+
+ s += ifsml; /* step over the whole multi-byte entry */
+ slen -= ifsml;
+ }
+
+ if (isifs)
+ isdefifs = iswspace(c); /* set when the matched character is whitespace per iswspace() */
+
+ return isifs | isdefifs << 1; /* bit 0: isifs, bit 1: isdefifs */
+}
/*
* Break the argument string into pieces based upon IFS and add the
@@ -1086,16 +1165,16 @@ recordregion(int start, int end, int nulonly)
void
ifsbreakup(char *string, int maxargs, struct arglist *arglist)
{
+ const char *ifs, *realifs;
struct ifsregion *ifsp;
struct strlist *sp;
+ char *r = NULL;
+ size_t ifslen;
char *start;
+ int nulonly;
+ int ifsspc;
char *p;
char *q;
- char *r = NULL;
- const char *ifs, *realifs;
- int ifsspc;
- int nulonly;
-
start = string;
if (ifslastp != NULL) {
@@ -1110,21 +1189,27 @@ ifsbreakup(char *string, int maxargs, struct arglist *arglist)
afternul = nulonly;
nulonly = ifsp->nulonly;
ifs = nulonly ? nullstr : realifs;
+ ifslen = strlen(ifs);
ifsspc = 0;
while (p < string + ifsp->endoff) {
- int c;
- bool isifs;
+ unsigned ifschar;
+ unsigned sisifs;
bool isdefifs;
+ unsigned ml;
+ bool isifs;
q = p;
- c = *p++;
- if (c == (char)CTLESC)
- c = *p++;
- isifs = strchr(ifs, c);
- isdefifs = false;
- if (isifs)
- isdefifs = strchr(defifs, c);
+ ifschar = mbnext(p);
+ p += ifschar & 0xff;
+ ml = (ifschar >> 8) > 3 ?
+ (ifschar >> 8) - 2 : 0;
+
+ sisifs = ifsisifs(p, ml, ifs, ifslen);
+ p += ifschar >> 8;
+
+ isifs = sisifs & 1;
+ isdefifs = sisifs >> 1;
/* If only reading one more argument:
* If we have exactly one field,
@@ -1380,32 +1465,24 @@ static void expmeta_rmescapes(char *enddir, char *name)
preglob(strcpy(enddir, name), RMESCAPE_EMETA);
}
-static unsigned mbcharlen(char *p)
-{
- int esc = 0;
-
- if (*++p == (char)CTLESC)
- esc++;
-
- return esc + 3 + (unsigned char)p[esc];
-}
-
static int skipesc(char *p)
{
+ unsigned short mb;
int esc = 0;
- if (p[esc] == (char)CTLMBCHAR)
- return esc + mbcharlen(p);
+ mb = mbnext(p);
+ if ((mb >> 8) > 3)
+ return (mb & 0xff) + (mb >> 8) - 1;
- if (*p == (char)CTLESC)
- esc++;
+ esc = mb & 0xff;
if (p[esc] == '\\' && p[esc + 1]) {
esc++;
- if (p[esc] == (char)CTLMBCHAR)
- return esc + mbcharlen(p + esc);
- if (p[esc] == (char)CTLESC)
- esc++;
+ mb = mbnext(p + esc);
+ if ((mb >> 8) > 3)
+ return esc + (mb & 0xff) + (mb >> 8) - 1;
+
+ esc += mb & 0xff;
}
return esc;
@@ -1813,6 +1890,7 @@ _rmescapes(char *str, int flag)
inquotes = 0;
notescaped = globbing;
while (*p) {
+ unsigned mb;
unsigned ml;
if (*p == (char)CTLQUOTEMARK) {
@@ -1845,13 +1923,14 @@ add_escape:
}
notescaped = globbing;
- if (*p != (char)CTLMBCHAR)
+ mb = mbnext(p);
+ ml = mb >> 8;
+
+ if (ml <= 3)
goto copy;
- if (*++p == (char)CTLESC)
- p++;
-
- ml = (unsigned char)*p++;
+ ml -= 2;
+ p += mb & 0xff;
q = mempcpy(q, p, ml);
p += ml + 2;
continue;
When multi-byte characters are used in IFS, they will be used for field splitting. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- src/expand.c | 201 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 140 insertions(+), 61 deletions(-)