diff mbox series

[5/8] expand: Process multi-byte characters in expmeta

Message ID 98ac4609519b4885148ab6185c763bfa3be9f50e.1714215826.git.herbert@gondor.apana.org.au (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show
Series Add multi-byte support | expand

Commit Message

Herbert Xu April 20, 2024, 1:46 p.m. UTC
When glob(3) is not in use, make sure that expmeta processes
multi-byte characters correctly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c | 107 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 34 deletions(-)
diff mbox series

Patch

diff --git a/src/expand.c b/src/expand.c
index 14c6a15..1e86058 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -84,6 +84,7 @@ 
 #define RMESCAPE_GLOB	0x2	/* Add backslashes for glob */
 #define RMESCAPE_GROW	0x8	/* Grow strings instead of stalloc */
 #define RMESCAPE_HEAP	0x10	/* Malloc strings instead of stalloc */
+#define RMESCAPE_EMETA	0x20	/* Remove backslashes too */
 
 /* Add CTLESC when necessary. */
 #define QUOTES_ESC	(EXP_FULL | EXP_CASE)
@@ -1349,15 +1350,13 @@  expandmeta(struct strlist *str)
 		savelastp = exparg.lastp;
 
 		INTOFF;
-		p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP);
+		p = str->text;
 		len = strlen(p);
 		expdir_max = len + PATH_MAX;
 		expdir = ckmalloc(expdir_max);
 
 		expmeta(p, len, 0);
 		ckfree(expdir);
-		if (p != str->text)
-			ckfree(p);
 		INTON;
 		if (exparg.lastp == savelastp) {
 			/*
@@ -1378,6 +1377,41 @@  nometa:
 	}
 }
 
+static void expmeta_rmescapes(char *enddir, char *name)
+{	/* Copy name into enddir, then run preglob() on that copy. */
+	preglob(strcpy(enddir, name), RMESCAPE_EMETA);	/* result discarded; NOTE(review): presumably rewrites enddir in place - confirm */
+}
+
+static unsigned mbcharlen(char *p)
+{	/* Both callers in skipesc() pass p pointing at a CTLMBCHAR byte. */
+	int esc = 0;
+
+	if (*++p == (char)CTLESC)	/* step past p[0]; count a CTLESC if one follows */
+		esc++;
+
+	return esc + 3 + (unsigned char)p[esc];	/* NOTE(review): p[esc] looks like a stored length byte and 3 fixed overhead - confirm against the encoder */
+}
+
+static int skipesc(char *p)
+{	/* Returns an offset into p; expmeta uses it as p[esc] or q += skipesc(q). */
+	int esc = 0;
+
+	if (p[esc] == (char)CTLMBCHAR)	/* esc is still 0: p starts with CTLMBCHAR */
+		return esc + mbcharlen(p);	/* unsigned result converted to int */
+
+	if (*p == (char)CTLESC)	/* count a leading CTLESC */
+		esc++;
+
+	if (p[esc] == '\\' && p[esc + 1]) {	/* backslash followed by a non-NUL byte */
+		esc++;	/* count the backslash itself */
+		if (p[esc] == (char)CTLMBCHAR)	/* backslash precedes a CTLMBCHAR sequence */
+			return esc + mbcharlen(p + esc);
+		if (p[esc] == (char)CTLESC)	/* count a CTLESC that follows the backslash */
+			esc++;
+	}
+
+	return esc;	/* 0 when p starts with none of the prefixes above */
+}
 
 /*
  * Do metacharacter (i.e. *, ?, [...]) expansion.
@@ -1387,17 +1421,18 @@  STATIC void
 expmeta(char *name, unsigned name_len, unsigned expdir_len)
 {
 	char *enddir = expdir + expdir_len;
-	char *p;
+	struct stat64 statb;
+	struct dirent64 *dp;
 	const char *cp;
-	char *start;
 	char *endname;
 	int metaflag;
-	struct stat64 statb;
-	DIR *dirp;
-	struct dirent64 *dp;
-	int atend;
 	int matchdot;
+	char *start;
+	DIR *dirp;
+	char *pat;
+	char *p;
 	int esc;
+	int c;
 
 	metaflag = 0;
 	start = name;
@@ -1409,8 +1444,7 @@  expmeta(char *name, unsigned name_len, unsigned expdir_len)
 			if (*q == '!')
 				q++;
 			for (;;) {
-				if (*q == '\\')
-					q++;
+				q += skipesc(q);
 				if (*q == '/' || *q == '\0')
 					break;
 				if (*++q == ']') {
@@ -1419,8 +1453,8 @@  expmeta(char *name, unsigned name_len, unsigned expdir_len)
 				}
 			}
 		} else {
-			if (*p == '\\' && p[1])
-				esc++;
+			esc = skipesc(p);
+
 			if (p[esc] == '/') {
 				if (metaflag)
 					break;
@@ -1431,24 +1465,18 @@  expmeta(char *name, unsigned name_len, unsigned expdir_len)
 	if (metaflag == 0) {	/* we've reached the end of the file name */
 		if (!expdir_len)
 			return;
-		p = name;
-		do {
-			if (*p == '\\' && p[1])
-				p++;
-			*enddir++ = *p;
-		} while (*p++);
+		expmeta_rmescapes(enddir, name);
 		if (lstat64(expdir, &statb) >= 0)
 			addfname(expdir);
 		return;
 	}
 	endname = p;
 	if (name < start) {
-		p = name;
-		do {
-			if (*p == '\\' && p[1])
-				p++;
-			*enddir++ = *p++;
-		} while (p < start);
+		c = *start;
+		*start = 0;
+		expmeta_rmescapes(enddir, name);
+		*start = c;
+		enddir += strlen(enddir);
 	}
 	*enddir = 0;
 	cp = expdir;
@@ -1457,25 +1485,26 @@  expmeta(char *name, unsigned name_len, unsigned expdir_len)
 		cp = ".";
 	if ((dirp = opendir(cp)) == NULL)
 		return;
-	if (*endname == 0) {
-		atend = 1;
-	} else {
-		atend = 0;
+	c = *endname;
+	if (c) {
 		*endname = '\0';
 		endname += esc + 1;
 	}
 	name_len -= endname - name;
 	matchdot = 0;
 	p = start;
+	if (*p == (char)CTLESC)
+		p++;
 	if (*p == '\\')
 		p++;
 	if (*p == '.')
 		matchdot++;
+	pat = preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP);
 	while (! int_pending() && (dp = readdir64(dirp)) != NULL) {
 		if (dp->d_name[0] == '.' && ! matchdot)
 			continue;
-		if (pmatch(start, dp->d_name)) {
-			if (atend) {
+		if (pmatch(pat, dp->d_name)) {
+			if (!c) {
 				scopy(dp->d_name, enddir);
 				addfname(expdir);
 			} else {
@@ -1498,9 +1527,11 @@  expmeta(char *name, unsigned name_len, unsigned expdir_len)
 			}
 		}
 	}
+	if (pat != start)
+		ckfree(pat);
 	closedir(dirp);
-	if (! atend)
-		endname[-esc - 1] = esc ? '\\' : '/';
+	if (c)
+		endname[-esc - 1] = c;
 }
 #endif	/* HAVE_GLOB */
 
@@ -1745,6 +1776,7 @@  _rmescapes(char *str, int flag)
 	int notescaped;
 	int globbing;
 	int inquotes;
+	int expmeta;
 
 	p = strpbrk(str, cqchars);
 	if (!p) {
@@ -1753,6 +1785,7 @@  _rmescapes(char *str, int flag)
 	q = p;
 	r = str;
 	globbing = flag & RMESCAPE_GLOB;
+	expmeta = (flag & RMESCAPE_EMETA) ? RMESCAPE_GLOB : 0;
 
 	if (flag & RMESCAPE_ALLOC) {
 		size_t len = p - str;
@@ -1792,6 +1825,10 @@  _rmescapes(char *str, int flag)
 		if (*p == '\\') {
 			/* naked back slash */
 			notescaped ^= globbing;
+			if (expmeta & ~notescaped) {
+				p++;
+				continue;
+			}
 			goto copy;
 		}
 		if (FNMATCH_IS_ENABLED && *p == '^')
@@ -1799,7 +1836,9 @@  _rmescapes(char *str, int flag)
 		if (*p == (char)CTLESC) {
 			p++;
 add_escape:
-			if (notescaped)
+			if (expmeta)
+				;
+			else if (notescaped)
 				*q++ = '\\';
 			else if (inquotes) {
 				*q++ = '\\';