diff mbox series

[v5,03/13] expand: Count multi-byte characters for VSLENGTH

Message ID 961f560182a2ddf2f565ae9af1a0f321986782b5.1717291579.git.herbert@gondor.apana.org.au (mailing list archive)
State Accepted
Delegated to: Herbert Xu
Headers show
Series Add multi-byte support | expand

Commit Message

Herbert Xu June 2, 2024, 1:28 a.m. UTC
Count multi-byte characters in variables rather than bytes
and return that count as the length expansion.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c   | 107 ++++++++++++++++++++++++++++++++++++++++---------
 src/memalloc.h |  10 ++---
 2 files changed, 94 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/src/expand.c b/src/expand.c
index db46cf4..0a868d5 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -54,6 +54,7 @@ 
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <wchar.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -790,6 +791,41 @@  really_record:
 	return p;
 }
 
+static char *chtodest(int c, const char *syntax, char *out)
+{
+	if (syntax[c] == CCTL)
+		USTPUTC(CTLESC, out);
+	USTPUTC(c, out);
+
+	return out;
+}
+
+struct mbpair {
+	unsigned ml;
+	unsigned ql;
+};
+
+static struct mbpair mbtodest(const char *p, char *q, const char *syntax,
+			      size_t len)
+{
+	mbstate_t mbs = {};
+	struct mbpair mbp;
+	char *q0 = q;
+	size_t ml;
+
+	ml = mbrlen(--p, len, &mbs);
+	if (ml == -2 || ml == -1 || ml < 2)
+		ml = 1;
+
+	len = ml;
+	do {
+		q = chtodest((signed char)*p++, syntax, q);
+	} while (--len);
+
+	mbp.ml = ml - 1;
+	mbp.ql = q - q0;
+	return mbp;
+}
 
 /*
  * Put a string on the stack.
@@ -797,38 +833,72 @@  really_record:
 
 static size_t memtodest(const char *p, size_t len, int flags)
 {
-	const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+	const char *syntax;
+	size_t count = 0;
+	int expq;
 	char *q;
-	char *s;
 
 	if (unlikely(!len))
 		return 0;
 
 	q = makestrspace(len * 2, expdest);
-	s = q;
 
-	do {
+#if QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100
+#error QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100
+#endif
+	expq = flags & EXP_QUOTED;
+	if (likely(!(flags & (expq >> 4 | expq >> 8) & QUOTES_ESC))) {
+		while (len >= 8) {
+			uint64_t x = *(uint64_t *)(p + count);
+
+			if ((x | (x - 0x0101010101010101)) &
+			    0x8080808080808080)
+				break;
+
+			*(uint64_t *)(q + count) = x;
+
+			count += 8;
+			len -= 8;
+		}
+
+		q += count;
+		p += count;
+
+		syntax = flags & QUOTES_ESC ? BASESYNTAX : is_type;
+	} else
+		syntax = SQSYNTAX;
+
+	for (; len; len--) {
 		int c = (signed char)*p++;
-		if (c) {
-			if ((flags & QUOTES_ESC) &&
-			    ((syntax[c] == CCTL) ||
-			     (flags & EXP_QUOTED && syntax[c] == CBACK)))
-				USTPUTC(CTLESC, q);
-		} else if (!(flags & EXP_KEEPNUL))
+
+		if (unlikely(!c && !(flags & EXP_KEEPNUL)))
 			continue;
-		USTPUTC(c, q);
-	} while (--len);
+
+		count++;
+
+		if (unlikely(c < 0)) {
+			struct mbpair mbp = mbtodest(p, q, syntax, len);
+			unsigned mlm;
+
+			q += mbp.ql;
+			mlm = mbp.ml;
+			p += mlm;
+			len -= mlm;
+			continue;
+		}
+
+		q = chtodest(c, syntax, q);
+	}
 
 	expdest = q;
-	return q - s;
+	return count;
 }
 
 
 static size_t strtodest(const char *p, int flags)
 {
 	size_t len = strlen(p);
-	memtodest(p, len, flags);
-	return len;
+	return memtodest(p, len, flags);
 }
 
 
@@ -850,6 +920,7 @@  varvalue(char *name, int varflags, int flags, int quoted)
 	int discard = (subtype == VSPLUS || subtype == VSLENGTH) |
 		      (flags & EXP_DISCARD);
 	ssize_t len = 0;
+	size_t start;
 	char c;
 
 	if (!subtype) {
@@ -859,9 +930,9 @@  varvalue(char *name, int varflags, int flags, int quoted)
 		sh_error("Bad substitution");
 	}
 
-	flags |= EXP_KEEPNUL;
 	flags &= discard ? ~QUOTES_ESC : ~0;
 	sep = (flags & EXP_FULL) << CHAR_BIT;
+	start = expdest - (char *)stackblock();
 
 	switch (*name) {
 	case '$':
@@ -921,7 +992,7 @@  param:
 
 			if (*ap && sep) {
 				len++;
-				memtodest(&sepc, 1, flags);
+				memtodest(&sepc, 1, flags | EXP_KEEPNUL);
 			}
 		}
 		break;
@@ -951,7 +1022,7 @@  value:
 	}
 
 	if (discard)
-		STADJUST(-len, expdest);
+		expdest = (char *)stackblock() + start;
 
 	return len;
 }
diff --git a/src/memalloc.h b/src/memalloc.h
index a7f7996..1895c1e 100644
--- a/src/memalloc.h
+++ b/src/memalloc.h
@@ -81,11 +81,11 @@  static inline char *_STPUTC(int c, char *p) {
 #define STPUTC(c, p) ((p) = _STPUTC((c), (p)))
 #define CHECKSTRSPACE(n, p) \
 	({ \
-		char *q = (p); \
-		size_t l = (n); \
-		size_t m = sstrend - q; \
-		if (l > m) \
-			(p) = makestrspace(l, q); \
+		char *_q = (p); \
+		size_t _l = (n); \
+		size_t _m = sstrend - _q; \
+		if (_l > _m) \
+			(p) = makestrspace(_l, _q); \
 		0; \
 	})
 #define USTPUTC(c, p)	(*p++ = (c))