diff mbox

[dvb-apps] Fix scan handling for EN 300468 charsets

Message ID 4D93D4F9.3060305@redhat.com (mailing list archive)
State Superseded
Headers show

Commit Message

Mauro Carvalho Chehab March 31, 2011, 1:12 a.m. UTC
None
diff mbox

Patch

diff -r f4e015ebdac7 util/scan/scan.c
--- a/util/scan/scan.c	Thu Mar 17 14:46:30 2011 +0100
+++ b/util/scan/scan.c	Wed Mar 30 21:58:29 2011 -0300
@@ -33,6 +33,7 @@ 
 #include <assert.h>
 #include <glob.h>
 #include <ctype.h>
+#include <iconv.h>
 
 #include <linux/dvb/frontend.h>
 #include <linux/dvb/dmx.h>
@@ -66,6 +67,7 @@ 
 static int vdr_version = 3;
 static struct lnb_types_st lnb_type;
 static int unique_anon_services;
+char *default_charset = "ISO-6937-2";
 
 static enum fe_spectral_inversion spectral_inversion = INVERSION_AUTO;
 
@@ -543,60 +545,113 @@ 
 	}
 }
 
+/*
+ * handle character set correctly (e.g. via iconv)
+ *   c.f. EN 300 468 annex A 
+ */
+static void descriptorcpy(char **dest, const unsigned char *src, size_t len)
+{
+	size_t destlen, i;
+	char *p, *type = NULL;
+
+	if (*dest) {
+		free (*dest);
+		*dest = NULL;
+	}
+	if (!len)
+		return;
+
+	if (*src < 0x20) {
+		switch (*src) {
+		case 0x00:	type = "ISO-6937-2";		break;
+		case 0x01:	type = "ISO-8859-5";		break;
+		case 0x02:	type = "ISO-8859-6";		break;
+		case 0x03:	type = "ISO-8859-7";		break;
+		case 0x04:	type = "ISO-8859-8";		break;
+		case 0x05:	type = "ISO-8859-9";		break;
+		case 0x06:	type = "ISO-8859-10";		break;
+		case 0x07:	type = "ISO-8859-11";		break;
+		case 0x09:	type = "ISO-8859-13";		break;
+		case 0x0a:	type = "ISO-8859-14";		break;
+		case 0x0b:	type = "ISO-8859-15";		break;
+		case 0x11:	type = "ISO-10646";		break;
+		case 0x12:	type = "ISO-2022-KR";		break;
+		case 0x13:	type = "GB2312";		break;
+		case 0x14:	type = "BIG5";			break;
+		case 0x15:	type = "ISO-10646/UTF-8";	break;
+		case 0x10: /* ISO8859 */
+			if ((*(src + 1) != 0) || *(src + 2) > 0x0f)
+				break;
+			src+=2;
+			len-=2;
+			switch(*src) {
+			case 0x01:	type = "ISO-8859-1";		break;
+			case 0x02:	type = "ISO-8859-2";		break;
+			case 0x03:	type = "ISO-8859-3";		break;
+			case 0x04:	type = "ISO-8859-4";		break;
+			case 0x05:	type = "ISO-8859-5";		break;
+			case 0x06:	type = "ISO-8859-6";		break;
+			case 0x07:	type = "ISO-8859-7";		break;
+			case 0x08:	type = "ISO-8859-8";		break;
+			case 0x09:	type = "ISO-8859-9";		break;
+			case 0x0a:	type = "ISO-8859-10";		break;
+			case 0x0b:	type = "ISO-8859-11";		break;
+			case 0x0d:	type = "ISO-8859-13";		break;
+			case 0x0e:	type = "ISO-8859-14";		break;
+			case 0x0f:	type = "ISO-8859-15";		break;
+			}
+		}
+		src++;
+		len--;
+	} else {
+		type = default_charset;
+	}
+
+	/* Destin length should be bigger, to allow 2 char sequences */
+	destlen = len * 2;
+	*dest = malloc(destlen + 1);
+	p = *dest;
+
+	if (type) {
+		iconv_t cd = iconv_open("UTF-8//IGNORE", type);
+		if (cd == (iconv_t)(-1)) {
+			type = NULL;
+		} else {
+			iconv(cd, (char **)&src, &len, &p, &destlen);
+			iconv_close(cd);
+			*p = '\0';
+
+			return;
+		}
+	}
+
+	/* Fallback method: just output whatever non-control char */
+	p = *dest;
+	if (!type) {
+		for (i = 0; i < len; i++, src++) {
+			if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f))
+				*p++ = *src;
+
+		}
+	}
+	*p = '\0';
+}
+
 static void parse_service_descriptor (const unsigned char *buf, struct service *s)
 {
 	unsigned char len;
-	unsigned char *src, *dest;
 
 	s->type = buf[2];
 
 	buf += 3;
 	len = *buf;
 	buf++;
-
-	if (s->provider_name)
-		free (s->provider_name);
-
-	s->provider_name = malloc (len + 1);
-	memcpy (s->provider_name, buf, len);
-	s->provider_name[len] = '\0';
-
-	/* remove control characters (FIXME: handle short/long name) */
-	/* FIXME: handle character set correctly (e.g. via iconv)
-	 * c.f. EN 300 468 annex A */
-	for (src = dest = (unsigned char *) s->provider_name; *src; src++)
-		if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f))
-			*dest++ = *src;
-	*dest = '\0';
-	if (!s->provider_name[0]) {
-		/* zap zero length names */
-		free (s->provider_name);
-		s->provider_name = 0;
-	}
-
-	if (s->service_name)
-		free (s->service_name);
+	descriptorcpy(&s->provider_name, buf, len);
 
 	buf += len;
 	len = *buf;
 	buf++;
-
-	s->service_name = malloc (len + 1);
-	memcpy (s->service_name, buf, len);
-	s->service_name[len] = '\0';
-
-	/* remove control characters (FIXME: handle short/long name) */
-	/* FIXME: handle character set correctly (e.g. via iconv)
-	 * c.f. EN 300 468 annex A */
-	for (src = dest = (unsigned char *) s->service_name; *src; src++)
-		if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f))
-			*dest++ = *src;
-	*dest = '\0';
-	if (!s->service_name[0]) {
-		/* zap zero length names */
-		free (s->service_name);
-		s->service_name = 0;
-	}
+	descriptorcpy(&s->service_name, buf, len);
 
 	info("0x%04x 0x%04x: pmt_pid 0x%04x %s -- %s (%s%s)\n",
 	    s->transport_stream_id,
@@ -2118,7 +2173,8 @@ 
 	"	-P do not use ATSC PSIP tables for scanning\n"
 	"	    (but only PAT and PMT) (applies for ATSC only)\n"
 	"	-A N	check for ATSC 1=Terrestrial [default], 2=Cable or 3=both\n"
-	"	-U	Uniquely name unknown services\n";
+	"	-U	Uniquely name unknown services\n"
+	"	-C cs	Override default charset (default = ISO-6937-2)\n";
 
 void
 bad_usage(char *pname, int problem)
@@ -2166,7 +2222,7 @@ 
 
 	/* start with default lnb type */
 	lnb_type = *lnb_enum(0);
-	while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:U")) != -1) {
+	while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:UC:")) != -1) {
 		switch (opt) {
 		case 'a':
 			adapter = strtoul(optarg, NULL, 0);
@@ -2246,6 +2302,9 @@ 
 		case 'U':
 			unique_anon_services = 1;
 			break;
+		case 'C':
+			default_charset = optarg;
+			break;
 		default:
 			bad_usage(argv[0], 0);
 			return -1;