From patchwork Thu Mar 31 01:12:25 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mauro Carvalho Chehab X-Patchwork-Id: 678351 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p2V1CURp028025 for ; Thu, 31 Mar 2011 01:12:31 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933707Ab1CaBM2 (ORCPT ); Wed, 30 Mar 2011 21:12:28 -0400 Received: from mx1.redhat.com ([209.132.183.28]:21430 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933641Ab1CaBM2 (ORCPT ); Wed, 30 Mar 2011 21:12:28 -0400 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id p2V1CSek021823 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Wed, 30 Mar 2011 21:12:28 -0400 Received: from [10.3.230.187] (vpn-230-187.phx2.redhat.com [10.3.230.187]) by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id p2V1CQpr004363 for ; Wed, 30 Mar 2011 21:12:27 -0400 Message-ID: <4D93D4F9.3060305@redhat.com> Date: Wed, 30 Mar 2011 22:12:25 -0300 From: Mauro Carvalho Chehab User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101208 Red Hat/3.1.7-3.el6_0 Lightning/1.0b2 Thunderbird/3.1.7 MIME-Version: 1.0 To: Linux Media Mailing List Subject: [PATCH dvb-apps] Fix scan handling for EN 300468 charsets X-Scanned-By: MIMEDefang 2.68 on 10.5.11.23 Sender: linux-media-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-media@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Thu, 31 Mar 2011 01:12:31 +0000 (UTC) diff -r f4e015ebdac7 util/scan/scan.c --- a/util/scan/scan.c Thu Mar 17 14:46:30 2011 +0100 +++ b/util/scan/scan.c Wed Mar 30 
/*
 * Character set assumed for DVB text that does not start with an
 * EN 300 468 selector byte (first byte >= 0x20).
 */
char *default_charset = "ISO-6937-2";

/*
 * Pick the iconv charset name for a DVB text field and step over its
 * selector byte(s), c.f. EN 300 468 annex A.
 *
 * *src / *len describe the field (at least one byte long) and are advanced
 * past whatever selector was consumed.  Returns NULL when the selector has
 * no mapping or is malformed; the caller then falls back to plain byte
 * filtering.
 */
static const char *dvb_text_charset(const unsigned char **src, size_t *len)
{
	/* One-byte selectors 0x00..0x1f; NULL marks values with no mapping. */
	static const char *const by_selector[0x20] = {
		[0x00] = "ISO-6937-2",
		[0x01] = "ISO-8859-5",
		[0x02] = "ISO-8859-6",
		[0x03] = "ISO-8859-7",
		[0x04] = "ISO-8859-8",
		[0x05] = "ISO-8859-9",
		[0x06] = "ISO-8859-10",
		[0x07] = "ISO-8859-11",
		[0x09] = "ISO-8859-13",
		[0x0a] = "ISO-8859-14",
		[0x0b] = "ISO-8859-15",
		[0x11] = "ISO-10646",
		[0x12] = "ISO-2022-KR",
		[0x13] = "GB2312",
		[0x14] = "BIG5",
		[0x15] = "ISO-10646/UTF-8",
	};
	/* Third byte of the three-byte selector 0x10 0x00 0xNN. */
	static const char *const by_third_byte[0x10] = {
		[0x01] = "ISO-8859-1",
		[0x02] = "ISO-8859-2",
		[0x03] = "ISO-8859-3",
		[0x04] = "ISO-8859-4",
		[0x05] = "ISO-8859-5",
		[0x06] = "ISO-8859-6",
		[0x07] = "ISO-8859-7",
		[0x08] = "ISO-8859-8",
		[0x09] = "ISO-8859-9",
		[0x0a] = "ISO-8859-10",
		[0x0b] = "ISO-8859-11",
		[0x0d] = "ISO-8859-13",
		[0x0e] = "ISO-8859-14",
		[0x0f] = "ISO-8859-15",
	};
	const unsigned char *s = *src;
	const char *type = NULL;
	size_t used = 1;

	/* No selector: the whole field is text in the default charset. */
	if (s[0] >= 0x20)
		return default_charset;

	if (s[0] != 0x10) {
		type = by_selector[s[0]];
	} else if (*len >= 3 && s[1] == 0x00 && s[2] <= 0x0f) {
		/*
		 * Only look at the two extra selector bytes when the field
		 * really contains them; otherwise just the 0x10 is skipped.
		 */
		type = by_third_byte[s[2]];
		used = 3;
	}

	*src += used;
	*len -= used;
	return type;
}

/*
 * Remove C0 controls (bytes below 0x20) and C1 controls (U+0080..U+009F,
 * encoded in UTF-8 as 0xC2 0x80..0x9F) from the first n bytes of s, in
 * place.  Returns the number of bytes kept; does not NUL-terminate.
 */
static size_t strip_utf8_controls(char *s, size_t n)
{
	size_t rd = 0, wr = 0;

	while (rd < n) {
		unsigned char c = (unsigned char)s[rd];

		if (c < 0x20) {
			rd++;
		} else if (c == 0xc2 && rd + 1 < n &&
			   (unsigned char)s[rd + 1] >= 0x80 &&
			   (unsigned char)s[rd + 1] <= 0x9f) {
			rd += 2;
		} else {
			s[wr++] = s[rd++];
		}
	}
	return wr;
}

/*
 * Replace *dest with a freshly malloc()ed, NUL-terminated UTF-8 copy of the
 * DVB text field src[0..len), handling the character set via iconv
 * (c.f. EN 300 468 annex A).  Needs <iconv.h> and <stdlib.h>.
 *
 * dest: in/out; any previous string in *dest is freed.  On return *dest is
 *       either a non-empty string owned by the caller or NULL.  NULL is
 *       returned for an empty field, a field holding only a selector, text
 *       that is empty after control characters are removed, or when memory
 *       cannot be allocated.
 * src:  raw descriptor bytes, optionally starting with a charset selector.
 * len:  number of bytes available at src.
 *
 * When the selector is unknown or iconv cannot open the charset, the bytes
 * are copied as-is with control codes (below 0x20 and 0x80..0x9f) dropped.
 */
static void descriptorcpy(char **dest, const unsigned char *src, size_t len)
{
	const char *type;
	char *out, *p;
	size_t outleft, i;
	int converted = 0;

	free(*dest);
	*dest = NULL;

	if (!src || !len)
		return;

	type = dvb_text_charset(&src, &len);
	if (!len)
		return;		/* selector only, no text */

	/*
	 * Four output bytes per input byte: UTF-8 never needs more than four
	 * bytes for a code point, so conversion is not cut short by a full
	 * output buffer.
	 */
	if (len > ((size_t)-1 - 1) / 4)
		return;
	outleft = len * 4;
	out = malloc(outleft + 1);
	if (!out)
		return;
	p = out;

	if (type) {
		iconv_t cd = iconv_open("UTF-8//IGNORE", type);

		if (cd != (iconv_t)(-1)) {
			/* iconv() wants a non-const pointer but only reads it. */
			char *in = (char *)src;
			size_t inleft = len;

			/*
			 * Best effort: whatever was converted before an
			 * error is kept, so the return value is not checked.
			 */
			iconv(cd, &in, &inleft, &p, &outleft);
			iconv_close(cd);

			p = out + strip_utf8_controls(out, (size_t)(p - out));
			converted = 1;
		}
	}

	/* Fallback method: just output whatever non-control char */
	if (!converted) {
		for (i = 0; i < len; i++) {
			if (src[i] >= 0x20 && (src[i] < 0x80 || src[i] > 0x9f))
				*p++ = (char)src[i];
		}
	}

	/* zap zero length names */
	if (p == out) {
		free(out);
		return;
	}

	*p = '\0';
	*dest = out;
}
EN 300 468 annex A */ - for (src = dest = (unsigned char *) s->service_name; *src; src++) - if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f)) - *dest++ = *src; - *dest = '\0'; - if (!s->service_name[0]) { - /* zap zero length names */ - free (s->service_name); - s->service_name = 0; - } + descriptorcpy(&s->service_name, buf, len); info("0x%04x 0x%04x: pmt_pid 0x%04x %s -- %s (%s%s)\n", s->transport_stream_id, @@ -2118,7 +2173,8 @@ " -P do not use ATSC PSIP tables for scanning\n" " (but only PAT and PMT) (applies for ATSC only)\n" " -A N check for ATSC 1=Terrestrial [default], 2=Cable or 3=both\n" - " -U Uniquely name unknown services\n"; + " -U Uniquely name unknown services\n" + " -C cs Override default charset (default = ISO-6937-2)\n"; void bad_usage(char *pname, int problem) @@ -2166,7 +2222,7 @@ /* start with default lnb type */ lnb_type = *lnb_enum(0); - while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:U")) != -1) { + while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:UC:")) != -1) { switch (opt) { case 'a': adapter = strtoul(optarg, NULL, 0); @@ -2246,6 +2302,9 @@ case 'U': unique_anon_services = 1; break; + case 'C': + default_charset = optarg; + break; default: bad_usage(argv[0], 0); return -1;