From c142fa2c40aa1d6fdd331535c6a65ead242b67a3 Mon Sep 17 00:00:00 2001 From: Michiel Broek Date: Tue, 20 Feb 2007 20:24:06 +0000 Subject: [PATCH] Added chartran code --- lib/charset.c | 383 ++++++++------------------------------------------ lib/clcomm.c | 14 +- lib/mbse.h | 2 - lib/mbselib.h | 48 +------ 4 files changed, 72 insertions(+), 375 deletions(-) diff --git a/lib/charset.c b/lib/charset.c index bf17a53c..95bceb0f 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -32,6 +32,13 @@ #include "mbselib.h" +int use_tran1 = FALSE; /* Translate stage 1 active */ +int use_tran2 = FALSE; /* Translate stage 2 active */ +iconv_t cd1; /* Conversion descriptor 1 */ +iconv_t cd2; /* Conversion descriptor 2 */ + + + /* * Search table for CHRS: kludge to the current name. * This table may include obsolete names (and we need @@ -81,36 +88,12 @@ struct _charmap charmap[] = { {FTNC_LATIN_9,(char *)"LATIN-9 2",(char *)"iso-8859-15",(char *)"LATIN-9", (char *)"ISO-8859-15",(char *)"en_US", (char *)"ISO 8859-1 (Western European EURO)"}, {FTNC_KOI8_R, (char *)"KOI8-R 2", (char *)"koi8-r", (char *)"KOI8-R", (char *)"KOI8-R", (char *)"ru_RUi.koi8r",(char *)"Unix codepage KOI8-R (Russian)"}, {FTNC_CP936, (char *)"CP936 2", (char *)"hz-gb-2312", (char *)"GB2312", (char *)"GB2312", (char *)"zh_CN.gbk", (char *)"IBM codepage 936 (Chinese, GBK)"}, + {FTNC_UTF8, (char *)"UTF-8 4", (char *)"utf-8", (char *)"UTF-8", (char *)"UTF-8", (char *)"en_US.UTF-8", (char *)"Unicode UTF-8 (ISO/IEC 10646)"}, {FTNC_ERROR, NULL, NULL, NULL, NULL, NULL, (char *)"ERROR"} }; -#ifndef USE_EXPERIMENT - -#define BUF_APPEND(d,s) str_append(d,sizeof(d),s) - - -/* - * Alias linked list - */ -static CharsetAlias *charset_alias_list = NULL; -static CharsetAlias *charset_alias_last = NULL; - -/* - * Table linked list - */ -static CharsetTable *charset_table_list = NULL; -static CharsetTable *charset_table_last = NULL; - -/* - * Current charset mapping table - */ -static CharsetTable *charset_table_used = NULL; - - 
-#endif - /* * Returns index of charset or -1 if not found. @@ -174,62 +157,6 @@ int find_ftn_charset(char *ftnkludge) -#ifndef USE_EXPERIMENT - - -/* - * str_printf(): wrapper for sprintf()/snprintf() - */ -int str_printf(char *buf, size_t len, const char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - - n = vsnprintf(buf, len, fmt, args); - if (n >= len) { - WriteError("Internal error - str_printf() buf overflow"); - /**NOT REACHED**/ - return FALSE; - } - - /* - * Make sure that buf[] is terminated with a \0. vsnprintf() - * should do this automatically as required by the ANSI C99 - * proposal, but one never knows ... see also discussion on - * BugTraq - */ - buf[len - 1] = 0; - va_end(args); - - return n; -} - - -char *str_append(char *d, size_t n, char *s) -{ - int max = n - strlen(d) - 1; - - strncat(d, s, max); - d[n-1] = 0; - return d; -} - - - -char *str_copy(char *d, size_t n, char *s) -{ - strncpy(d, s, n); - d[n-1] = 0; - return d; -} - -#define BUF_COPY(d,s) str_copy (d,sizeof(d),s) - -#endif - - char *getftnchrs(int val) { int i; @@ -313,285 +240,85 @@ char *getchrsdesc(int val) } -#ifndef USE_EXPERIMENT /* - * Alloc new CharsetTable and put into linked list + * Initialize charset translation. Translation can be done in 2 stages + * with UTF-8 as the common centre because for example translate between + * CP438 and ISO-8859-1 doesn't work directly. If translation is needed + * with one side is UTF-8, only one stage will be used. If two the same + * charactersets are given, the translation is off. + * On success return 0, on error return -1 and write errorlog. 
*/ -CharsetTable *charset_table_new(void) +int chartran_init(char *fromset, char *toset) { - CharsetTable *p; + if (use_tran1 || use_tran2) { + WriteError("chartran_init() called while still open"); + chartran_close(); + } - /* Alloc and clear */ - p = (CharsetTable *)xmalloc(sizeof(CharsetTable)); - memset(p, 0, sizeof(CharsetTable)); - p->next = NULL; /* Just to be sure */ - - /* Put into linked list */ - if(charset_table_list) - charset_table_last->next = p; - else - charset_table_list = p; - charset_table_last = p; + Syslog('-', "chartran_init(%s, %s)", fromset, toset); - return p; -} + if (strcmp(fromset, toset) == 0) { + Syslog('-', "nothing to translate"); + return 0; + } - -/* - * Alloc new CharsetAlias and put into linked list - */ -CharsetAlias *charset_alias_new(void) -{ - CharsetAlias *p; - - /* Alloc and clear */ - p = (CharsetAlias *)xmalloc(sizeof(CharsetAlias)); - memset(p, 0, sizeof(CharsetAlias)); - p->next = NULL; /* Just to be sure */ - - /* Put into linked list */ - if(charset_alias_list) - charset_alias_last->next = p; - else - charset_alias_list = p; - charset_alias_last = p; - - return p; -} - - - -/* - * Write binary mapping file - */ -int charset_write_bin(char *name) -{ - FILE *fp; - CharsetTable *pt; - CharsetAlias *pa; - - fp = fopen(name, "w+"); - if (!fp) - return FALSE; - - /* - * Write aliases - */ - for (pa = charset_alias_list; pa; pa=pa->next) { - fputc(CHARSET_FILE_ALIAS, fp); - fwrite(pa, sizeof(CharsetAlias), 1, fp); - if (ferror(fp)) { - fclose(fp); - return FALSE; + if (strcmp(fromset, (char *)"UTF-8")) { + cd1 = iconv_open("UTF-8", fromset); + if (cd1 == (iconv_t)-1) { + WriteError("$chartran_init(%s, %s): iconv_open(UTF-8, %s) error", fromset, toset, fromset); + return -1; } + use_tran1 = TRUE; } - /* - * Write tables - */ - for(pt = charset_table_list; pt; pt=pt->next) { - fputc(CHARSET_FILE_TABLE, fp); - fwrite(pt, sizeof(CharsetTable), 1, fp); - if (ferror(fp)) { - fclose(fp); - return FALSE; + if (strcmp(toset, (char
*)"UTF-8")) { + cd2 = iconv_open(toset, (char *)"UTF-8"); + if (cd2 == (iconv_t)-1) { + WriteError("$chartran_init(%s, %s): iconv_open(%s, UTF-8) error", fromset, toset, toset); + chartran_close(); + return -1; } + use_tran2 = TRUE; } - fclose(fp); - return TRUE; + return 0; } /* - * Read binary mapping file + * Deinit active charset translation. */ -int charset_read_bin(void) +void chartran_close(void) { - FILE *fp; - int c, n; - CharsetTable *pt; - CharsetAlias *pa; - char *name; - - name = calloc(PATH_MAX, sizeof(char)); - snprintf(name, PATH_MAX -1, "%s/etc/charset.bin", getenv("MBSE_ROOT")); - if ((fp = fopen(name, "r")) == NULL) { - WriteError("$Can't open %s", name); - free(name); - return FALSE; + Syslog('-', "chartran_close()"); + if (use_tran1) { + iconv_close(cd1); + use_tran1 = FALSE; } - free(name); - while ((c = fgetc(fp)) != EOF) { - switch(c) { - case CHARSET_FILE_ALIAS: pa = charset_alias_new(); - n = fread((void *)pa, sizeof(CharsetAlias), 1, fp); - pa->next = NULL; /* overwritten by fread() */ - if (n != 1) - return FALSE; -// Syslog('s', "read charset alias: %s -> %s", pa->alias, pa->name); - break; - case CHARSET_FILE_TABLE: pt = charset_table_new(); - n = fread((void *)pt, sizeof(CharsetTable), 1, fp); - pt->next = NULL; /* overwritten by fread() */ - if (n != 1) - return FALSE; -// Syslog('s', "read charset table: %s -> %s", pt->in, pt->out); - break; - default: return FALSE; - break; - } + if (use_tran2) { + iconv_close(cd2); + use_tran2 = FALSE; } - - if(ferror(fp)) - return FALSE; - fclose(fp); - return TRUE; } /* - * Convert to MIME quoted-printable =XX if qp==TRUE + * Translate a string, chartran_init must have been called to register + * the charactersets to translate between.
*/ -char *charset_qpen(int c, int qp) +char *chartran(char *input) { - static char buf[4]; + static char outbuf[1024]; - c &= 0xff; - - if (qp && (c == '=' || c >= 0x80)) - str_printf(buf, sizeof(buf), "=%02.2X", c & 0xff); - else { - buf[0] = c; - buf[1] = 0; - } - - return buf; + memset(&outbuf, 0, sizeof(outbuf)); + strncat(outbuf, input, sizeof(outbuf) -1); + + return outbuf; } -/* - * Map single character - */ -char *charset_map_c(int c, int qp) -{ - static char buf[MAX_CHARSET_OUT * 4]; - char *s; - - c &= 0xff; - buf[0] = 0; - - if (charset_table_used && c>=0x80) { - s = charset_table_used->map[c - 0x80]; - while(*s) - BUF_APPEND(buf, charset_qpen(*s++, qp)); - } else { - BUF_COPY(buf, charset_qpen(c, qp)); - } - - return buf; -} - - - -/* - * Search alias - */ -char *charset_alias_fsc(char *name) -{ - CharsetAlias *pa; - - /* - * Search for aliases - */ - for (pa = charset_alias_list; pa; pa=pa->next) { - if (strcasecmp(pa->name, name) == 0) - return pa->alias; - } - - return name; -} - - - -char *charset_alias_rfc(char *name) -{ - CharsetAlias *pa; - - /* - * Search for aliases - */ - for (pa = charset_alias_list; pa; pa=pa->next) { - if (strcasecmp(pa->alias, name) == 0) - return pa->name; - } - - return name; -} - - - -/* - * Set character mapping table - */ -int charset_set_in_out(char *in, char *out) -{ - CharsetTable *pt; - CharsetAlias *pa; - int i; - - if (!in || !out) - return FALSE; - - /* - * Check if charset.bin is loaded. - */ - if ((charset_alias_list == NULL) || (charset_table_list == NULL)) - charset_read_bin(); - - - /* - * For charset names with a space (level number), shorten the name. 
- */ - for (i = 0; i < strlen(in); i++) - if (in[i] == ' ') { - in[i] = '\0'; - break; - } - - for (i = 0; i < strlen(out); i++) - if (out[i] == ' ') { - out[i] = '\0'; - break; - } - - /* Search for aliases */ - for (pa = charset_alias_list; pa; pa=pa->next) { - if (strcasecmp(pa->alias, in) == 0) - in = pa->name; - if (strcasecmp(pa->alias, out) == 0) - out = pa->name; - } - Syslog('m', "charset: aliases in=%s out=%s", in, out); - - /* Search for matching table */ - for (pt = charset_table_list; pt; pt=pt->next) { - if ((strcasecmp(pt->in, in) == 0) && (strcasecmp(pt->out, out) == 0)) { - Syslog('s', "charset: table found in=%s out=%s", pt->in, pt->out); - charset_table_used = pt; - return TRUE; - } - } - - Syslog('s', "charset: no table found in=%s out=%s", in, out); - charset_table_used = NULL; - return FALSE; -} - -#endif - diff --git a/lib/clcomm.c b/lib/clcomm.c index 62598648..9ead86c4 100644 --- a/lib/clcomm.c +++ b/lib/clcomm.c @@ -579,8 +579,9 @@ char *cldecode(char *s) char *printable(char *s, int l) { - int len; - char *p; + int len; + char *p; + unsigned char c; if (pbuff) free(pbuff); @@ -602,7 +603,14 @@ char *printable(char *s, int l) pbuff=(char*)xmalloc(len*3+1); p=pbuff; while (len--) { - if (isprint(*(unsigned char*)s)) + /* + * Don't use isprint to check if a character is printable because we + * don't want some current locale to have some influence on this + * conversion. Just filer low ascii values. 
+ */ + c = *(unsigned char*)s; +// if (isprint(*(unsigned char*)s)) + if ((c >= 32) && (c < 128)) *p++=*s; else switch (*s) { diff --git a/lib/mbse.h b/lib/mbse.h index dacdfe99..d913dff0 100644 --- a/lib/mbse.h +++ b/lib/mbse.h @@ -109,9 +109,7 @@ time_t ltime; time_t Time_Now; char current_language[10]; /* Current language of the user */ -#ifdef USE_EXPERIMENT int utf8; -#endif #endif diff --git a/lib/mbselib.h b/lib/mbselib.h index f5cbbf66..2d3d4281 100644 --- a/lib/mbselib.h +++ b/lib/mbselib.h @@ -429,7 +429,7 @@ struct icmp_filter { /***************************************************************************** * - * Supported character sets, only level 2 are defined. + * Supported character sets. */ #define FTNC_ERROR -1 /* Error entry */ #define FTNC_NONE 0 /* Undefined */ @@ -446,7 +446,8 @@ struct icmp_filter { #define FTNC_KOI8_R 11 /* Unix koi8-r */ #define FTNC_CP936 12 /* IBM CP 936 (Chinese, GBK) */ #define FTNC_LATIN_9 13 /* ISO 8859-15 (West Europe EURO */ -#define FTNC_MAXCHARS 13 /* Highest charset number */ +#define FTNC_UTF8 14 /* UTF-8 */ +#define FTNC_MAXCHARS 14 /* Highest charset number */ extern struct _charalias { @@ -2598,51 +2599,14 @@ int create_tmpwork(void); /* Create tmp workdir */ * * Charset mapping */ - -#ifndef USE_EXPERIMENT - -#define MAX_CHARSET_NAME 16 -#define MAX_CHARSET_IN 128 -#define MAX_CHARSET_OUT 4 - -#define CHARSET_FILE_ALIAS 'A' /* Id for binary file */ -#define CHARSET_FILE_TABLE 'T' /* Id for binary file */ - - -typedef struct st_charset_alias { - char alias[MAX_CHARSET_NAME]; /* Alias charset name */ - char name[MAX_CHARSET_NAME]; /* Real charset name */ - struct st_charset_alias *next; -} CharsetAlias; - -typedef struct st_charset_table { - char in[MAX_CHARSET_NAME]; /* Input charset name */ - char out[MAX_CHARSET_NAME]; /* Output charset name */ - char map[MAX_CHARSET_IN][MAX_CHARSET_OUT]; - struct st_charset_table *next; -} CharsetTable; - -#endif - int find_ftn_charset(char *); /* Return FTN charset index 
*/ char *getftnchrs(int); /* Return FTN characterset name */ char *getrfcchrs(int); /* Return RFC characterset name */ char *getlocale(int); /* Return locale name */ char *getchrsdesc(int); /* Return characterset description */ - -#ifndef USE_EXPERIMENT - -CharsetTable *charset_table_new(void); /* Add table to linked list */ -CharsetAlias *charset_alias_new(void); /* Add alias to linked list */ -int charset_write_bin(char *); /* Save charset.bin */ -int charset_read_bin(void); /* Load ~/etc/charset.bin */ -char *charset_qpen(int, int); /* Convert to MIME quoted-printable */ -char *charset_map_c(int, int); /* map single character */ -char *charset_alias_fsc(char *); /* Search FSC alias */ -char *charset_alias_rfc(char *); /* Search RFC alias */ -int charset_set_in_out(char *, char *); /* Setup mapping */ - -#endif +int chartran_init(char *, char *); /* Initialize chartran */ +void chartran_close(void); /* Deinit chartran */ +char *chartran(char *); /* Translate string */ /****************************************************************************