Added chartran code

This commit is contained in:
Michiel Broek 2007-02-20 20:24:06 +00:00
parent 18bf084a6c
commit c142fa2c40
4 changed files with 72 additions and 375 deletions

View File

@ -32,6 +32,13 @@
#include "mbselib.h"
int use_tran1 = FALSE; /* Translate stage 1 active */
int use_tran2 = FALSE; /* Translate stage 2 active */
iconv_t cd1; /* Conversion descriptor 1 */
iconv_t cd2; /* Conversion descriptor 2 */
/*
* Search table for CHRS: kludge to the current name.
* This table may include obsolete names (and we need
@ -81,36 +88,12 @@ struct _charmap charmap[] = {
{FTNC_LATIN_9,(char *)"LATIN-9 2",(char *)"iso-8859-15",(char *)"LATIN-9", (char *)"ISO-8859-15",(char *)"en_US", (char *)"ISO 8859-1 (Western European EURO)"},
{FTNC_KOI8_R, (char *)"KOI8-R 2", (char *)"koi8-r", (char *)"KOI8-R", (char *)"KOI8-R", (char *)"ru_RUi.koi8r",(char *)"Unix codepage KOI8-R (Russian)"},
{FTNC_CP936, (char *)"CP936 2", (char *)"hz-gb-2312", (char *)"GB2312", (char *)"GB2312", (char *)"zh_CN.gbk", (char *)"IBM codepage 936 (Chinese, GBK)"},
{FTNC_UTF8, (char *)"UTF-8 4", (char *)"utf-8", (char *)"UTF-8", (char *)"UTF-8", (char *)"en_US.UTF-8", (char *)"Unicode UTF-8 (ISO/IEC 10646)"},
{FTNC_ERROR, NULL, NULL, NULL, NULL, NULL, (char *)"ERROR"}
};
#ifndef USE_EXPERIMENT
#define BUF_APPEND(d,s) str_append(d,sizeof(d),s)
/*
* Alias linked list
*/
static CharsetAlias *charset_alias_list = NULL;
static CharsetAlias *charset_alias_last = NULL;
/*
* Table linked list
*/
static CharsetTable *charset_table_list = NULL;
static CharsetTable *charset_table_last = NULL;
/*
* Current charset mapping table
*/
static CharsetTable *charset_table_used = NULL;
#endif
/*
* Returns index of charset or -1 if not found.
@ -174,62 +157,6 @@ int find_ftn_charset(char *ftnkludge)
#ifndef USE_EXPERIMENT
/*
 * str_printf(): bounded wrapper around vsnprintf().
 *
 * Formats into buf, which holds len bytes. When len > 0 the buffer is
 * always left NUL-terminated, even if the C library did not do so.
 *
 * Returns the number of characters written (not counting the NUL).
 * If formatting fails or the output does not fit in len bytes, the
 * error is reported with WriteError() and FALSE is returned.
 */
int str_printf(char *buf, size_t len, const char *fmt, ...)
{
    va_list args;
    int     n;

    va_start(args, fmt);
    n = vsnprintf(buf, len, fmt, args);
    /* va_end must be paired with va_start on every path, errors included */
    va_end(args);

    /* Only touch the last byte when there is a buffer to terminate */
    if (len > 0)
        buf[len - 1] = 0;

    /* A negative n is a formatting error; n >= len means truncation */
    if (n < 0 || (size_t)n >= len) {
        WriteError("Internal error - str_printf() buf overflow");
        return FALSE;
    }

    return n;
}
/*
 * str_append(): append s to the string in d without overflowing d.
 *
 * d    destination buffer
 * n    total size of d in bytes
 * s    NUL-terminated string to append
 *
 * The result is truncated to fit and d[n-1] is always set to NUL.
 * If d holds no terminator within its first n bytes nothing is
 * appended. With n == 0 the buffer is left untouched.
 * Returns d.
 */
char *str_append(char *d, size_t n, char *s)
{
    char    *end;
    size_t  used;

    if (n == 0)
        return d;

    /* Measure the current string inside the buffer only */
    end = memchr(d, 0, n);
    used = end ? (size_t)(end - d) : n;

    /* Append only when at least one byte plus the terminator fits */
    if (used < n - 1)
        strncat(d, s, n - used - 1);

    d[n - 1] = 0;
    return d;
}
/*
 * str_copy(): copy s into d, where d holds n bytes.
 *
 * At most n-1 characters are copied; the rest of d is zero filled by
 * strncpy() and d[n-1] is always set to NUL. With n == 0 the buffer
 * is left untouched.
 * Returns d.
 */
char *str_copy(char *d, size_t n, char *s)
{
    /* Without this guard d[n-1] would be written out of bounds */
    if (n == 0)
        return d;

    strncpy(d, s, n);
    d[n - 1] = 0;
    return d;
}
#define BUF_COPY(d,s) str_copy (d,sizeof(d),s)
#endif
char *getftnchrs(int val)
{
int i;
@ -313,285 +240,85 @@ char *getchrsdesc(int val)
}
#ifndef USE_EXPERIMENT
/*
* Alloc new CharsetTable and put into linked list
* Initialize charset translation. Translation can be done in 2 stages
* with UTF-8 as the common centre, because translating directly between,
* for example, CP437 and ISO-8859-1 doesn't work. If translation is needed
* and one side is UTF-8, only one stage will be used. If the two given
* character sets are the same, translation is switched off.
* On success return 0, on error return -1 and write to the error log.
*/
CharsetTable *charset_table_new(void)
int chartran_init(char *fromset, char *toset)
{
CharsetTable *p;
if (use_tran1 || use_tran2) {
WriteError("chartran_init() called while still open");
chartran_close();
}
/* Alloc and clear */
p = (CharsetTable *)xmalloc(sizeof(CharsetTable));
memset(p, 0, sizeof(CharsetTable));
p->next = NULL; /* Just to be sure */
Syslog('-', "chartran_init(%s, %s)", fromset, toset);
/* Put into linked list */
if(charset_table_list)
charset_table_last->next = p;
else
charset_table_list = p;
charset_table_last = p;
if (strcmp(fromset, toset) == 0) {
Syslog('-', "nothing to translate");
return 0;
}
return p;
/*
 * Stage 1: fromset -> UTF-8. Skipped when the source already is UTF-8.
 * iconv_open() returns (iconv_t)-1 on failure.
 */
if (strcmp(fromset, (char *)"UTF-8")) {
    cd1 = iconv_open("UTF-8", fromset);
    if (cd1 == (iconv_t)-1) {
        WriteError("$chartran_init(%s, %s): iconv_open(UTF-8, %s) error", fromset, toset, fromset);
        return -1;
    }
    use_tran1 = TRUE;
}

/*
 * Stage 2: UTF-8 -> toset. Skipped when the target already is UTF-8.
 */
if (strcmp(toset, (char *)"UTF-8")) {
    cd2 = iconv_open(toset, (char *)"UTF-8");
    if (cd2 == (iconv_t)-1) {
        WriteError("$chartran_init(%s, %s): iconv_open(%s, UTF-8) error", fromset, toset, toset);
        /* Release stage 1 again if it was opened above */
        chartran_close();
        return -1;
    }
    use_tran2 = TRUE;
}
return 0;
}
/*
* Alloc new CharsetAlias and put into linked list
* Deinit active charset translation.
*/
CharsetAlias *charset_alias_new(void)
void chartran_close(void)
{
CharsetAlias *p;
Syslog('-', "chartran_close()");
if (use_tran1) {
iconv_close(cd1);
use_tran1 = FALSE;
}
/* Alloc and clear */
p = (CharsetAlias *)xmalloc(sizeof(CharsetAlias));
memset(p, 0, sizeof(CharsetAlias));
p->next = NULL; /* Just to be sure */
/* Put into linked list */
if(charset_alias_list)
charset_alias_last->next = p;
else
charset_alias_list = p;
charset_alias_last = p;
return p;
if (use_tran2) {
iconv_close(cd2);
use_tran2 = FALSE;
}
}
/*
* Write binary mapping file
* Translate a string. chartran_init() must have been called first to
* register the character sets to translate between.
*/
int charset_write_bin(char *name)
char *chartran(char *input)
{
FILE *fp;
CharsetTable *pt;
CharsetAlias *pa;
static char outbuf[1024];
fp = fopen(name, "w+");
if (!fp)
return FALSE;
memset(&outbuf, 0, sizeof(outbuf));
strncat(outbuf, input, sizeof(outbuf) -1);
/*
* Write aliases
*/
for (pa = charset_alias_list; pa; pa=pa->next) {
fputc(CHARSET_FILE_ALIAS, fp);
fwrite(pa, sizeof(CharsetAlias), 1, fp);
if (ferror(fp)) {
fclose(fp);
return FALSE;
}
}
/*
* Write tables
*/
for(pt = charset_table_list; pt; pt=pt->next) {
fputc(CHARSET_FILE_TABLE, fp);
fwrite(pt, sizeof(CharsetTable), 1, fp);
if (ferror(fp)) {
fclose(fp);
return FALSE;
}
}
fclose(fp);
return TRUE;
return outbuf;
}
/*
 * Read the binary mapping file $MBSE_ROOT/etc/charset.bin.
 *
 * The file is a sequence of records, each a one byte tag followed by
 * the raw structure: CHARSET_FILE_ALIAS + CharsetAlias or
 * CHARSET_FILE_TABLE + CharsetTable. Every record is added to the
 * matching linked list.
 *
 * Returns TRUE when the whole file was read, FALSE when the file can't
 * be opened, a record is short, an unknown tag is found or a read
 * error occurs. The file is closed on every path.
 */
int charset_read_bin(void)
{
    FILE            *fp;
    int             c, n;
    int             rc = TRUE;
    CharsetTable    *pt;
    CharsetAlias    *pa;
    char            *name;

    name = calloc(PATH_MAX, sizeof(char));
    if (name == NULL)
        return FALSE;
    snprintf(name, PATH_MAX -1, "%s/etc/charset.bin", getenv("MBSE_ROOT"));

    if ((fp = fopen(name, "r")) == NULL) {
        WriteError("$Can't open %s", name);
        free(name);
        return FALSE;
    }
    free(name);

    /* Stop at the first bad record, but fall through to fclose() */
    while (rc && (c = fgetc(fp)) != EOF) {
        switch (c) {
        case CHARSET_FILE_ALIAS:
            pa = charset_alias_new();
            n = fread((void *)pa, sizeof(CharsetAlias), 1, fp);
            pa->next = NULL;    /* overwritten by fread() */
            if (n != 1)
                rc = FALSE;
            break;
        case CHARSET_FILE_TABLE:
            pt = charset_table_new();
            n = fread((void *)pt, sizeof(CharsetTable), 1, fp);
            pt->next = NULL;    /* overwritten by fread() */
            if (n != 1)
                rc = FALSE;
            break;
        default:
            rc = FALSE;
            break;
        }
    }

    if (ferror(fp))
        rc = FALSE;

    fclose(fp);
    return rc;
}
/*
 * Encode a single byte. When qp is set and the byte is '=' or has its
 * high bit set, the result is the MIME quoted-printable form "=XX";
 * in all other cases it is the byte itself.
 * Returns a pointer to a static buffer that is reused on every call.
 */
char *charset_qpen(int c, int qp)
{
    static char buf[4];
    int         needs_escape;

    c &= 0xff;
    needs_escape = !(c != '=' && c < 0x80);

    if (!qp || !needs_escape) {
        /* Plain byte, passed through as a one character string */
        buf[0] = c;
        buf[1] = 0;
        return buf;
    }

    str_printf(buf, sizeof(buf), "=%02.2X", c & 0xff);
    return buf;
}
/*
 * Map a single character through the current charset table.
 *
 * Characters below 0x80, or any character when no table is selected,
 * are passed to charset_qpen() unchanged. Characters from 0x80 up are
 * replaced by their entry in charset_table_used->map, each output byte
 * going through charset_qpen() with the given qp flag.
 * Returns a pointer to a static buffer that is reused on every call.
 */
char *charset_map_c(int c, int qp)
{
    static char buf[MAX_CHARSET_OUT * 4];
    char        *s;
    int         i;

    c &= 0xff;
    buf[0] = 0;

    if (charset_table_used && c >= 0x80) {
        s = charset_table_used->map[c - 0x80];
        /*
         * A map row is MAX_CHARSET_OUT bytes; a row that uses all of
         * them has no terminator, so never read beyond the row.
         */
        for (i = 0; i < MAX_CHARSET_OUT && s[i]; i++)
            BUF_APPEND(buf, charset_qpen(s[i], qp));
    } else {
        BUF_COPY(buf, charset_qpen(c, qp));
    }

    return buf;
}
/*
 * Look up the alias that belongs to a real charset name.
 * The comparison is case insensitive. Returns the alias of the first
 * entry whose name matches, or the given name when nothing matches.
 */
char *charset_alias_fsc(char *name)
{
    CharsetAlias *entry = charset_alias_list;

    while (entry != NULL) {
        if (strcasecmp(entry->name, name) == 0)
            return entry->alias;
        entry = entry->next;
    }

    return name;
}
/*
 * Look up the real charset name that belongs to an alias.
 * The comparison is case insensitive. Returns the name of the first
 * entry whose alias matches, or the given name when nothing matches.
 */
char *charset_alias_rfc(char *name)
{
    CharsetAlias *entry = charset_alias_list;

    while (entry != NULL) {
        if (strcasecmp(entry->alias, name) == 0)
            return entry->name;
        entry = entry->next;
    }

    return name;
}
/*
 * Select the character mapping table for translating in -> out.
 *
 * Both names are cut at their first space (dropping the level number)
 * in place, so the caller's strings are modified. Aliases are then
 * replaced by their real names and the table list is searched for a
 * case insensitive match on both names.
 *
 * Returns TRUE and sets charset_table_used when a table is found.
 * Returns FALSE when in or out is NULL, or when no table matches; in
 * the latter case charset_table_used is reset to NULL.
 */
int charset_set_in_out(char *in, char *out)
{
    CharsetTable    *tbl;
    CharsetAlias    *al;
    char            *space;

    if (in == NULL || out == NULL)
        return FALSE;

    /*
     * Check if charset.bin is loaded.
     */
    if ((charset_alias_list == NULL) || (charset_table_list == NULL))
        charset_read_bin();

    /*
     * For charset names with a space (level number), shorten the name.
     */
    space = strchr(in, ' ');
    if (space != NULL)
        *space = '\0';
    space = strchr(out, ' ');
    if (space != NULL)
        *space = '\0';

    /* Search for aliases */
    al = charset_alias_list;
    while (al != NULL) {
        if (strcasecmp(al->alias, in) == 0)
            in = al->name;
        if (strcasecmp(al->alias, out) == 0)
            out = al->name;
        al = al->next;
    }
    Syslog('m', "charset: aliases in=%s out=%s", in, out);

    /* Search for matching table */
    tbl = charset_table_list;
    while (tbl != NULL) {
        if (strcasecmp(tbl->in, in) == 0 && strcasecmp(tbl->out, out) == 0) {
            Syslog('s', "charset: table found in=%s out=%s", tbl->in, tbl->out);
            charset_table_used = tbl;
            return TRUE;
        }
        tbl = tbl->next;
    }

    Syslog('s', "charset: no table found in=%s out=%s", in, out);
    charset_table_used = NULL;
    return FALSE;
}
#endif

View File

@ -581,6 +581,7 @@ char *printable(char *s, int l)
{
int len;
char *p;
unsigned char c;
if (pbuff)
free(pbuff);
@ -602,7 +603,14 @@ char *printable(char *s, int l)
pbuff=(char*)xmalloc(len*3+1);
p=pbuff;
while (len--) {
if (isprint(*(unsigned char*)s))
/*
* Don't use isprint to check if a character is printable because we
* don't want some current locale to have some influence on this
* conversion. Just filter low ASCII values.
*/
c = *(unsigned char*)s;
// if (isprint(*(unsigned char*)s))
if ((c >= 32) && (c < 128))
*p++=*s;
else
switch (*s) {

View File

@ -109,9 +109,7 @@ time_t ltime;
time_t Time_Now;
char current_language[10]; /* Current language of the user */
#ifdef USE_EXPERIMENT
int utf8;
#endif
#endif

View File

@ -429,7 +429,7 @@ struct icmp_filter {
/*****************************************************************************
*
* Supported character sets, only level 2 are defined.
* Supported character sets.
*/
#define FTNC_ERROR -1 /* Error entry */
#define FTNC_NONE 0 /* Undefined */
@ -446,7 +446,8 @@ struct icmp_filter {
#define FTNC_KOI8_R 11 /* Unix koi8-r */
#define FTNC_CP936 12 /* IBM CP 936 (Chinese, GBK) */
#define FTNC_LATIN_9 13 /* ISO 8859-15 (West Europe EURO */
#define FTNC_MAXCHARS 13 /* Highest charset number */
#define FTNC_UTF8 14 /* UTF-8 */
#define FTNC_MAXCHARS 14 /* Highest charset number */
extern struct _charalias {
@ -2598,51 +2599,14 @@ int create_tmpwork(void); /* Create tmp workdir */
*
* Charset mapping
*/
#ifndef USE_EXPERIMENT
#define MAX_CHARSET_NAME 16
#define MAX_CHARSET_IN 128
#define MAX_CHARSET_OUT 4
#define CHARSET_FILE_ALIAS 'A' /* Id for binary file */
#define CHARSET_FILE_TABLE 'T' /* Id for binary file */
/*
 * One charset alias, kept in a singly linked list. The structure is
 * written to and read from the binary mapping file as a raw record.
 */
typedef struct st_charset_alias {
char alias[MAX_CHARSET_NAME]; /* Alias charset name */
char name[MAX_CHARSET_NAME]; /* Real charset name */
struct st_charset_alias *next; /* Next list entry, reset to NULL after a record is read */
} CharsetAlias;
/*
 * One translation table from charset "in" to charset "out", kept in a
 * singly linked list. The structure is written to and read from the
 * binary mapping file as a raw record.
 */
typedef struct st_charset_table {
char in[MAX_CHARSET_NAME]; /* Input charset name */
char out[MAX_CHARSET_NAME]; /* Output charset name */
/* Output bytes per input character, indexed by (character - 0x80) */
char map[MAX_CHARSET_IN][MAX_CHARSET_OUT];
struct st_charset_table *next; /* Next list entry, reset to NULL after a record is read */
} CharsetTable;
#endif
int find_ftn_charset(char *); /* Return FTN charset index */
char *getftnchrs(int); /* Return FTN characterset name */
char *getrfcchrs(int); /* Return RFC characterset name */
char *getlocale(int); /* Return locale name */
char *getchrsdesc(int); /* Return characterset description */
#ifndef USE_EXPERIMENT
CharsetTable *charset_table_new(void); /* Add table to linked list */
CharsetAlias *charset_alias_new(void); /* Add alias to linked list */
int charset_write_bin(char *); /* Save charset.bin */
int charset_read_bin(void); /* Load ~/etc/charset.bin */
char *charset_qpen(int, int); /* Convert to MIME quoted-printable */
char *charset_map_c(int, int); /* map single character */
char *charset_alias_fsc(char *); /* Search FSC alias */
char *charset_alias_rfc(char *); /* Search RFC alias */
int charset_set_in_out(char *, char *); /* Setup mapping */
#endif
int chartran_init(char *, char *); /* Initialize chartran */
void chartran_close(void); /* Deinit chartran */
char *chartran(char *); /* Translate string */
/****************************************************************************