Reviewed copy of the unicode.c change r216 -> r217. Line breaks are restored
from a collapsed listing; blank context lines are re-inserted to match the
hunk counts (placement approximate) and the header name on the context
"#include" line was lost in extraction. The added lines differ from the
listing: psiconv_unicode_read_char now checks every psiconv_read_u8 result,
the unused "byte" local is dropped, and comments are added.

--- psiconv/trunk/lib/psiconv/unicode.c 2004/02/21 13:24:04 216
+++ psiconv/trunk/lib/psiconv/unicode.c 2004/02/22 22:24:39 217
@@ -22,6 +22,8 @@
 
 #include "error.h"
 #include "unicode.h"
+#include "parse_routines.h"
+#include "generate_routines.h"
 
 #include
 
@@ -86,53 +88,113 @@
 
 }
 
-psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config,
-                                       psiconv_u8 input)
-{
-  return config->unicode_table[input]?config->unicode_table[input]:
-                                      config->unknown_unicode_char;
-}
-
-/* This is quite inefficient at the moment; the obvious ways of making it
-   faster consume quite a bit of memory, though */
-psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input)
-{
-  int i;
-  for (i = 0; i < 256; i++)
-    if (config->unicode_table[i] == input)
-      break;
-  return (i == 256?config->unknown_epoc_char:i);
-}
-
-int psiconv_unicode_from_chars(const psiconv_config config,
-                               const psiconv_u8 *input,
-                               psiconv_ucs2 **output)
+/* Read one character from buf at offset off and return it as UCS-2.
+   With config->unicode set, a one-, two- or three-byte UTF-8 style
+   sequence is decoded; otherwise the single byte is looked up in
+   config->unicode_table, with config->unknown_unicode_char standing in
+   for entries that are 0.
+   If length is not NULL it receives the number of bytes consumed; if
+   status is not NULL it receives the result code (the error from
+   psiconv_read_u8, PSICONV_E_PARSE for a malformed sequence, or the
+   non-error value left by the last read). On error 0 is returned.
+   NOTE(review): a first byte in 0x80..0xbf is still decoded as a two-byte
+   lead; confirm whether it should be rejected as PSICONV_E_PARSE. */
+psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config,
+                                       psiconv_buffer buf,
+                                       int lev,psiconv_u32 off,
+                                       int *length,int *status)
+{
+  psiconv_u8 char1,char2,char3;
+  psiconv_ucs2 result=0;
+  int res;
+  int len=0;
+
+  char1 = psiconv_read_u8(config,buf,lev,off+len,&res);
+  if (res)
+    goto ERROR;
+  len ++;
+
+  if (config->unicode) {
+    if (char1 >= 0xf0) {
+      res = PSICONV_E_PARSE;
+      goto ERROR;
+    } else if (char1 < 0x80)
+      result = char1;
+    else {
+      char2 = psiconv_read_u8(config,buf,lev,off+len,&res);
+      if (res)
+        goto ERROR;
+      len ++;
+      if ((char2 & 0xc0) != 0x80) {
+        res = PSICONV_E_PARSE;
+        goto ERROR;
+      }
+      if (char1 < 0xe0)
+        result = ((char1 & 0x1f) << 6) | (char2 & 0x3f);
+      else {
+        char3 = psiconv_read_u8(config,buf,lev,off+len,&res);
+        if (res)
+          goto ERROR;
+        len ++;
+        if ((char3 & 0xc0) != 0x80) {
+          res = PSICONV_E_PARSE;
+          goto ERROR;
+        }
+        result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) |
+                 (char3 & 0x3f);
+      }
+    }
+  } else
+    result = config->unicode_table[char1]?config->unicode_table[char1]:
+                                          config->unknown_unicode_char;
+ERROR:
+  if (length)
+    *length = len;
+  if (status)
+    *status = res;
+  return result;
+}
+
+/* Append the character value to buf. With config->unicode set it is
+   written as a one-, two- or three-byte UTF-8 style sequence; otherwise
+   the first index i with config->unicode_table[i] == value is written as
+   a single byte, or config->unknown_epoc_char if there is none.
+   Returns 0 on success, or the error code of the failing
+   psiconv_write_u8 call. */
+int psiconv_unicode_write_char(const psiconv_config config,
+                               psiconv_buffer buf,
+                               int lev, psiconv_ucs2 value)
 {
   int i;
-  if (!output)
-    return PSICONV_E_NOMEM;
-  if (!(*output = malloc(sizeof(**output) * (1 + strlen((const char *) input)))))
-    return PSICONV_E_NOMEM;
-  for (i = 0; i < strlen((const char *) input); i++)
-    (*output)[i] = psiconv_unicode_from_char(config,input[i]);
-  (*output)[i] = 0x0000;
-  return PSICONV_E_OK;
-}
+  int res=0;
 
-int psiconv_unicode_to_chars(const psiconv_config config,
-                             const psiconv_ucs2 *input,
-                             psiconv_u8 **output)
-{
-  int i;
-  if (!output)
-    return -PSICONV_E_NOMEM;
-  if (!(*output = malloc(sizeof(**output) *
-                         (1 + psiconv_unicode_strlen(input)))))
-    return -PSICONV_E_NOMEM;
-  for (i = 0; i < psiconv_unicode_strlen(input); i++)
-    (*output)[i] = psiconv_unicode_to_char(config,input[i]);
-  (*output)[i] = 0x00;
-  return -PSICONV_E_OK;
+  if (config->unicode) {
+    if (value < 0x80) {
+      if ((res = psiconv_write_u8(config,buf,lev,value)))
+        goto ERROR;
+    } else if (value < 0x800) {
+      if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6))))
+        goto ERROR;
+      if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
+        goto ERROR;
+    } else {
+      if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12))))
+        goto ERROR;
+      if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f))))
+        goto ERROR;
+      if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
+        goto ERROR;
+    }
+  } else {
+    for (i = 0; i < 256; i++)
+      if (config->unicode_table[i] == value)
+        break;
+    if ((res = psiconv_write_u8(config,buf,lev,
+                                i == 256?config->unknown_epoc_char:i)))
+      goto ERROR;
+  }
+ERROR:
+  return res;
 }
 
 int psiconv_unicode_strlen(const psiconv_ucs2 *input)
@@ -174,3 +236,16 @@
 
   return 0;
 }
+
+/* Allocate a UCS-2 string holding only the terminating 0.
+   Returns NULL if malloc fails; otherwise the caller is presumably
+   expected to free() the result. */
+psiconv_ucs2 *psiconv_unicode_empty_string(void)
+{
+  psiconv_ucs2 *result;
+  result = malloc(sizeof *result);
+  if (result)
+    result[0] = 0;
+  return result;
+}
+