--- psiconv/trunk/lib/psiconv/unicode.c 2004/01/04 15:47:16 181 +++ psiconv/trunk/lib/psiconv/unicode.c 2004/02/25 16:11:17 228 @@ -1,6 +1,6 @@ /* unicode.c - Part of psiconv, a PSION 5 file formats converter - Copyright (c) 2003 Frodo Looijaard + Copyright (c) 2003-2004 Frodo Looijaard This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,8 @@ #include "error.h" #include "unicode.h" +#include "parse_routines.h" +#include "generate_routines.h" #include @@ -45,7 +47,7 @@ /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 0x60 */ 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0067, + /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, @@ -73,64 +75,196 @@ extern int psiconv_unicode_select_characterset(const psiconv_config config, int charset) { - memcpy(config->unicode_table,table_cp1252,sizeof(psiconv_ucs2) * 0x100); + switch(charset) { + case 0: config->unicode = psiconv_bool_true; + break; + case 1: config->unicode = psiconv_bool_false; + memcpy(config->unicode_table,table_cp1252, + sizeof(psiconv_ucs2) * 0x100); + break; + default: return -1; + } return 0; } -psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config, - psiconv_u8 input) +psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config, + psiconv_buffer buf, + int lev,psiconv_u32 off, + int *length,int *status) { - return config->unicode_table[input]?config->unicode_table[input]: - config->unknown_unicode_char; + psiconv_u8 char1,char2,char3; + psiconv_ucs2 result=0; + int res; + int len=0; + + char1 = psiconv_read_u8(config,buf,lev,off+len,&res); + if (res) + goto ERROR; + len ++; + + if (config->unicode) { + if (char1 >= 0xf0) { + res = PSICONV_E_PARSE; + goto ERROR; + } else if (char1 < 0x80) + result = char1; + else { + char2 = psiconv_read_u8(config,buf,lev,off+len,&res); + len ++; + if ((char2 & 0xc0) != 0x80) { + res = PSICONV_E_PARSE; + goto ERROR; + } + if (char1 < 0xe0) + result = ((char1 & 0x1f) << 6) | (char2 & 0x3f); + else { + char3 = psiconv_read_u8(config,buf,lev,off+len,&res); + len ++; + if ((char3 & 0xc0) != 0x80) { + res = PSICONV_E_PARSE; + goto ERROR; + } + result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) | + (char3 & 0x3f); + } + } + } else + result = config->unicode_table[char1]?config->unicode_table[char1]: + config->unknown_unicode_char; +ERROR: + if (length) + *length = len; + if (status) + *status = res; + return result; } -/* This is quite inefficient at the moment; the obvious ways of making it - faster consume quite a bit of memory, though */ -psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input) +int psiconv_unicode_write_char(const psiconv_config config, + psiconv_buffer buf, + int lev, psiconv_ucs2 value) { + psiconv_u8 byte; int i; - for (i = 0; i < 256; i++) - if (config->unicode_table[i] == input) - break; - return (i == 256?config->unknown_epoc_char:i); + int res=0; + + if (config->unicode) { + if (value < 0x80) { + if ((res = psiconv_write_u8(config,buf,lev,value))) + goto ERROR; + } else if (value < 0x800) { + if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6)))) + goto ERROR; + if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) + goto ERROR; + } else { + if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12)))) + goto ERROR; + if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f)))) + goto ERROR; + if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) + goto ERROR; + } + } else { + for (i = 0; i < 256; i++) + if (config->unicode_table[i] == value) + break; + if ((res = psiconv_write_u8(config,buf,lev, + i == 256?config->unknown_epoc_char:i))) + goto ERROR; + } +ERROR: + return res; } -int psiconv_unicode_from_chars(const psiconv_config config, - const psiconv_u8 *input, - psiconv_ucs2 **output) +int psiconv_unicode_strlen(const psiconv_ucs2 *input) { - int i; - if (!output) - return PSICONV_E_NOMEM; - if (!(*output = malloc(sizeof(**output) * (1 + strlen(input))))) - return PSICONV_E_NOMEM; - for (i = 0; i < strlen(input); i++) - (*output)[i] = psiconv_unicode_from_char(config,input[i]); - (*output)[i] = 0x0000; - return PSICONV_E_OK; + int i = 0; + while (input[i]) + i++; + return i; } -int psiconv_unicode_to_chars(const psiconv_config config, - const psiconv_ucs2 *input, - psiconv_u8 **output) +psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input) { - int i; - if (!output) - return -PSICONV_E_NOMEM; - if (!(*output = malloc(sizeof(**output) * + psiconv_ucs2 *output; + int i = 0; + + if (!(output = malloc(sizeof(*output) * (1 + psiconv_unicode_strlen(input))))) - return -PSICONV_E_NOMEM; - for (i = 0; i < psiconv_unicode_strlen(input); i++) - (*output)[i] = psiconv_unicode_to_char(config,input[i]); - (*output)[i] = 0x00; - return -PSICONV_E_OK; + return NULL; + while ((output[i] = input[i])) + i++; + return output; } -int psiconv_unicode_strlen(const psiconv_ucs2 *input) +int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2) { int i = 0; - while (input[i]) + while (str1[i] && str2[i]) { + if (str1[i] < str2[i]) + return -1; + if (str1[i] > str2[i]) + return 1; i++; - return i; + } + if (str1[i] < str2[i]) + return -1; + else if (str1[i] > str2[i]) + return 1; + else + return 0; +} + + +psiconv_ucs2 *psiconv_unicode_empty_string(void) +{ + psiconv_ucs2 *result; + result = malloc(sizeof(psiconv_ucs2)); + if (result) + result[0] = 0; + return result; +} + + +psiconv_ucs2 *psiconv_unicode_from_list(psiconv_list input) +{ + psiconv_ucs2 *result; + int i; + psiconv_ucs2 *character; + + if (!(result = malloc(sizeof(psiconv_ucs2) * (psiconv_list_length(input)+1)))) + goto ERROR1; + for (i = 0; i < psiconv_list_length(input); i++) { + if (!(character = psiconv_list_get(input,i))) + goto ERROR2; + result[i] = *character; + } + result[i] = 0; + return result; + +ERROR2: + free(result); +ERROR1: + return NULL; +} + + +psiconv_ucs2 *psiconv_unicode_strstr(const psiconv_ucs2 *haystack, + const psiconv_ucs2 *needle) +{ + int i,j,haystack_len,needle_len; + haystack_len = psiconv_unicode_strlen(haystack); + needle_len = psiconv_unicode_strlen(needle); + + + + for (i = 0; i < haystack_len - needle_len + 1; i++) { + for (j = 0; j < needle_len; j++) + if (haystack[i+j] != needle[j]) + break; + if (j == needle_len) + return haystack+i; + } + return NULL; }