| 1 |
frodo |
181 |
/* |
| 2 |
|
|
unicode.c - Part of psiconv, a PSION 5 file formats converter |
| 3 |
frodo |
196 |
Copyright (c) 2003-2004 Frodo Looijaard <frodol@dds.nl> |
| 4 |
frodo |
181 |
|
| 5 |
|
|
This program is free software; you can redistribute it and/or modify |
| 6 |
|
|
it under the terms of the GNU General Public License as published by |
| 7 |
|
|
the Free Software Foundation; either version 2 of the License, or |
| 8 |
|
|
(at your option) any later version. |
| 9 |
|
|
|
| 10 |
|
|
This program is distributed in the hope that it will be useful, |
| 11 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 |
|
|
GNU General Public License for more details. |
| 14 |
|
|
|
| 15 |
|
|
You should have received a copy of the GNU General Public License |
| 16 |
|
|
along with this program; if not, write to the Free Software |
| 17 |
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 18 |
|
|
*/ |
| 19 |
|
|
|
| 20 |
|
|
#include "config.h" |
| 21 |
|
|
#include "compat.h" |
| 22 |
|
|
#include "error.h" |
| 23 |
|
|
|
| 24 |
|
|
#include "unicode.h" |
| 25 |
frodo |
217 |
#include "parse_routines.h" |
| 26 |
|
|
#include "generate_routines.h" |
| 27 |
frodo |
181 |
|
| 28 |
|
|
#include <string.h> |
| 29 |
|
|
|
| 30 |
|
|
#ifdef DMALLOC |
| 31 |
|
|
#include <dmalloc.h> |
| 32 |
|
|
#endif |
| 33 |
|
|
|
| 34 |
|
|
|
| 35 |
|
|
|
| 36 |
|
|
/* Translation table from single-byte character codes to UCS-2, based on
   code page 1252. The array index is the byte value; the entry is the UCS-2
   code. psiconv_unicode_select_characterset copies this table into
   config->unicode_table for character set 1, and psiconv_unicode_read_char
   substitutes config->unknown_unicode_char for any entry that is 0x0000.

   NOTE(review): 0x10 maps to U+00A0 (no-break space) while 0xa0 itself is
   left unmapped, which differs from standard Windows-1252 -- presumably a
   Psion/EPOC convention; confirm before changing either entry. */
psiconv_ucs2 table_cp1252[0x100] =
  {
    /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x00a0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0x18 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
    /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
    /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000,
    /* 0x90 */ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x0000, 0x017e, 0x0178,
    /* 0xa0 */ 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    /* 0xa8 */ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    /* 0xb8 */ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    /* 0xc0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    /* 0xc8 */ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    /* 0xd0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    /* 0xd8 */ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    /* 0xe0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    /* 0xe8 */ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    /* 0xf0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    /* 0xf8 */ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
  };
| 71 |
|
|
|
| 72 |
|
|
|
| 73 |
|
|
|
| 74 |
|
|
/* TODO: Check the charset number, select the correct one */ |
| 75 |
|
|
extern int psiconv_unicode_select_characterset(const psiconv_config config, |
| 76 |
|
|
int charset) |
| 77 |
|
|
{ |
| 78 |
frodo |
182 |
switch(charset) { |
| 79 |
|
|
case 0: config->unicode = psiconv_bool_true; |
| 80 |
|
|
break; |
| 81 |
|
|
case 1: config->unicode = psiconv_bool_false; |
| 82 |
|
|
memcpy(config->unicode_table,table_cp1252, |
| 83 |
|
|
sizeof(psiconv_ucs2) * 0x100); |
| 84 |
|
|
break; |
| 85 |
|
|
default: return -1; |
| 86 |
|
|
} |
| 87 |
frodo |
181 |
return 0; |
| 88 |
|
|
} |
| 89 |
|
|
|
| 90 |
|
|
|
| 91 |
frodo |
217 |
psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config, |
| 92 |
|
|
psiconv_buffer buf, |
| 93 |
|
|
int lev,psiconv_u32 off, |
| 94 |
|
|
int *length,int *status) |
| 95 |
frodo |
181 |
{ |
| 96 |
frodo |
217 |
psiconv_u8 char1,char2,char3; |
| 97 |
|
|
psiconv_ucs2 result=0; |
| 98 |
|
|
int res; |
| 99 |
|
|
int len=0; |
| 100 |
frodo |
181 |
|
| 101 |
frodo |
217 |
char1 = psiconv_read_u8(config,buf,lev,off+len,&res); |
| 102 |
|
|
if (res) |
| 103 |
|
|
goto ERROR; |
| 104 |
|
|
len ++; |
| 105 |
|
|
|
| 106 |
|
|
if (config->unicode) { |
| 107 |
|
|
if (char1 >= 0xf0) { |
| 108 |
|
|
res = PSICONV_E_PARSE; |
| 109 |
|
|
goto ERROR; |
| 110 |
|
|
} else if (char1 < 0x80) |
| 111 |
|
|
result = char1; |
| 112 |
|
|
else { |
| 113 |
|
|
char2 = psiconv_read_u8(config,buf,lev,off+len,&res); |
| 114 |
|
|
len ++; |
| 115 |
|
|
if ((char2 & 0xc0) != 0x80) { |
| 116 |
|
|
res = PSICONV_E_PARSE; |
| 117 |
|
|
goto ERROR; |
| 118 |
|
|
} |
| 119 |
|
|
if (char1 < 0xe0) |
| 120 |
|
|
result = ((char1 & 0x1f) << 6) | (char2 & 0x3f); |
| 121 |
|
|
else { |
| 122 |
|
|
char3 = psiconv_read_u8(config,buf,lev,off+len,&res); |
| 123 |
|
|
len ++; |
| 124 |
|
|
if ((char3 & 0xc0) != 0x80) { |
| 125 |
|
|
res = PSICONV_E_PARSE; |
| 126 |
|
|
goto ERROR; |
| 127 |
|
|
} |
| 128 |
|
|
result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) | |
| 129 |
|
|
(char3 & 0x3f); |
| 130 |
|
|
} |
| 131 |
|
|
} |
| 132 |
|
|
} else |
| 133 |
|
|
result = config->unicode_table[char1]?config->unicode_table[char1]: |
| 134 |
|
|
config->unknown_unicode_char; |
| 135 |
|
|
ERROR: |
| 136 |
|
|
if (length) |
| 137 |
|
|
*length = len; |
| 138 |
|
|
if (status) |
| 139 |
|
|
*status = res; |
| 140 |
|
|
return result; |
| 141 |
frodo |
181 |
} |
| 142 |
|
|
|
| 143 |
frodo |
217 |
int psiconv_unicode_write_char(const psiconv_config config, |
| 144 |
|
|
psiconv_buffer buf, |
| 145 |
|
|
int lev, psiconv_ucs2 value) |
| 146 |
frodo |
181 |
{ |
| 147 |
frodo |
217 |
psiconv_u8 byte; |
| 148 |
frodo |
181 |
int i; |
| 149 |
frodo |
217 |
int res=0; |
| 150 |
frodo |
181 |
|
| 151 |
frodo |
217 |
if (config->unicode) { |
| 152 |
|
|
if (value < 0x80) { |
| 153 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,value))) |
| 154 |
|
|
goto ERROR; |
| 155 |
|
|
} else if (value < 0x800) { |
| 156 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6)))) |
| 157 |
|
|
goto ERROR; |
| 158 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
| 159 |
|
|
goto ERROR; |
| 160 |
|
|
} else { |
| 161 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12)))) |
| 162 |
|
|
goto ERROR; |
| 163 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f)))) |
| 164 |
|
|
goto ERROR; |
| 165 |
|
|
if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
| 166 |
|
|
goto ERROR; |
| 167 |
|
|
} |
| 168 |
|
|
} else { |
| 169 |
|
|
for (i = 0; i < 256; i++) |
| 170 |
|
|
if (config->unicode_table[i] == value) |
| 171 |
|
|
break; |
| 172 |
|
|
if ((res = psiconv_write_u8(config,buf,lev, |
| 173 |
|
|
i == 256?config->unknown_epoc_char:i))) |
| 174 |
|
|
goto ERROR; |
| 175 |
|
|
} |
| 176 |
|
|
ERROR: |
| 177 |
|
|
return res; |
| 178 |
frodo |
181 |
} |
| 179 |
|
|
|
| 180 |
|
|
/* Return the number of UCS-2 characters in input that precede its
   terminating 0 character. input must not be NULL. */
int psiconv_unicode_strlen(const psiconv_ucs2 *input)
{
  int count;

  for (count = 0; input[count] != 0; count++)
    ; /* just counting */
  return count;
}
| 187 |
frodo |
184 |
|
| 188 |
|
|
/* Return a newly malloc'ed copy of the 0-terminated UCS-2 string input,
   terminator included, or NULL when the allocation fails. input must not be
   NULL. */
psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input)
{
  /* Number of elements to copy, counting the terminating 0. */
  int count = 1 + psiconv_unicode_strlen(input);
  psiconv_ucs2 *copy = malloc(sizeof(*copy) * count);

  if (copy == NULL)
    return NULL;
  memcpy(copy,input,sizeof(*copy) * count);
  return copy;
}
| 200 |
|
|
|
| 201 |
|
|
/* Compare two 0-terminated UCS-2 strings character by character.
   Returns -1 if str1 sorts before str2, 1 if it sorts after, and 0 if both
   strings are equal. A string that is a proper prefix of the other sorts
   first. */
int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2)
{
  const psiconv_ucs2 *left = str1;
  const psiconv_ucs2 *right = str2;

  /* Skip the common prefix; stop at the first difference or at the end of
     str1 (which is then also the end of str2 if they are still equal). */
  while (*left != 0 && *left == *right) {
    left++;
    right++;
  }

  if (*left < *right)
    return -1;
  if (*left > *right)
    return 1;
  return 0;
}
| 218 |
|
|
|
| 219 |
frodo |
217 |
|
| 220 |
|
|
psiconv_ucs2 *psiconv_unicode_empty_string(void) |
| 221 |
|
|
{ |
| 222 |
|
|
psiconv_ucs2 *result; |
| 223 |
|
|
result = malloc(sizeof(psiconv_ucs2)); |
| 224 |
|
|
if (result) |
| 225 |
|
|
result[0] = 0; |
| 226 |
|
|
return result; |
| 227 |
|
|
} |
| 228 |
|
|
|