1 | /* |
1 | /* |
2 | unicode.c - Part of psiconv, a PSION 5 file formats converter |
2 | unicode.c - Part of psiconv, a PSION 5 file formats converter |
3 | Copyright (c) 2003 Frodo Looijaard <frodol@dds.nl> |
3 | Copyright (c) 2003-2014 Frodo Looijaard <frodo@frodo.looijaard.name> |
4 | |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by |
6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; either version 2 of the License, or |
7 | the Free Software Foundation; either version 2 of the License, or |
8 | (at your option) any later version. |
8 | (at your option) any later version. |
… | |
… | |
20 | #include "config.h" |
20 | #include "config.h" |
21 | #include "compat.h" |
21 | #include "compat.h" |
22 | #include "error.h" |
22 | #include "error.h" |
23 | |
23 | |
24 | #include "unicode.h" |
24 | #include "unicode.h" |
|
|
25 | #include "parse_routines.h" |
|
|
26 | #include "generate_routines.h" |
25 | |
27 | |
26 | #include <string.h> |
28 | #include <string.h> |
|
|
29 | #include <stdlib.h> |
27 | |
30 | |
28 | #ifdef DMALLOC |
31 | #ifdef DMALLOC |
29 | #include <dmalloc.h> |
32 | #include <dmalloc.h> |
30 | #endif |
33 | #endif |
31 | |
34 | |
… | |
… | |
43 | /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, |
46 | /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, |
44 | /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, |
47 | /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, |
45 | /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, |
48 | /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, |
46 | /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, |
49 | /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, |
47 | /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, |
50 | /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, |
48 | /* 0x60 */ 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0067, |
51 | /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, |
49 | /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, |
52 | /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, |
50 | /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, |
53 | /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, |
51 | /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, |
54 | /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, |
52 | /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, |
55 | /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, |
53 | /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000, |
56 | /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000, |
… | |
… | |
84 | } |
87 | } |
85 | return 0; |
88 | return 0; |
86 | } |
89 | } |
87 | |
90 | |
88 | |
91 | |
89 | psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config, |
92 | psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config, |
90 | psiconv_u8 input) |
93 | psiconv_buffer buf, |
|
|
94 | int lev,psiconv_u32 off, |
|
|
95 | int *length,int *status) |
91 | { |
96 | { |
|
|
97 | psiconv_u8 char1,char2,char3; |
|
|
98 | psiconv_ucs2 result=0; |
|
|
99 | int res; |
|
|
100 | int len=0; |
|
|
101 | |
|
|
102 | char1 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
103 | if (res) |
|
|
104 | goto ERROR; |
|
|
105 | len ++; |
|
|
106 | |
|
|
107 | if (config->unicode) { |
|
|
108 | if (char1 >= 0xf0) { |
|
|
109 | res = PSICONV_E_PARSE; |
|
|
110 | goto ERROR; |
|
|
111 | } else if (char1 < 0x80) |
|
|
112 | result = char1; |
|
|
113 | else { |
|
|
114 | char2 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
115 | len ++; |
|
|
116 | if ((char2 & 0xc0) != 0x80) { |
|
|
117 | res = PSICONV_E_PARSE; |
|
|
118 | goto ERROR; |
|
|
119 | } |
|
|
120 | if (char1 < 0xe0) |
|
|
121 | result = ((char1 & 0x1f) << 6) | (char2 & 0x3f); |
|
|
122 | else { |
|
|
123 | char3 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
124 | len ++; |
|
|
125 | if ((char3 & 0xc0) != 0x80) { |
|
|
126 | res = PSICONV_E_PARSE; |
|
|
127 | goto ERROR; |
|
|
128 | } |
|
|
129 | result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) | |
|
|
130 | (char3 & 0x3f); |
|
|
131 | } |
|
|
132 | } |
|
|
133 | } else |
92 | return config->unicode_table[input]?config->unicode_table[input]: |
134 | result = config->unicode_table[char1]?config->unicode_table[char1]: |
93 | config->unknown_unicode_char; |
135 | config->unknown_unicode_char; |
|
|
136 | ERROR: |
|
|
137 | if (length) |
|
|
138 | *length = len; |
|
|
139 | if (status) |
|
|
140 | *status = res; |
|
|
141 | return result; |
94 | } |
142 | } |
95 | |
143 | |
96 | /* This is quite inefficient at the moment; the obvious ways of making it |
144 | int psiconv_unicode_write_char(const psiconv_config config, |
97 | faster consume quite a bit of memory, though */ |
145 | psiconv_buffer buf, |
98 | psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input) |
146 | int lev, psiconv_ucs2 value) |
99 | { |
147 | { |
100 | int i; |
148 | int i; |
|
|
149 | int res=0; |
|
|
150 | |
|
|
151 | if (config->unicode) { |
|
|
152 | if (value < 0x80) { |
|
|
153 | if ((res = psiconv_write_u8(config,buf,lev,value))) |
|
|
154 | goto ERROR; |
|
|
155 | } else if (value < 0x800) { |
|
|
156 | if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6)))) |
|
|
157 | goto ERROR; |
|
|
158 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
|
|
159 | goto ERROR; |
|
|
160 | } else { |
|
|
161 | if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12)))) |
|
|
162 | goto ERROR; |
|
|
163 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f)))) |
|
|
164 | goto ERROR; |
|
|
165 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
|
|
166 | goto ERROR; |
|
|
167 | } |
|
|
168 | } else { |
101 | for (i = 0; i < 256; i++) |
169 | for (i = 0; i < 256; i++) |
102 | if (config->unicode_table[i] == input) |
170 | if (config->unicode_table[i] == value) |
103 | break; |
171 | break; |
104 | return (i == 256?config->unknown_epoc_char:i); |
172 | if ((res = psiconv_write_u8(config,buf,lev, |
105 | } |
173 | i == 256?config->unknown_epoc_char:i))) |
106 | |
174 | goto ERROR; |
107 | int psiconv_unicode_from_chars(const psiconv_config config, |
175 | } |
108 | const psiconv_u8 *input, |
176 | ERROR: |
109 | psiconv_ucs2 **output) |
177 | return res; |
110 | { |
|
|
111 | int i; |
|
|
112 | if (!output) |
|
|
113 | return PSICONV_E_NOMEM; |
|
|
114 | if (!(*output = malloc(sizeof(**output) * (1 + strlen(input))))) |
|
|
115 | return PSICONV_E_NOMEM; |
|
|
116 | for (i = 0; i < strlen(input); i++) |
|
|
117 | (*output)[i] = psiconv_unicode_from_char(config,input[i]); |
|
|
118 | (*output)[i] = 0x0000; |
|
|
119 | return PSICONV_E_OK; |
|
|
120 | } |
|
|
121 | |
|
|
122 | int psiconv_unicode_to_chars(const psiconv_config config, |
|
|
123 | const psiconv_ucs2 *input, |
|
|
124 | psiconv_u8 **output) |
|
|
125 | { |
|
|
126 | int i; |
|
|
127 | if (!output) |
|
|
128 | return -PSICONV_E_NOMEM; |
|
|
129 | if (!(*output = malloc(sizeof(**output) * |
|
|
130 | (1 + psiconv_unicode_strlen(input))))) |
|
|
131 | return -PSICONV_E_NOMEM; |
|
|
132 | for (i = 0; i < psiconv_unicode_strlen(input); i++) |
|
|
133 | (*output)[i] = psiconv_unicode_to_char(config,input[i]); |
|
|
134 | (*output)[i] = 0x00; |
|
|
135 | return -PSICONV_E_OK; |
|
|
136 | } |
178 | } |
137 | |
179 | |
138 | int psiconv_unicode_strlen(const psiconv_ucs2 *input) |
180 | int psiconv_unicode_strlen(const psiconv_ucs2 *input) |
139 | { |
181 | { |
140 | int i = 0; |
182 | int i = 0; |
141 | while (input[i]) |
183 | while (input[i]) |
142 | i++; |
184 | i++; |
143 | return i; |
185 | return i; |
144 | } |
186 | } |
|
|
187 | |
|
|
188 | psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input) |
|
|
189 | { |
|
|
190 | psiconv_ucs2 *output; |
|
|
191 | int i = 0; |
|
|
192 | |
|
|
193 | if (!(output = malloc(sizeof(*output) * |
|
|
194 | (1 + psiconv_unicode_strlen(input))))) |
|
|
195 | return NULL; |
|
|
196 | while ((output[i] = input[i])) |
|
|
197 | i++; |
|
|
198 | return output; |
|
|
199 | } |
|
|
200 | |
|
|
201 | int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2) |
|
|
202 | { |
|
|
203 | int i = 0; |
|
|
204 | while (str1[i] && str2[i]) { |
|
|
205 | if (str1[i] < str2[i]) |
|
|
206 | return -1; |
|
|
207 | if (str1[i] > str2[i]) |
|
|
208 | return 1; |
|
|
209 | i++; |
|
|
210 | } |
|
|
211 | if (str1[i] < str2[i]) |
|
|
212 | return -1; |
|
|
213 | else if (str1[i] > str2[i]) |
|
|
214 | return 1; |
|
|
215 | else |
|
|
216 | return 0; |
|
|
217 | } |
|
|
218 | |
|
|
219 | |
|
|
220 | psiconv_ucs2 *psiconv_unicode_empty_string(void) |
|
|
221 | { |
|
|
222 | psiconv_ucs2 *result; |
|
|
223 | result = malloc(sizeof(psiconv_ucs2)); |
|
|
224 | if (result) |
|
|
225 | result[0] = 0; |
|
|
226 | return result; |
|
|
227 | } |
|
|
228 | |
|
|
229 | |
|
|
230 | psiconv_ucs2 *psiconv_unicode_from_list(psiconv_list input) |
|
|
231 | { |
|
|
232 | psiconv_ucs2 *result; |
|
|
233 | int i; |
|
|
234 | psiconv_ucs2 *character; |
|
|
235 | |
|
|
236 | if (!(result = malloc(sizeof(psiconv_ucs2) * (psiconv_list_length(input)+1)))) |
|
|
237 | goto ERROR1; |
|
|
238 | for (i = 0; i < psiconv_list_length(input); i++) { |
|
|
239 | if (!(character = psiconv_list_get(input,i))) |
|
|
240 | goto ERROR2; |
|
|
241 | result[i] = *character; |
|
|
242 | } |
|
|
243 | result[i] = 0; |
|
|
244 | return result; |
|
|
245 | |
|
|
246 | ERROR2: |
|
|
247 | free(result); |
|
|
248 | ERROR1: |
|
|
249 | return NULL; |
|
|
250 | } |
|
|
251 | |
|
|
252 | |
|
|
253 | psiconv_ucs2 *psiconv_unicode_strstr(const psiconv_ucs2 *haystack, |
|
|
254 | const psiconv_ucs2 *needle) |
|
|
255 | { |
|
|
256 | int i,j,haystack_len,needle_len; |
|
|
257 | haystack_len = psiconv_unicode_strlen(haystack); |
|
|
258 | needle_len = psiconv_unicode_strlen(needle); |
|
|
259 | |
|
|
260 | |
|
|
261 | |
|
|
262 | for (i = 0; i < haystack_len - needle_len + 1; i++) { |
|
|
263 | for (j = 0; j < needle_len; j++) |
|
|
264 | if (haystack[i+j] != needle[j]) |
|
|
265 | break; |
|
|
266 | if (j == needle_len) |
|
|
267 | return (psiconv_ucs2 *) haystack+i; |
|
|
268 | } |
|
|
269 | return NULL; |
|
|
270 | } |