1 | /* |
1 | /* |
2 | unicode.c - Part of psiconv, a PSION 5 file formats converter |
2 | unicode.c - Part of psiconv, a PSION 5 file formats converter |
3 | Copyright (c) 2003 Frodo Looijaard <frodol@dds.nl> |
3 | Copyright (c) 2003-2014 Frodo Looijaard <frodo@frodo.looijaard.name> |
4 | |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by |
6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; either version 2 of the License, or |
7 | the Free Software Foundation; either version 2 of the License, or |
8 | (at your option) any later version. |
8 | (at your option) any later version. |
… | |
… | |
20 | #include "config.h" |
20 | #include "config.h" |
21 | #include "compat.h" |
21 | #include "compat.h" |
22 | #include "error.h" |
22 | #include "error.h" |
23 | |
23 | |
24 | #include "unicode.h" |
24 | #include "unicode.h" |
|
|
25 | #include "parse_routines.h" |
|
|
26 | #include "generate_routines.h" |
25 | |
27 | |
26 | #include <string.h> |
28 | #include <string.h> |
|
|
29 | #include <stdlib.h> |
27 | |
30 | |
28 | #ifdef DMALLOC |
31 | #ifdef DMALLOC |
29 | #include <dmalloc.h> |
32 | #include <dmalloc.h> |
30 | #endif |
33 | #endif |
31 | |
34 | |
… | |
… | |
84 | } |
87 | } |
85 | return 0; |
88 | return 0; |
86 | } |
89 | } |
87 | |
90 | |
88 | |
91 | |
89 | psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config, |
92 | psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config, |
90 | psiconv_u8 input) |
93 | psiconv_buffer buf, |
|
|
94 | int lev,psiconv_u32 off, |
|
|
95 | int *length,int *status) |
91 | { |
96 | { |
|
|
97 | psiconv_u8 char1,char2,char3; |
|
|
98 | psiconv_ucs2 result=0; |
|
|
99 | int res; |
|
|
100 | int len=0; |
|
|
101 | |
|
|
102 | char1 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
103 | if (res) |
|
|
104 | goto ERROR; |
|
|
105 | len ++; |
|
|
106 | |
|
|
107 | if (config->unicode) { |
|
|
108 | if (char1 >= 0xf0) { |
|
|
109 | res = PSICONV_E_PARSE; |
|
|
110 | goto ERROR; |
|
|
111 | } else if (char1 < 0x80) |
|
|
112 | result = char1; |
|
|
113 | else { |
|
|
114 | char2 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
115 | len ++; |
|
|
116 | if ((char2 & 0xc0) != 0x80) { |
|
|
117 | res = PSICONV_E_PARSE; |
|
|
118 | goto ERROR; |
|
|
119 | } |
|
|
120 | if (char1 < 0xe0) |
|
|
121 | result = ((char1 & 0x1f) << 6) | (char2 & 0x3f); |
|
|
122 | else { |
|
|
123 | char3 = psiconv_read_u8(config,buf,lev,off+len,&res); |
|
|
124 | len ++; |
|
|
125 | if ((char3 & 0xc0) != 0x80) { |
|
|
126 | res = PSICONV_E_PARSE; |
|
|
127 | goto ERROR; |
|
|
128 | } |
|
|
129 | result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) | |
|
|
130 | (char3 & 0x3f); |
|
|
131 | } |
|
|
132 | } |
|
|
133 | } else |
92 | return config->unicode_table[input]?config->unicode_table[input]: |
134 | result = config->unicode_table[char1]?config->unicode_table[char1]: |
93 | config->unknown_unicode_char; |
135 | config->unknown_unicode_char; |
|
|
136 | ERROR: |
|
|
137 | if (length) |
|
|
138 | *length = len; |
|
|
139 | if (status) |
|
|
140 | *status = res; |
|
|
141 | return result; |
94 | } |
142 | } |
95 | |
143 | |
96 | /* This is quite inefficient at the moment; the obvious ways of making it |
144 | int psiconv_unicode_write_char(const psiconv_config config, |
97 | faster consume quite a bit of memory, though */ |
145 | psiconv_buffer buf, |
98 | psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input) |
146 | int lev, psiconv_ucs2 value) |
99 | { |
147 | { |
100 | int i; |
148 | int i; |
|
|
149 | int res=0; |
|
|
150 | |
|
|
151 | if (config->unicode) { |
|
|
152 | if (value < 0x80) { |
|
|
153 | if ((res = psiconv_write_u8(config,buf,lev,value))) |
|
|
154 | goto ERROR; |
|
|
155 | } else if (value < 0x800) { |
|
|
156 | if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6)))) |
|
|
157 | goto ERROR; |
|
|
158 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
|
|
159 | goto ERROR; |
|
|
160 | } else { |
|
|
161 | if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12)))) |
|
|
162 | goto ERROR; |
|
|
163 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f)))) |
|
|
164 | goto ERROR; |
|
|
165 | if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f)))) |
|
|
166 | goto ERROR; |
|
|
167 | } |
|
|
168 | } else { |
101 | for (i = 0; i < 256; i++) |
169 | for (i = 0; i < 256; i++) |
102 | if (config->unicode_table[i] == input) |
170 | if (config->unicode_table[i] == value) |
103 | break; |
171 | break; |
104 | return (i == 256?config->unknown_epoc_char:i); |
172 | if ((res = psiconv_write_u8(config,buf,lev, |
105 | } |
173 | i == 256?config->unknown_epoc_char:i))) |
106 | |
174 | goto ERROR; |
107 | int psiconv_unicode_from_chars(const psiconv_config config, |
175 | } |
108 | const psiconv_u8 *input, |
176 | ERROR: |
109 | psiconv_ucs2 **output) |
177 | return res; |
110 | { |
|
|
111 | int i; |
|
|
112 | if (!output) |
|
|
113 | return PSICONV_E_NOMEM; |
|
|
114 | if (!(*output = malloc(sizeof(**output) * (1 + strlen(input))))) |
|
|
115 | return PSICONV_E_NOMEM; |
|
|
116 | for (i = 0; i < strlen(input); i++) |
|
|
117 | (*output)[i] = psiconv_unicode_from_char(config,input[i]); |
|
|
118 | (*output)[i] = 0x0000; |
|
|
119 | return PSICONV_E_OK; |
|
|
120 | } |
|
|
121 | |
|
|
122 | int psiconv_unicode_to_chars(const psiconv_config config, |
|
|
123 | const psiconv_ucs2 *input, |
|
|
124 | psiconv_u8 **output) |
|
|
125 | { |
|
|
126 | int i; |
|
|
127 | if (!output) |
|
|
128 | return -PSICONV_E_NOMEM; |
|
|
129 | if (!(*output = malloc(sizeof(**output) * |
|
|
130 | (1 + psiconv_unicode_strlen(input))))) |
|
|
131 | return -PSICONV_E_NOMEM; |
|
|
132 | for (i = 0; i < psiconv_unicode_strlen(input); i++) |
|
|
133 | (*output)[i] = psiconv_unicode_to_char(config,input[i]); |
|
|
134 | (*output)[i] = 0x00; |
|
|
135 | return -PSICONV_E_OK; |
|
|
136 | } |
178 | } |
137 | |
179 | |
138 | int psiconv_unicode_strlen(const psiconv_ucs2 *input) |
180 | int psiconv_unicode_strlen(const psiconv_ucs2 *input) |
139 | { |
181 | { |
140 | int i = 0; |
182 | int i = 0; |
… | |
… | |
144 | } |
186 | } |
145 | |
187 | |
146 | psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input) |
188 | psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input) |
147 | { |
189 | { |
148 | psiconv_ucs2 *output; |
190 | psiconv_ucs2 *output; |
149 | int i = 0; |
191 | int i = 0; |
150 | |
192 | |
151 | if (!(output = malloc(sizeof(*output) * |
193 | if (!(output = malloc(sizeof(*output) * |
152 | (1 + psiconv_unicode_strlen(input))))) |
194 | (1 + psiconv_unicode_strlen(input))))) |
153 | return NULL; |
195 | return NULL; |
154 | while ((output[i] = input[i])) |
196 | while ((output[i] = input[i])) |
… | |
… | |
172 | return 1; |
214 | return 1; |
173 | else |
215 | else |
174 | return 0; |
216 | return 0; |
175 | } |
217 | } |
176 | |
218 | |
|
|
219 | |
|
|
220 | psiconv_ucs2 *psiconv_unicode_empty_string(void) |
|
|
221 | { |
|
|
222 | psiconv_ucs2 *result; |
|
|
223 | result = malloc(sizeof(psiconv_ucs2)); |
|
|
224 | if (result) |
|
|
225 | result[0] = 0; |
|
|
226 | return result; |
|
|
227 | } |
|
|
228 | |
|
|
229 | |
|
|
230 | psiconv_ucs2 *psiconv_unicode_from_list(psiconv_list input) |
|
|
231 | { |
|
|
232 | psiconv_ucs2 *result; |
|
|
233 | int i; |
|
|
234 | psiconv_ucs2 *character; |
|
|
235 | |
|
|
236 | if (!(result = malloc(sizeof(psiconv_ucs2) * (psiconv_list_length(input)+1)))) |
|
|
237 | goto ERROR1; |
|
|
238 | for (i = 0; i < psiconv_list_length(input); i++) { |
|
|
239 | if (!(character = psiconv_list_get(input,i))) |
|
|
240 | goto ERROR2; |
|
|
241 | result[i] = *character; |
|
|
242 | } |
|
|
243 | result[i] = 0; |
|
|
244 | return result; |
|
|
245 | |
|
|
246 | ERROR2: |
|
|
247 | free(result); |
|
|
248 | ERROR1: |
|
|
249 | return NULL; |
|
|
250 | } |
|
|
251 | |
|
|
252 | |
|
|
253 | psiconv_ucs2 *psiconv_unicode_strstr(const psiconv_ucs2 *haystack, |
|
|
254 | const psiconv_ucs2 *needle) |
|
|
255 | { |
|
|
256 | int i,j,haystack_len,needle_len; |
|
|
257 | haystack_len = psiconv_unicode_strlen(haystack); |
|
|
258 | needle_len = psiconv_unicode_strlen(needle); |
|
|
259 | |
|
|
260 | |
|
|
261 | |
|
|
262 | for (i = 0; i < haystack_len - needle_len + 1; i++) { |
|
|
263 | for (j = 0; j < needle_len; j++) |
|
|
264 | if (haystack[i+j] != needle[j]) |
|
|
265 | break; |
|
|
266 | if (j == needle_len) |
|
|
267 | return (psiconv_ucs2 *) haystack+i; |
|
|
268 | } |
|
|
269 | return NULL; |
|
|
270 | } |