/[public]/psiconv/trunk/lib/psiconv/unicode.c
ViewVC logotype

Annotation of /psiconv/trunk/lib/psiconv/unicode.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 351 - (hide annotations)
Wed Oct 22 19:53:40 2014 UTC (9 years, 6 months ago) by frodo
File MIME type: text/plain
File size: 8254 byte(s)
(Frodo) Update copyright year in all source files

1 frodo 181 /*
2     unicode.c - Part of psiconv, a PSION 5 file formats converter
3 frodo 351 Copyright (c) 2003-2014 Frodo Looijaard <frodo@frodo.looijaard.name>
4 frodo 181
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14    
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18     */
19    
20     #include "config.h"
21     #include "compat.h"
22     #include "error.h"
23    
24     #include "unicode.h"
25 frodo 217 #include "parse_routines.h"
26     #include "generate_routines.h"
27 frodo 181
28     #include <string.h>
29 frodo 268 #include <stdlib.h>
30 frodo 181
31     #ifdef DMALLOC
32     #include <dmalloc.h>
33     #endif
34    
35    
36    
/*
 * Byte-to-UCS-2 lookup table named for code page 1252: entry N holds the
 * UCS-2 value that input byte N is translated to.  It has 0x100 entries,
 * one per possible byte value, eight per source row; the comment at the
 * start of each row gives the index of that row's first entry.
 *
 * An entry of 0x0000 means "no mapping": psiconv_unicode_read_char
 * substitutes config->unknown_unicode_char when it finds a zero entry.
 * Note that byte 0x10 maps to U+00A0 while byte 0xa0 has no mapping
 * (presumably reflecting how EPOC encodes the non-breaking space - TODO
 * confirm against the Psion documentation).
 *
 * psiconv_unicode_select_characterset copies this table into
 * config->unicode_table when character set 1 is selected.
 */
37     psiconv_ucs2 table_cp1252[0x100] =
38     {
39     /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0006, 0x0007,
40     /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
41     /* 0x10 */ 0x00a0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
42     /* 0x18 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
43     /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
44     /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
45     /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
46     /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
47     /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
48     /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
49     /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
50     /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
51 frodo 184 /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
52 frodo 181 /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
53     /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
54     /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
55     /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
56     /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000,
57     /* 0x90 */ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
58     /* 0x98 */ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x0000, 0x017e, 0x0178,
59     /* 0xa0 */ 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
60     /* 0xa8 */ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
61     /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
62     /* 0xb8 */ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
63     /* 0xc0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
64     /* 0xc8 */ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
65     /* 0xd0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
66     /* 0xd8 */ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
67     /* 0xe0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
68     /* 0xe8 */ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
69     /* 0xf0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
70     /* 0xf8 */ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
71     };
72    
73    
74    
75     /* TODO: Check the charset number, select the correct one */
76     extern int psiconv_unicode_select_characterset(const psiconv_config config,
77     int charset)
78     {
79 frodo 182 switch(charset) {
80     case 0: config->unicode = psiconv_bool_true;
81     break;
82     case 1: config->unicode = psiconv_bool_false;
83     memcpy(config->unicode_table,table_cp1252,
84     sizeof(psiconv_ucs2) * 0x100);
85     break;
86     default: return -1;
87     }
88 frodo 181 return 0;
89     }
90    
91    
92 frodo 217 psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config,
93     psiconv_buffer buf,
94     int lev,psiconv_u32 off,
95     int *length,int *status)
96 frodo 181 {
97 frodo 217 psiconv_u8 char1,char2,char3;
98     psiconv_ucs2 result=0;
99     int res;
100     int len=0;
101 frodo 181
102 frodo 217 char1 = psiconv_read_u8(config,buf,lev,off+len,&res);
103     if (res)
104     goto ERROR;
105     len ++;
106    
107     if (config->unicode) {
108     if (char1 >= 0xf0) {
109     res = PSICONV_E_PARSE;
110     goto ERROR;
111     } else if (char1 < 0x80)
112     result = char1;
113     else {
114     char2 = psiconv_read_u8(config,buf,lev,off+len,&res);
115     len ++;
116     if ((char2 & 0xc0) != 0x80) {
117     res = PSICONV_E_PARSE;
118     goto ERROR;
119     }
120     if (char1 < 0xe0)
121     result = ((char1 & 0x1f) << 6) | (char2 & 0x3f);
122     else {
123     char3 = psiconv_read_u8(config,buf,lev,off+len,&res);
124     len ++;
125     if ((char3 & 0xc0) != 0x80) {
126     res = PSICONV_E_PARSE;
127     goto ERROR;
128     }
129     result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) |
130     (char3 & 0x3f);
131     }
132     }
133     } else
134     result = config->unicode_table[char1]?config->unicode_table[char1]:
135     config->unknown_unicode_char;
136     ERROR:
137     if (length)
138     *length = len;
139     if (status)
140     *status = res;
141     return result;
142 frodo 181 }
143    
144 frodo 217 int psiconv_unicode_write_char(const psiconv_config config,
145     psiconv_buffer buf,
146     int lev, psiconv_ucs2 value)
147 frodo 181 {
148     int i;
149 frodo 217 int res=0;
150 frodo 181
151 frodo 217 if (config->unicode) {
152     if (value < 0x80) {
153     if ((res = psiconv_write_u8(config,buf,lev,value)))
154     goto ERROR;
155     } else if (value < 0x800) {
156     if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6))))
157     goto ERROR;
158     if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
159     goto ERROR;
160     } else {
161     if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12))))
162     goto ERROR;
163     if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f))))
164     goto ERROR;
165     if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
166     goto ERROR;
167     }
168     } else {
169     for (i = 0; i < 256; i++)
170     if (config->unicode_table[i] == value)
171     break;
172     if ((res = psiconv_write_u8(config,buf,lev,
173     i == 256?config->unknown_epoc_char:i)))
174     goto ERROR;
175     }
176     ERROR:
177     return res;
178 frodo 181 }
179    
180     int psiconv_unicode_strlen(const psiconv_ucs2 *input)
181     {
182     int i = 0;
183     while (input[i])
184     i++;
185     return i;
186     }
187 frodo 184
188     psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input)
189     {
190     psiconv_ucs2 *output;
191 frodo 192 int i = 0;
192 frodo 184
193     if (!(output = malloc(sizeof(*output) *
194     (1 + psiconv_unicode_strlen(input)))))
195     return NULL;
196     while ((output[i] = input[i]))
197     i++;
198     return output;
199     }
200    
201     int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2)
202     {
203     int i = 0;
204     while (str1[i] && str2[i]) {
205     if (str1[i] < str2[i])
206     return -1;
207     if (str1[i] > str2[i])
208     return 1;
209     i++;
210     }
211     if (str1[i] < str2[i])
212     return -1;
213     else if (str1[i] > str2[i])
214     return 1;
215     else
216     return 0;
217     }
218    
219 frodo 217
220     psiconv_ucs2 *psiconv_unicode_empty_string(void)
221     {
222     psiconv_ucs2 *result;
223     result = malloc(sizeof(psiconv_ucs2));
224     if (result)
225     result[0] = 0;
226     return result;
227     }
228    
229 frodo 228
230     psiconv_ucs2 *psiconv_unicode_from_list(psiconv_list input)
231     {
232     psiconv_ucs2 *result;
233     int i;
234     psiconv_ucs2 *character;
235    
236     if (!(result = malloc(sizeof(psiconv_ucs2) * (psiconv_list_length(input)+1))))
237     goto ERROR1;
238     for (i = 0; i < psiconv_list_length(input); i++) {
239     if (!(character = psiconv_list_get(input,i)))
240     goto ERROR2;
241     result[i] = *character;
242     }
243     result[i] = 0;
244     return result;
245    
246     ERROR2:
247     free(result);
248     ERROR1:
249     return NULL;
250     }
251    
252    
253     psiconv_ucs2 *psiconv_unicode_strstr(const psiconv_ucs2 *haystack,
254     const psiconv_ucs2 *needle)
255     {
256     int i,j,haystack_len,needle_len;
257     haystack_len = psiconv_unicode_strlen(haystack);
258     needle_len = psiconv_unicode_strlen(needle);
259    
260    
261    
262     for (i = 0; i < haystack_len - needle_len + 1; i++) {
263     for (j = 0; j < needle_len; j++)
264     if (haystack[i+j] != needle[j])
265     break;
266     if (j == needle_len)
267 frodo 231 return (psiconv_ucs2 *) haystack+i;
268 frodo 228 }
269     return NULL;
270     }

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26