/[public]/psiconv/trunk/lib/psiconv/unicode.c
ViewVC logotype

Contents of /psiconv/trunk/lib/psiconv/unicode.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 192 - (show annotations)
Mon Feb 2 20:43:04 2004 UTC (20 years, 1 month ago) by frodo
File MIME type: text/plain
File size: 6272 byte(s)
(Frodo) Psiconv program update
  * Created html4 target
  * Update of xhtml target (print entities if ASCII, and others)
  * Made everything static that should not be exported
  * Renamed stuff to xhtml where appropriate
  * The fileformat data does now contain the supported Psion files to convert
  * This is also printed in the help text
  * ENCODING_ASCII_HTML introduced (only used internally)
  * Replaced debug, silent, verbose options with noise option
  * Default targets are XHTML and TIFF

1 /*
2 unicode.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 2003 Frodo Looijaard <frodol@dds.nl>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include "config.h"
21 #include "compat.h"
22 #include "error.h"
23
24 #include "unicode.h"
25
26 #include <string.h>
27
28 #ifdef DMALLOC
29 #include <dmalloc.h>
30 #endif
31
32
33
/* Translation table from 8-bit character codes to UCS-2 values. The array
   index is the 8-bit code; the stored value is the matching UCS-2 code.
   An entry of 0x0000 means "no mapping": psiconv_unicode_from_char
   substitutes config->unknown_unicode_char for such entries.
   Each row below holds eight consecutive codes; the leading comment is the
   code of the first entry in that row.
   NOTE(review): 0x10 maps to 0x00a0 while 0xa0 itself is unmapped, and
   0x06-0x0f map to themselves; this differs from standard Windows-1252 and
   is presumably Psion-specific - confirm against the Psion character set. */
34 psiconv_ucs2 table_cp1252[0x100] =
35 {
36 /* 0x00 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0006, 0x0007,
37 /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
38 /* 0x10 */ 0x00a0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
39 /* 0x18 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
40 /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
41 /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
42 /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
43 /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
44 /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
45 /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
46 /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
47 /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
48 /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
49 /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
50 /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
51 /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
52 /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
53 /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000,
54 /* 0x90 */ 0x0000, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
55 /* 0x98 */ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x0000, 0x017e, 0x0178,
56 /* 0xa0 */ 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
57 /* 0xa8 */ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
58 /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
59 /* 0xb8 */ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
60 /* 0xc0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
61 /* 0xc8 */ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
62 /* 0xd0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
63 /* 0xd8 */ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
64 /* 0xe0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
65 /* 0xe8 */ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
66 /* 0xf0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
67 /* 0xf8 */ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
68 };
69
70
71
72 /* TODO: Check the charset number, select the correct one */
73 extern int psiconv_unicode_select_characterset(const psiconv_config config,
74 int charset)
75 {
76 switch(charset) {
77 case 0: config->unicode = psiconv_bool_true;
78 break;
79 case 1: config->unicode = psiconv_bool_false;
80 memcpy(config->unicode_table,table_cp1252,
81 sizeof(psiconv_ucs2) * 0x100);
82 break;
83 default: return -1;
84 }
85 return 0;
86 }
87
88
89 psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config,
90 psiconv_u8 input)
91 {
92 return config->unicode_table[input]?config->unicode_table[input]:
93 config->unknown_unicode_char;
94 }
95
96 /* This is quite inefficient at the moment; the obvious ways of making it
97 faster consume quite a bit of memory, though */
98 psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input)
99 {
100 int i;
101 for (i = 0; i < 256; i++)
102 if (config->unicode_table[i] == input)
103 break;
104 return (i == 256?config->unknown_epoc_char:i);
105 }
106
107 int psiconv_unicode_from_chars(const psiconv_config config,
108 const psiconv_u8 *input,
109 psiconv_ucs2 **output)
110 {
111 int i;
112 if (!output)
113 return PSICONV_E_NOMEM;
114 if (!(*output = malloc(sizeof(**output) * (1 + strlen(input)))))
115 return PSICONV_E_NOMEM;
116 for (i = 0; i < strlen(input); i++)
117 (*output)[i] = psiconv_unicode_from_char(config,input[i]);
118 (*output)[i] = 0x0000;
119 return PSICONV_E_OK;
120 }
121
122 int psiconv_unicode_to_chars(const psiconv_config config,
123 const psiconv_ucs2 *input,
124 psiconv_u8 **output)
125 {
126 int i;
127 if (!output)
128 return -PSICONV_E_NOMEM;
129 if (!(*output = malloc(sizeof(**output) *
130 (1 + psiconv_unicode_strlen(input)))))
131 return -PSICONV_E_NOMEM;
132 for (i = 0; i < psiconv_unicode_strlen(input); i++)
133 (*output)[i] = psiconv_unicode_to_char(config,input[i]);
134 (*output)[i] = 0x00;
135 return -PSICONV_E_OK;
136 }
137
138 int psiconv_unicode_strlen(const psiconv_ucs2 *input)
139 {
140 int i = 0;
141 while (input[i])
142 i++;
143 return i;
144 }
145
146 psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input)
147 {
148 psiconv_ucs2 *output;
149 int i = 0;
150
151 if (!(output = malloc(sizeof(*output) *
152 (1 + psiconv_unicode_strlen(input)))))
153 return NULL;
154 while ((output[i] = input[i]))
155 i++;
156 return output;
157 }
158
159 int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2)
160 {
161 int i = 0;
162 while (str1[i] && str2[i]) {
163 if (str1[i] < str2[i])
164 return -1;
165 if (str1[i] > str2[i])
166 return 1;
167 i++;
168 }
169 if (str1[i] < str2[i])
170 return -1;
171 else if (str1[i] > str2[i])
172 return 1;
173 else
174 return 0;
175 }
176

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26