/[public]/psiconv/trunk/lib/psiconv/unicode.c
ViewVC logotype

Diff of /psiconv/trunk/lib/psiconv/unicode.c

Parent Directory | Revision Log | View Patch

Revision 181 Revision 268
1/* 1/*
2 unicode.c - Part of psiconv, a PSION 5 file formats converter 2 unicode.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 2003 Frodo Looijaard <frodol@dds.nl> 3 Copyright (c) 2003-2004 Frodo Looijaard <frodol@dds.nl>
4 4
5 This program is free software; you can redistribute it and/or modify 5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by 6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or 7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version. 8 (at your option) any later version.
20#include "config.h" 20#include "config.h"
21#include "compat.h" 21#include "compat.h"
22#include "error.h" 22#include "error.h"
23 23
24#include "unicode.h" 24#include "unicode.h"
25#include "parse_routines.h"
26#include "generate_routines.h"
25 27
26#include <string.h> 28#include <string.h>
29#include <stdlib.h>
27 30
28#ifdef DMALLOC 31#ifdef DMALLOC
29#include <dmalloc.h> 32#include <dmalloc.h>
30#endif 33#endif
31 34
43 /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 46 /* 0x38 */ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
44 /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 47 /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
45 /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 48 /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
46 /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 49 /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
47 /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 50 /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
48 /* 0x60 */ 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0060, 0x0067, 51 /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
49 /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 52 /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
50 /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 53 /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
51 /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, 54 /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,
52 /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 55 /* 0x80 */ 0x20ac, 0x0000, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
53 /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000, 56 /* 0x88 */ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017d, 0x0000,
71 74
72/* TODO: Check the charset number, select the correct one */ 75/* TODO: Check the charset number, select the correct one */
73extern int psiconv_unicode_select_characterset(const psiconv_config config, 76extern int psiconv_unicode_select_characterset(const psiconv_config config,
74 int charset) 77 int charset)
75{ 78{
76 memcpy(config->unicode_table,table_cp1252,sizeof(psiconv_ucs2) * 0x100); 79 switch(charset) {
80 case 0: config->unicode = psiconv_bool_true;
81 break;
82 case 1: config->unicode = psiconv_bool_false;
83 memcpy(config->unicode_table,table_cp1252,
84 sizeof(psiconv_ucs2) * 0x100);
85 break;
86 default: return -1;
87 }
77 return 0; 88 return 0;
78} 89}
79 90
80 91
81psiconv_ucs2 psiconv_unicode_from_char(const psiconv_config config, 92psiconv_ucs2 psiconv_unicode_read_char(const psiconv_config config,
82 psiconv_u8 input) 93 psiconv_buffer buf,
94 int lev,psiconv_u32 off,
95 int *length,int *status)
83{ 96{
97 psiconv_u8 char1,char2,char3;
98 psiconv_ucs2 result=0;
99 int res;
100 int len=0;
101
102 char1 = psiconv_read_u8(config,buf,lev,off+len,&res);
103 if (res)
104 goto ERROR;
105 len ++;
106
107 if (config->unicode) {
108 if (char1 >= 0xf0) {
109 res = PSICONV_E_PARSE;
110 goto ERROR;
111 } else if (char1 < 0x80)
112 result = char1;
113 else {
114 char2 = psiconv_read_u8(config,buf,lev,off+len,&res);
115 len ++;
116 if ((char2 & 0xc0) != 0x80) {
117 res = PSICONV_E_PARSE;
118 goto ERROR;
119 }
120 if (char1 < 0xe0)
121 result = ((char1 & 0x1f) << 6) | (char2 & 0x3f);
122 else {
123 char3 = psiconv_read_u8(config,buf,lev,off+len,&res);
124 len ++;
125 if ((char3 & 0xc0) != 0x80) {
126 res = PSICONV_E_PARSE;
127 goto ERROR;
128 }
129 result = ((char1 & 0x0f) << 12) | ((char2 & 0x3f) << 6) |
130 (char3 & 0x3f);
131 }
132 }
133 } else
84 return config->unicode_table[input]?config->unicode_table[input]: 134 result = config->unicode_table[char1]?config->unicode_table[char1]:
85 config->unknown_unicode_char; 135 config->unknown_unicode_char;
136ERROR:
137 if (length)
138 *length = len;
139 if (status)
140 *status = res;
141 return result;
86} 142}
87 143
88/* This is quite inefficient at the moment; the obvious ways of making it 144int psiconv_unicode_write_char(const psiconv_config config,
89 faster consume quite a bit of memory, though */ 145 psiconv_buffer buf,
90psiconv_u8 psiconv_unicode_to_char(psiconv_config config,psiconv_ucs2 input) 146 int lev, psiconv_ucs2 value)
91{ 147{
92 int i; 148 int i;
149 int res=0;
150
151 if (config->unicode) {
152 if (value < 0x80) {
153 if ((res = psiconv_write_u8(config,buf,lev,value)))
154 goto ERROR;
155 } else if (value < 0x800) {
156 if ((res = psiconv_write_u8(config,buf,lev,0xc0 | (value >> 6))))
157 goto ERROR;
158 if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
159 goto ERROR;
160 } else {
161 if ((res = psiconv_write_u8(config,buf,lev,0xe0 | (value >> 12))))
162 goto ERROR;
163 if ((res = psiconv_write_u8(config,buf,lev,0x80 | ((value >> 6) & 0x3f))))
164 goto ERROR;
165 if ((res = psiconv_write_u8(config,buf,lev,0x80 | (value & 0x3f))))
166 goto ERROR;
167 }
168 } else {
93 for (i = 0; i < 256; i++) 169 for (i = 0; i < 256; i++)
94 if (config->unicode_table[i] == input) 170 if (config->unicode_table[i] == value)
95 break; 171 break;
96 return (i == 256?config->unknown_epoc_char:i); 172 if ((res = psiconv_write_u8(config,buf,lev,
97} 173 i == 256?config->unknown_epoc_char:i)))
98 174 goto ERROR;
99int psiconv_unicode_from_chars(const psiconv_config config, 175 }
100 const psiconv_u8 *input, 176ERROR:
101 psiconv_ucs2 **output) 177 return res;
102{
103 int i;
104 if (!output)
105 return PSICONV_E_NOMEM;
106 if (!(*output = malloc(sizeof(**output) * (1 + strlen(input)))))
107 return PSICONV_E_NOMEM;
108 for (i = 0; i < strlen(input); i++)
109 (*output)[i] = psiconv_unicode_from_char(config,input[i]);
110 (*output)[i] = 0x0000;
111 return PSICONV_E_OK;
112}
113
114int psiconv_unicode_to_chars(const psiconv_config config,
115 const psiconv_ucs2 *input,
116 psiconv_u8 **output)
117{
118 int i;
119 if (!output)
120 return -PSICONV_E_NOMEM;
121 if (!(*output = malloc(sizeof(**output) *
122 (1 + psiconv_unicode_strlen(input)))))
123 return -PSICONV_E_NOMEM;
124 for (i = 0; i < psiconv_unicode_strlen(input); i++)
125 (*output)[i] = psiconv_unicode_to_char(config,input[i]);
126 (*output)[i] = 0x00;
127 return -PSICONV_E_OK;
128} 178}
129 179
130int psiconv_unicode_strlen(const psiconv_ucs2 *input) 180int psiconv_unicode_strlen(const psiconv_ucs2 *input)
131{ 181{
132 int i = 0; 182 int i = 0;
133 while (input[i]) 183 while (input[i])
134 i++; 184 i++;
135 return i; 185 return i;
136} 186}
187
188psiconv_ucs2 *psiconv_unicode_strdup(const psiconv_ucs2 *input)
189{
190 psiconv_ucs2 *output;
191 int i = 0;
192
193 if (!(output = malloc(sizeof(*output) *
194 (1 + psiconv_unicode_strlen(input)))))
195 return NULL;
196 while ((output[i] = input[i]))
197 i++;
198 return output;
199}
200
201int psiconv_unicode_strcmp(const psiconv_ucs2 *str1, const psiconv_ucs2 *str2)
202{
203 int i = 0;
204 while (str1[i] && str2[i]) {
205 if (str1[i] < str2[i])
206 return -1;
207 if (str1[i] > str2[i])
208 return 1;
209 i++;
210 }
211 if (str1[i] < str2[i])
212 return -1;
213 else if (str1[i] > str2[i])
214 return 1;
215 else
216 return 0;
217}
218
219
220psiconv_ucs2 *psiconv_unicode_empty_string(void)
221{
222 psiconv_ucs2 *result;
223 result = malloc(sizeof(psiconv_ucs2));
224 if (result)
225 result[0] = 0;
226 return result;
227}
228
229
230psiconv_ucs2 *psiconv_unicode_from_list(psiconv_list input)
231{
232 psiconv_ucs2 *result;
233 int i;
234 psiconv_ucs2 *character;
235
236 if (!(result = malloc(sizeof(psiconv_ucs2) * (psiconv_list_length(input)+1))))
237 goto ERROR1;
238 for (i = 0; i < psiconv_list_length(input); i++) {
239 if (!(character = psiconv_list_get(input,i)))
240 goto ERROR2;
241 result[i] = *character;
242 }
243 result[i] = 0;
244 return result;
245
246ERROR2:
247 free(result);
248ERROR1:
249 return NULL;
250}
251
252
253psiconv_ucs2 *psiconv_unicode_strstr(const psiconv_ucs2 *haystack,
254 const psiconv_ucs2 *needle)
255{
256 int i,j,haystack_len,needle_len;
257 haystack_len = psiconv_unicode_strlen(haystack);
258 needle_len = psiconv_unicode_strlen(needle);
259
260
261
262 for (i = 0; i < haystack_len - needle_len + 1; i++) {
263 for (j = 0; j < needle_len; j++)
264 if (haystack[i+j] != needle[j])
265 break;
266 if (j == needle_len)
267 return (psiconv_ucs2 *) haystack+i;
268 }
269 return NULL;
270}

Legend:
Removed from v.181  
changed lines
  Added in v.268

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26