/[public]/psiconv/trunk/lib/psiconv/parse_common.c
ViewVC logotype

Contents of /psiconv/trunk/lib/psiconv/parse_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 12 - (show annotations)
Mon Oct 11 16:15:14 1999 UTC (24 years, 5 months ago) by frodo
File MIME type: text/plain
File size: 22182 byte(s)
(Frodo) Full MBM support - untested

1 /*
2 parse_common.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include "config.h"
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "data.h"
25 #include "parse_routines.h"
26
27 static int psiconv_parse_layout_section(const psiconv_buffer buf,
28 int lev,psiconv_u32 off,
29 int *length,
30 psiconv_text_and_layout result,
31 psiconv_word_styles_section styles,
32 int with_styles);
33
34 int psiconv_parse_header_section(const psiconv_buffer buf,int lev,
35 psiconv_u32 off, int *length,
36 psiconv_header_section *result)
37 {
38 int res=0;
39 int len=0;
40 psiconv_u32 temp;
41
42 psiconv_progress(lev+1,off+len,"Going to read the header section");
43 (*result) = malloc(sizeof(**result));
44
45 psiconv_progress(lev+2,off+len,"Going to read UID1 to UID3");
46 (*result)->uid1 = psiconv_read_u32(buf,lev+2,off+len);
47 psiconv_debug(lev+2,off+len,"UID1: %08x",(*result)->uid1);
48 if ((*result)->uid1 != PSICONV_ID_PSION5) {
49 psiconv_warn(lev+2,off+len,"UID1 has unknown value. This is probably "
50 "not a (parsable) Psion 5 file");
51 res = -1;
52 }
53 len += 4;
54 (*result)->uid2 = psiconv_read_u32(buf,lev+2,off+len);
55 psiconv_debug(lev+2,off+len,"UID2: %08x",(*result)->uid2);
56 len += 4;
57 (*result)->uid3 = psiconv_read_u32(buf,lev+2,off+len);
58 psiconv_debug(lev+2,off+len,"UID3: %08x",(*result)->uid3);
59 len += 4;
60
61 (*result)->file = psiconv_unknown_file;
62 if ((*result)->uid1 == PSICONV_ID_PSION5) {
63 if ((*result)->uid2 == PSICONV_ID_DATA_FILE) {
64 if ((*result)->uid3 == PSICONV_ID_WORD) {
65 (*result)->file = psiconv_word_file;
66 psiconv_debug(lev+2,off+len,"File is a Word file");
67 } else if ((*result)->uid3 == PSICONV_ID_TEXTED) {
68 (*result)->file = psiconv_texted_file;
69 psiconv_debug(lev+2,off+len,"File is a TextEd file");
70 }
71 } else if ((*result)->uid2 == PSICONV_ID_MBM_FILE) {
72 (*result)->file = psiconv_mbm_file;
73 if ((*result)->uid3 != 0x00)
74 psiconv_warn(lev+2,off+len,"UID3 set in MBM file?!?");
75 psiconv_debug(lev+2,off+len,"File is a MBM file");
76 }
77 }
78 if ((*result)->file == psiconv_unknown_file) {
79 psiconv_warn(lev+2,off+len,"Unknown file type");
80 (*result)->file = psiconv_unknown_file;
81 }
82
83 psiconv_progress(lev+2,off+len,"Checking UID4");
84 temp = psiconv_read_u32(buf,lev+2,off+len);
85 if (temp == psiconv_checkuid((*result)->uid1,(*result)->uid2,
86 (*result)->uid3))
87 psiconv_debug(lev+2,off+len,"Checksum %08x is correct",temp);
88 else {
89 psiconv_warn(lev+2,off+len,"Checksum failed, file corrupted!");
90 psiconv_debug(lev+2,off+len,"Expected checksum %08x, found %08x",
91 psiconv_checkuid((*result)->uid1,(*result)->uid2,
92 (*result)->uid3),temp);
93 res = -1;
94 }
95 len += 4;
96
97 psiconv_progress(lev+2,off+len,"Going to read Section Table Section offset");
98 (*result)->section_table_offset = psiconv_read_u32(buf,lev+2,off+len);
99 psiconv_debug(lev+2,off+len,"Section Table Section starts at offset %08x",
100 (*result)->section_table_offset) ;
101 len += 4;
102
103 if (length)
104 *length = len;
105
106 psiconv_progress(lev+1,off+len-1,
107 "End of Header Section (total length: %08x)",len);
108
109 return res;
110 }
111
112 int psiconv_parse_section_table_section(const psiconv_buffer buf, int lev,
113 psiconv_u32 off, int *length,
114 psiconv_section_table_section *result)
115 {
116 int res=0;
117 int len=0;
118 psiconv_section_table_entry entry;
119
120 int i;
121 psiconv_u8 nr;
122
123 psiconv_progress(lev+1,off+len,"Going to read the section table section");
124 *result = psiconv_list_new(sizeof(*entry));
125
126 psiconv_progress(lev+2,off+len,"Going to read the section table length");
127 nr = psiconv_read_u8(buf,lev+2,off+len);
128 psiconv_debug(lev+2,off+len,"Length: %08x",nr);
129 if (nr & 0x01) {
130 psiconv_warn(lev+2,off+len,
131 "Section table length odd - ignoring last entry");
132 res = -1;
133 }
134 len ++;
135
136 psiconv_progress(lev+2,off+len,"Going to read the section table entries");
137 entry = malloc(sizeof(*entry));
138 for (i = 0; i < nr / 2; i++) {
139 entry->id = psiconv_read_u32(buf,lev+2,off + len);
140 psiconv_debug(lev+2,off + len,"Entry %d: ID = %08x",i,entry->id);
141 len += 0x04;
142 entry->offset = psiconv_read_u32(buf,lev+2,off + len);
143 psiconv_debug(lev+2,off +len,"Entry %d: Offset = %08x",i,entry->offset);
144 len += 0x04;
145 psiconv_list_add(*result,entry);
146 }
147
148 free(entry);
149
150 if (length)
151 *length = len;
152
153 psiconv_progress(lev+1,off+len-1,"End of section table section "
154 "(total length: %08x", len);
155
156 return res;
157 }
158
159 int psiconv_parse_application_id_section(const psiconv_buffer buf, int lev,
160 psiconv_u32 off, int *length,
161 psiconv_application_id_section *result)
162 {
163 int res=0;
164 int len=0;
165 int leng;
166
167 psiconv_progress(lev+1,off,"Going to read the application id section");
168 (*result) = malloc(sizeof(**result));
169
170 psiconv_progress(lev+2,off+len,"Going to read the type identifier");
171 (*result)->id = psiconv_read_u32(buf,lev+2,off+len);
172 psiconv_debug(lev+2,off+len,"Identifier: %08x",(*result)->id);
173 len += 4;
174
175 psiconv_progress(lev+2,off+len,"Going to read the application id string");
176 (*result)->name = psiconv_read_string(buf,lev+2,off+len,&leng);
177 len += leng;
178
179 if (length)
180 *length = len;
181
182 psiconv_progress(lev+1,off+len-1,"End of application id section "
183 "(total length: %08x", len);
184
185 return res;
186 }
187
188 int psiconv_parse_text_section(const psiconv_buffer buf,int lev,psiconv_u32 off,
189 int *length,psiconv_text_and_layout *result)
190 {
191
192 int res = 0;
193 int len=0;
194
195 psiconv_u32 text_len;
196 psiconv_paragraph para;
197
198 int nr;
199 int i,j,start,leng;
200 char *str_copy;
201
202 psiconv_progress(lev+1,off,"Going to parse the text section");
203 psiconv_progress(lev+2,off,"Reading the text length");
204 text_len = psiconv_read_X(buf,lev+2,off,&leng);
205 psiconv_debug(lev+2,off,"Length: %08x",text_len);
206 len += leng;
207
208 *result = psiconv_list_new(sizeof(*para));
209 para = malloc(sizeof(*para));
210
211 psiconv_progress(lev+2,off+len,"Going to read all paragraph text");
212 nr = 0;
213 start = 0;
214 for (i = 0; i < text_len; i++)
215 if (psiconv_read_u8(buf,lev+2,off+len+i) == 0x06) {
216 para->text = malloc(i - start + 1);
217 for (j = 0; j < i - start; j++)
218 para->text[j] = psiconv_read_u8(buf,lev+1,off + len + start + j);
219 para->text[j] = 0;
220
221 psiconv_list_add(*result,para);
222
223 str_copy = psiconv_make_printable(para->text);
224 psiconv_debug(lev+2,off+i+len,"Line %d: %d characters",nr,
225 strlen(str_copy) +1);
226 psiconv_debug(lev+2,off+i+len,"Line %d: `%s'",nr,str_copy);
227 free(str_copy);
228
229 start = i + 1;
230 nr ++;
231 }
232
233 if (start != text_len) {
234 res = -1;
235 psiconv_warn(lev+2,off+start+len,
236 "Last line does not end on EOL (%d characters left)", len - start);
237 para->text = malloc(text_len - start + 1);
238 for (j = 0; j < text_len - start; j++)
239 para->text[j] = psiconv_read_u8(buf,lev+2,off + start + j + len);
240 para->text[text_len - start] = 0;
241 psiconv_list_add(*result,para);
242 str_copy = psiconv_make_printable(para->text);
243 psiconv_debug(lev+2,off+start+len,"Last line: %d characters",nr,
244 strlen(str_copy)+1);
245 psiconv_debug(lev+2,off+start+len,"Last line: `%s'",str_copy);
246 free(str_copy);
247 }
248
249 free(para);
250
251 /* Initialize the remaining parts of each paragraph */
252 for (i = 0; i < psiconv_list_length(*result); i ++) {
253 para = psiconv_list_get(*result,i);
254 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
255 para->replacements = psiconv_list_new(sizeof(struct psiconv_replacement));
256 para->base_style = 0;
257 para->base_character = psiconv_basic_character_layout();
258 para->base_paragraph = psiconv_basic_paragraph_layout();
259 }
260
261
262 len += text_len;
263
264 if (length)
265 *length = len;
266
267 psiconv_progress(lev+1,off+len-1,"End of text section (total length: %08x",
268 len);
269
270 return res;
271 }
272
273 /* First do a parse_text_section, or you will get into trouble here */
274 int psiconv_parse_layout_section(const psiconv_buffer buf,
275 int lev,psiconv_u32 off,
276 int *length,
277 psiconv_text_and_layout result,
278 psiconv_word_styles_section styles,
279 int with_styles)
280 {
281 int res = 0;
282 int len = 0;
283 psiconv_u32 temp;
284 int parse_styles,nr,i,j,total,leng,line_length;
285
286 typedef struct anon_style
287 {
288 int nr;
289 psiconv_s16 base_style;
290 psiconv_character_layout character;
291 psiconv_paragraph_layout paragraph;
292 } *anon_style;
293
294 typedef psiconv_list anon_style_list; /* of struct anon_style */
295
296 anon_style_list anon_styles;
297 struct anon_style anon;
298 anon_style anon_ptr=NULL;
299
300 psiconv_paragraph para;
301 struct psiconv_in_line_layout in_line;
302
303 int *inline_count;
304
305
306 psiconv_progress(lev+1,off,"Going to read the layout section");
307
308 psiconv_progress(lev+2,off,"Going to read the section type");
309 temp = psiconv_read_u16(buf,lev+2,off+len);
310 psiconv_debug(lev+2,off+len,"Type: %02x",temp);
311 parse_styles = with_styles;
312 if ((temp == 0x0001) && !with_styles) {
313 psiconv_warn(lev+2,off+len,"Styleless layout section expected, "
314 "but styled section found!");
315 parse_styles = 1;
316 res = -1;
317 } else if ((temp == 0x0000) && (with_styles)) {
318 psiconv_warn(lev+2,off+len,"Styled layout section expected, "
319 "but styleless section found!");
320 parse_styles = 0;
321 res = -1;
322 } else if ((temp != 0x0000) && (temp != 0x0001)) {
323 psiconv_warn(lev+2,off+len,
324 "Layout section type indicator has unknown value!");
325 res = -1;
326 }
327 len += 0x02;
328
329 psiconv_progress(lev+2,off+len,"Going to read paragraph type list");
330 anon_styles = psiconv_list_new(sizeof(anon));
331 psiconv_progress(lev+3,off+len,"Going to read paragraph type list length");
332 nr = psiconv_read_u8(buf,lev+3,off+len);
333 psiconv_debug(lev+3,off+len,"Length: %02x",nr);
334 len ++;
335
336 psiconv_progress(lev+3,off+len,
337 "Going to read the paragraph type list elements");
338 for (i = 0; i < nr; i ++) {
339 psiconv_progress(lev+3,off+len,"Element %d",i);
340 anon.nr = psiconv_read_u32(buf,lev+4,off+len);
341 psiconv_debug(lev+4,off+len,"Number: %08x",anon.nr);
342 len += 0x04;
343
344 psiconv_progress(lev+4,off,"Going to determine the base style");
345 if (parse_styles) {
346 anon.base_style = psiconv_read_u8(buf,lev+3,
347 off+len+4+psiconv_read_u32(buf,lev+4,
348 off+len));
349 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
350 "Style indicator: %02x",anon.base_style);
351 } else
352 anon.base_style = 0;
353 anon.paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
354 (styles,anon.base_style)->paragraph);
355 anon.character = psiconv_clone_character_layout(psiconv_get_style
356 (styles,anon.base_style)->character);
357
358 psiconv_progress(lev+4,off+len,"Going to read the paragraph layout");
359 res |= psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
360 anon.paragraph);
361 len += leng;
362 if (parse_styles)
363 len ++;
364
365 psiconv_progress(lev+4,off+len,"Going to read the character layout");
366 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
367 anon.character);
368 len += leng;
369 psiconv_list_add(anon_styles,&anon);
370 }
371
372 psiconv_progress(lev+2,off+len,"Going to parse the paragraph element list");
373 psiconv_progress(lev+3,off+len,"Going to read the number of paragraphs");
374 nr = psiconv_read_u32(buf,lev+3,off+len);
375 if (nr != psiconv_list_length(result)) {
376 psiconv_warn(lev+3,off+len,
377 "Number of text paragraphs and paragraph elements does not match");
378 psiconv_debug(lev+3,off+len,
379 "%d text paragraphs, %d paragraph elements",
380 psiconv_list_length(result),nr);
381 }
382 psiconv_debug(lev+3,off+len,"Number of paragraphs: %d",nr);
383 len += 4;
384 inline_count = malloc(nr * sizeof(*inline_count));
385
386 psiconv_progress(lev+3,off+len,"Going to read the paragraph elements");
387 for (i = 0; i < nr; i ++) {
388 psiconv_progress(lev+3,off+len,"Element %d",i);
389 if (i >= psiconv_list_length(result)) {
390 psiconv_debug(lev+4,off+len,"Going to allocate a new element");
391 para = malloc(sizeof(*para));
392 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
393 para->base_style = 0;
394 para->base_character = psiconv_basic_character_layout();
395 para->base_paragraph = psiconv_basic_paragraph_layout();
396 free(para);
397 }
398 para = psiconv_list_get(result,i);
399
400 psiconv_progress(lev+4,off+len,"Going to read the paragraph length");
401 temp = psiconv_read_u32(buf,lev+4,off+len);
402 if (temp != strlen(para->text)+1) {
403 res = -1;
404 psiconv_warn(lev+4,off+len,
405 "Disagreement of the length of paragraph in layout section");
406 psiconv_debug(lev+4,off+len,
407 "Paragraph length: layout section says %d, counted %d",
408 temp,strlen(para->text)+1);
409 } else
410 psiconv_debug(lev+4,off+len,"Paragraph length: %d",temp);
411 len += 4;
412
413 psiconv_progress(lev+4,off+len,"Going to read the paragraph type");
414 temp = psiconv_read_u8(buf,lev+4,off+len);
415 if (temp != 0x00) {
416 psiconv_debug(lev+4,off+len,"Type: %02x",temp);
417 for (j = 0; j < psiconv_list_length(anon_styles); j++) {
418 anon_ptr = psiconv_list_get(anon_styles,j);
419 if (temp == anon_ptr->nr)
420 break;
421 }
422 if (j == psiconv_list_length(anon_styles)) {
423 psiconv_warn(lev+4,off+len,"Layout section paragraph type unknown");
424 psiconv_debug(lev+4,off+len,"Unknown type - using base styles instead");
425 para->base_style = 0;
426 psiconv_free_paragraph_layout(para->base_paragraph);
427 psiconv_free_character_layout(para->base_character);
428 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
429 (styles,0)->paragraph);
430 para->base_character = psiconv_clone_character_layout(psiconv_get_style
431 (styles,0)->character);
432 } else {
433 para->base_style = anon_ptr->base_style;
434 psiconv_free_paragraph_layout(para->base_paragraph);
435 psiconv_free_character_layout(para->base_character);
436 para->base_paragraph = psiconv_clone_paragraph_layout
437 (anon_ptr->paragraph);
438 para->base_character = psiconv_clone_character_layout
439 (anon_ptr->character);
440 }
441 inline_count[i] = 0;
442 len += 0x01;
443 } else {
444 psiconv_debug(lev+4,off+len,"Type: %02x (not based on a paragraph type)"
445 ,temp);
446 len += 0x01;
447 if (parse_styles) {
448 psiconv_progress(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
449 "Going to read the paragraph element base style");
450 temp = psiconv_read_u8(buf,lev+4,
451 off+len+psiconv_read_u32(buf,lev+4,off+len));
452 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
453 "Style: %02x",temp);
454 } else
455 temp = 0x00;
456 psiconv_free_paragraph_layout(para->base_paragraph);
457 psiconv_free_character_layout(para->base_character);
458 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
459 (styles,temp)->paragraph);
460 para->base_character = psiconv_clone_character_layout(psiconv_get_style
461 (styles,temp)->character);
462 para->base_style = temp;
463 psiconv_progress(lev+4,off+len,"Going to read paragraph layout");
464 psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
465 para->base_paragraph);
466 len += leng;
467 if (parse_styles)
468 len += 1;
469 psiconv_progress(lev+4,off+len,"Going to read number of in-line "
470 "layout elements");
471 inline_count[i] = psiconv_read_u32(buf,lev+4,off+len);
472 psiconv_debug(lev+4,off+len,"Nr: %08x",inline_count[i]);
473 len += 4;
474 }
475 }
476
477 psiconv_progress(lev+2,off+len,"Going to read the text layout inline list");
478
479 psiconv_progress(lev+3,off+len,"Going to read the number of elements");
480 nr = psiconv_read_u32(buf,lev+3,off+len);
481 psiconv_debug(lev+3,off,"Elements: %08x",nr);
482 len += 0x04;
483
484 psiconv_progress(lev+3,off+len,
485 "Going to read the text layout inline elements");
486 total = 0;
487 for (i = 0; i < psiconv_list_length(result); i++) {
488 para = psiconv_list_get(result,i);
489 line_length = -1;
490 for (j = 0; j < inline_count[i]; j++) {
491 psiconv_progress(lev+3,off+len,"Element %d: Paragraph %d, element %d",
492 total,i,j);
493 if (total >= nr) {
494 psiconv_warn(lev+3,off+len,
495 "Layout section inlines: not enough element");
496 res = -1;
497 psiconv_debug(lev+3,off+len,"Can't read element!");
498 } else {
499 total ++;
500 in_line.layout = psiconv_clone_character_layout(para->base_character);
501 psiconv_progress(lev+4,off+len,"Going to read the element type");
502 temp = psiconv_read_u8(buf,lev+4,len+off);
503 len += 1;
504 psiconv_debug(lev+4,off,"Type: %02x",temp);
505 psiconv_progress(lev+4,off,
506 "Going to read the number of characters it applies to");
507 in_line.length = psiconv_read_u32(buf,lev+4,len+off);
508 psiconv_debug(lev+4,off+len,"Length: %02x",in_line.length);
509 len += 4;
510 psiconv_progress(lev+4,off+len,"Going to read the character layout");
511 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
512 in_line.layout);
513 len += leng;
514
515 if (temp == 0x01) {
516 psiconv_debug(lev+4,off+len,"Skipping object data");
517 len += 0x10;
518 } else if (temp != 0x00) {
519 psiconv_warn(lev+4,off+len,"Layout section unknown inline type");
520 res = -1;
521 }
522 if (line_length + in_line.length > strlen(para->text)) {
523 psiconv_warn(lev+4,off+len,
524 "Layout section inlines: line length mismatch");
525 res = -1;
526 in_line.length = strlen(para->text) - line_length;
527 }
528 line_length += in_line.length;
529 psiconv_list_add(para->in_lines,&in_line);
530 }
531 }
532 }
533
534 if (total != nr) {
535 psiconv_warn(lev+4,off+len,
536 "Layout section too many inlines, skipping remaining");
537 }
538
539 free(inline_count);
540
541 for (i = 0 ; i < psiconv_list_length(anon_styles); i ++) {
542 anon_ptr = psiconv_list_get(anon_styles,i);
543 psiconv_free_character_layout(anon_ptr->character);
544 psiconv_free_paragraph_layout(anon_ptr->paragraph);
545 }
546 psiconv_list_free(anon_styles);
547
548 if (length)
549 *length = len;
550
551 psiconv_progress(lev+1,off+len-1,"End of layout section (total length: %08x",
552 len);
553
554 return res;
555 }
556
557 int psiconv_parse_styled_layout_section(const psiconv_buffer buf,
558 int lev,psiconv_u32 off,
559 int *length,
560 psiconv_text_and_layout result,
561 psiconv_word_styles_section styles)
562 {
563 return psiconv_parse_layout_section(buf,lev,off,length,result,styles,1);
564 }
565
566 int psiconv_parse_styleless_layout_section(const psiconv_buffer buf,
567 int lev,psiconv_u32 off,
568 int *length,
569 psiconv_text_and_layout result,
570 psiconv_character_layout base_char,
571 psiconv_paragraph_layout base_para)
572 {
573 int res;
574 psiconv_word_styles_section styles_section;
575
576 styles_section = malloc(sizeof(*styles_section));
577 styles_section->normal = malloc(sizeof(*styles_section->normal));
578 styles_section->normal->character = psiconv_clone_character_layout(base_char);
579 styles_section->normal->paragraph = psiconv_clone_paragraph_layout(base_para);
580 styles_section->normal->hotkey = 0;
581 styles_section->normal->name = strdup("");
582 styles_section->styles = psiconv_list_new(sizeof(struct psiconv_word_style));
583
584 res = psiconv_parse_layout_section(buf,lev,off,length,result,
585 styles_section,0);
586
587 psiconv_free_word_styles_section(styles_section);
588 return res;
589 }
590

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26