/[public]/psiconv/trunk/lib/psiconv/parse_common.c
ViewVC logotype

Contents of /psiconv/trunk/lib/psiconv/parse_common.c

Parent Directory | Revision Log


Revision 2 - (show annotations)
Sun Oct 3 21:10:47 1999 UTC (24 years, 6 months ago) by frodo
File MIME type: text/plain
File size: 21926 byte(s)
Imported sources

1 /*
2 parse_common.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include "config.h"
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "data.h"
25 #include "parse_routines.h"
26
27 static int psiconv_parse_layout_section(const psiconv_buffer buf,
28 int lev,psiconv_u32 off,
29 int *length,
30 psiconv_text_and_layout result,
31 psiconv_word_styles_section styles,
32 int with_styles);
33
34 int psiconv_parse_header_section(const psiconv_buffer buf,int lev,
35 psiconv_u32 off, int *length,
36 psiconv_header_section *result)
37 {
38 int res=0;
39 int len=0;
40 psiconv_u32 temp;
41
42 psiconv_progress(lev+1,off+len,"Going to read the header section");
43 (*result) = malloc(sizeof(**result));
44
45 psiconv_progress(lev+2,off+len,"Going to read UID1 to UID3");
46 (*result)->uid1 = psiconv_read_u32(buf,lev+2,off+len);
47 psiconv_debug(lev+2,off+len,"UID1: %08x",(*result)->uid1);
48 if ((*result)->uid1 != PSICONV_ID_PSION5) {
49 psiconv_warn(lev+2,off+len,"UID1 has unknown value. This is probably "
50 "not a (parsable) Psion 5 file");
51 res = -1;
52 }
53 len += 4;
54 (*result)->uid2 = psiconv_read_u32(buf,lev+2,off+len);
55 psiconv_debug(lev+2,off+len,"UID2: %08x",(*result)->uid2);
56 len += 4;
57 (*result)->uid3 = psiconv_read_u32(buf,lev+2,off+len);
58 psiconv_debug(lev+2,off+len,"UID3: %08x",(*result)->uid3);
59 len += 4;
60
61 (*result)->file = psiconv_unknown_file;
62 if ((*result)->uid1 == PSICONV_ID_PSION5) {
63 if ((*result)->uid2 == PSICONV_ID_DATA_FILE) {
64 if ((*result)->uid3 == PSICONV_ID_WORD) {
65 (*result)->file = psiconv_word_file;
66 psiconv_debug(lev+2,off+len,"File is a Word file");
67 } else if ((*result)->uid3 == PSICONV_ID_TEXTED) {
68 (*result)->file = psiconv_texted_file;
69 psiconv_debug(lev+2,off+len,"File is a TextEd file");
70 }
71 }
72 }
73 if ((*result)->file == psiconv_unknown_file) {
74 psiconv_warn(lev+2,off+len,"Unknown file type");
75 (*result)->file = psiconv_unknown_file;
76 }
77
78 psiconv_progress(lev+2,off+len,"Checking UID4");
79 temp = psiconv_read_u32(buf,lev+2,off+len);
80 if (temp == psiconv_checkuid((*result)->uid1,(*result)->uid2,
81 (*result)->uid3))
82 psiconv_debug(lev+2,off+len,"Checksum %08x is correct",temp);
83 else {
84 psiconv_warn(lev+2,off+len,"Checksum failed, file corrupted!");
85 psiconv_debug(lev+2,off+len,"Expected checksum %08x, found %08x",
86 psiconv_checkuid((*result)->uid1,(*result)->uid2,
87 (*result)->uid3),temp);
88 res = -1;
89 }
90 len += 4;
91
92 psiconv_progress(lev+2,off+len,"Going to read Section Table Section offset");
93 (*result)->section_table_offset = psiconv_read_u32(buf,lev+2,off+len);
94 psiconv_debug(lev+2,off+len,"Section Table Section starts at offset %08x",
95 (*result)->section_table_offset) ;
96 len += 4;
97
98 if (length)
99 *length = len;
100
101 psiconv_progress(lev+1,off+len-1,
102 "End of Header Section (total length: %08x)",len);
103
104 return res;
105 }
106
107 int psiconv_parse_section_table_section(const psiconv_buffer buf, int lev,
108 psiconv_u32 off, int *length,
109 psiconv_section_table_section *result)
110 {
111 int res=0;
112 int len=0;
113 psiconv_section_table_entry entry;
114
115 int i;
116 psiconv_u8 nr;
117
118 psiconv_progress(lev+1,off+len,"Going to read the section table section");
119 *result = psiconv_list_new(sizeof(*entry));
120
121 psiconv_progress(lev+2,off+len,"Going to read the section table length");
122 nr = psiconv_read_u8(buf,lev+2,off+len);
123 psiconv_debug(lev+2,off+len,"Length: %08x",nr);
124 if (nr & 0x01) {
125 psiconv_warn(lev+2,off+len,
126 "Section table length odd - ignoring last entry");
127 res = -1;
128 }
129 len ++;
130
131 psiconv_progress(lev+2,off+len,"Going to read the section table entries");
132 entry = malloc(sizeof(*entry));
133 for (i = 0; i < nr / 2; i++) {
134 entry->id = psiconv_read_u32(buf,lev+2,off + len);
135 psiconv_debug(lev+2,off + len,"Entry %d: ID = %08x",i,entry->id);
136 len += 0x04;
137 entry->offset = psiconv_read_u32(buf,lev+2,off + len);
138 psiconv_debug(lev+2,off +len,"Entry %d: Offset = %08x",i,entry->offset);
139 len += 0x04;
140 psiconv_list_add(*result,entry);
141 }
142
143 free(entry);
144
145 if (length)
146 *length = len;
147
148 psiconv_progress(lev+1,off+len-1,"End of section table section "
149 "(total length: %08x", len);
150
151 return res;
152 }
153
154 int psiconv_parse_application_id_section(const psiconv_buffer buf, int lev,
155 psiconv_u32 off, int *length,
156 psiconv_application_id_section *result)
157 {
158 int res=0;
159 int len=0;
160 int leng;
161
162 psiconv_progress(lev+1,off,"Going to read the application id section");
163 (*result) = malloc(sizeof(**result));
164
165 psiconv_progress(lev+2,off+len,"Going to read the type identifier");
166 (*result)->id = psiconv_read_u32(buf,lev+2,off+len);
167 psiconv_debug(lev+2,off+len,"Identifier: %08x",(*result)->id);
168 len += 4;
169
170 psiconv_progress(lev+2,off+len,"Going to read the application id string");
171 (*result)->name = psiconv_read_string(buf,lev+2,off+len,&leng);
172 len += leng;
173
174 if (length)
175 *length = len;
176
177 psiconv_progress(lev+1,off+len-1,"End of application id section "
178 "(total length: %08x", len);
179
180 return res;
181 }
182
183 int psiconv_parse_text_section(const psiconv_buffer buf,int lev,psiconv_u32 off,
184 int *length,psiconv_text_and_layout *result)
185 {
186
187 int res = 0;
188 int len=0;
189
190 psiconv_u32 text_len;
191 psiconv_paragraph para;
192
193 int nr;
194 int i,j,start,leng;
195 char *str_copy;
196
197 psiconv_progress(lev+1,off,"Going to parse the text section");
198 psiconv_progress(lev+2,off,"Reading the text length");
199 text_len = psiconv_read_X(buf,lev+2,off,&leng);
200 psiconv_debug(lev+2,off,"Length: %08x",text_len);
201 len += leng;
202
203 *result = psiconv_list_new(sizeof(*para));
204 para = malloc(sizeof(*para));
205
206 psiconv_progress(lev+2,off+len,"Going to read all paragraph text");
207 nr = 0;
208 start = 0;
209 for (i = 0; i < text_len; i++)
210 if (psiconv_read_u8(buf,lev+2,off+len+i) == 0x06) {
211 para->text = malloc(i - start + 1);
212 for (j = 0; j < i - start; j++)
213 para->text[j] = psiconv_read_u8(buf,lev+1,off + len + start + j);
214 para->text[j] = 0;
215
216 psiconv_list_add(*result,para);
217
218 str_copy = psiconv_make_printable(para->text);
219 psiconv_debug(lev+2,off+i+len,"Line %d: %d characters",nr,
220 strlen(str_copy) +1);
221 psiconv_debug(lev+2,off+i+len,"Line %d: `%s'",nr,str_copy);
222 free(str_copy);
223
224 start = i + 1;
225 nr ++;
226 }
227
228 if (start != text_len) {
229 res = -1;
230 psiconv_warn(lev+2,off+start+len,
231 "Last line does not end on EOL (%d characters left)", len - start);
232 para->text = malloc(text_len - start + 1);
233 for (j = 0; j < text_len - start; j++)
234 para->text[j] = psiconv_read_u8(buf,lev+2,off + start + j + len);
235 para->text[text_len - start] = 0;
236 psiconv_list_add(*result,para);
237 str_copy = psiconv_make_printable(para->text);
238 psiconv_debug(lev+2,off+start+len,"Last line: %d characters",nr,
239 strlen(str_copy)+1);
240 psiconv_debug(lev+2,off+start+len,"Last line: `%s'",str_copy);
241 free(str_copy);
242 }
243
244 free(para);
245
246 /* Initialize the remaining parts of each paragraph */
247 for (i = 0; i < psiconv_list_length(*result); i ++) {
248 para = psiconv_list_get(*result,i);
249 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
250 para->replacements = psiconv_list_new(sizeof(struct psiconv_replacement));
251 para->base_style = 0;
252 para->base_character = psiconv_basic_character_layout();
253 para->base_paragraph = psiconv_basic_paragraph_layout();
254 }
255
256
257 len += text_len;
258
259 if (length)
260 *length = len;
261
262 psiconv_progress(lev+1,off+len-1,"End of text section (total length: %08x",
263 len);
264
265 return res;
266 }
267
268 /* First do a parse_text_section, or you will get into trouble here */
269 int psiconv_parse_layout_section(const psiconv_buffer buf,
270 int lev,psiconv_u32 off,
271 int *length,
272 psiconv_text_and_layout result,
273 psiconv_word_styles_section styles,
274 int with_styles)
275 {
276 int res = 0;
277 int len = 0;
278 psiconv_u32 temp;
279 int parse_styles,nr,i,j,total,leng,line_length;
280
281 typedef struct anon_style
282 {
283 int nr;
284 psiconv_s16 base_style;
285 psiconv_character_layout character;
286 psiconv_paragraph_layout paragraph;
287 } *anon_style;
288
289 typedef psiconv_list anon_style_list; /* of struct anon_style */
290
291 anon_style_list anon_styles;
292 struct anon_style anon;
293 anon_style anon_ptr=NULL;
294
295 psiconv_paragraph para;
296 struct psiconv_in_line_layout in_line;
297
298 int *inline_count;
299
300
301 psiconv_progress(lev+1,off,"Going to read the layout section");
302
303 psiconv_progress(lev+2,off,"Going to read the section type");
304 temp = psiconv_read_u16(buf,lev+2,off+len);
305 psiconv_debug(lev+2,off+len,"Type: %02x",temp);
306 parse_styles = with_styles;
307 if ((temp == 0x0001) && !with_styles) {
308 psiconv_warn(lev+2,off+len,"Styleless layout section expected, "
309 "but styled section found!");
310 parse_styles = 1;
311 res = -1;
312 } else if ((temp == 0x0000) && (with_styles)) {
313 psiconv_warn(lev+2,off+len,"Styled layout section expected, "
314 "but styleless section found!");
315 parse_styles = 0;
316 res = -1;
317 } else if ((temp != 0x0000) && (temp != 0x0001)) {
318 psiconv_warn(lev+2,off+len,
319 "Layout section type indicator has unknown value!");
320 res = -1;
321 }
322 len += 0x02;
323
324 psiconv_progress(lev+2,off+len,"Going to read paragraph type list");
325 anon_styles = psiconv_list_new(sizeof(anon));
326 psiconv_progress(lev+3,off+len,"Going to read paragraph type list length");
327 nr = psiconv_read_u8(buf,lev+3,off+len);
328 psiconv_debug(lev+3,off+len,"Length: %02x",nr);
329 len ++;
330
331 psiconv_progress(lev+3,off+len,
332 "Going to read the paragraph type list elements");
333 for (i = 0; i < nr; i ++) {
334 psiconv_progress(lev+3,off+len,"Element %d",i);
335 anon.nr = psiconv_read_u32(buf,lev+4,off+len);
336 psiconv_debug(lev+4,off+len,"Number: %08x",anon.nr);
337 len += 0x04;
338
339 psiconv_progress(lev+4,off,"Going to determine the base style");
340 if (parse_styles) {
341 anon.base_style = psiconv_read_u8(buf,lev+3,
342 off+len+4+psiconv_read_u32(buf,lev+4,
343 off+len));
344 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
345 "Style indicator: %02x",anon.base_style);
346 } else
347 anon.base_style = 0;
348 anon.paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
349 (styles,anon.base_style)->paragraph);
350 anon.character = psiconv_clone_character_layout(psiconv_get_style
351 (styles,anon.base_style)->character);
352
353 psiconv_progress(lev+4,off+len,"Going to read the paragraph layout");
354 res |= psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
355 anon.paragraph);
356 len += leng;
357 if (parse_styles)
358 len ++;
359
360 psiconv_progress(lev+4,off+len,"Going to read the character layout");
361 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
362 anon.character);
363 len += leng;
364 psiconv_list_add(anon_styles,&anon);
365 }
366
367 psiconv_progress(lev+2,off+len,"Going to parse the paragraph element list");
368 psiconv_progress(lev+3,off+len,"Going to read the number of paragraphs");
369 nr = psiconv_read_u32(buf,lev+3,off+len);
370 if (nr != psiconv_list_length(result)) {
371 psiconv_warn(lev+3,off+len,
372 "Number of text paragraphs and paragraph elements does not match");
373 psiconv_debug(lev+3,off+len,
374 "%d text paragraphs, %d paragraph elements",
375 psiconv_list_length(result),nr);
376 }
377 psiconv_debug(lev+3,off+len,"Number of paragraphs: %d",nr);
378 len += 4;
379 inline_count = malloc(nr * sizeof(*inline_count));
380
381 psiconv_progress(lev+3,off+len,"Going to read the paragraph elements");
382 for (i = 0; i < nr; i ++) {
383 psiconv_progress(lev+3,off+len,"Element %d",i);
384 if (i >= psiconv_list_length(result)) {
385 psiconv_debug(lev+4,off+len,"Going to allocate a new element");
386 para = malloc(sizeof(*para));
387 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
388 para->base_style = 0;
389 para->base_character = psiconv_basic_character_layout();
390 para->base_paragraph = psiconv_basic_paragraph_layout();
391 free(para);
392 }
393 para = psiconv_list_get(result,i);
394
395 psiconv_progress(lev+4,off+len,"Going to read the paragraph length");
396 temp = psiconv_read_u32(buf,lev+4,off+len);
397 if (temp != strlen(para->text)+1) {
398 res = -1;
399 psiconv_warn(lev+4,off+len,
400 "Disagreement of the length of paragraph in layout section");
401 psiconv_debug(lev+4,off+len,
402 "Paragraph length: layout section says %d, counted %d",
403 temp,strlen(para->text)+1);
404 } else
405 psiconv_debug(lev+4,off+len,"Paragraph length: %d",temp);
406 len += 4;
407
408 psiconv_progress(lev+4,off+len,"Going to read the paragraph type");
409 temp = psiconv_read_u8(buf,lev+4,off+len);
410 if (temp != 0x00) {
411 psiconv_debug(lev+4,off+len,"Type: %02x",temp);
412 for (j = 0; j < psiconv_list_length(anon_styles); j++) {
413 anon_ptr = psiconv_list_get(anon_styles,j);
414 if (temp == anon_ptr->nr)
415 break;
416 }
417 if (j == psiconv_list_length(anon_styles)) {
418 psiconv_warn(lev+4,off+len,"Layout section paragraph type unknown");
419 psiconv_debug(lev+4,off+len,"Unknown type - using base styles instead");
420 para->base_style = 0;
421 psiconv_free_paragraph_layout(para->base_paragraph);
422 psiconv_free_character_layout(para->base_character);
423 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
424 (styles,0)->paragraph);
425 para->base_character = psiconv_clone_character_layout(psiconv_get_style
426 (styles,0)->character);
427 } else {
428 para->base_style = anon_ptr->base_style;
429 psiconv_free_paragraph_layout(para->base_paragraph);
430 psiconv_free_character_layout(para->base_character);
431 para->base_paragraph = psiconv_clone_paragraph_layout
432 (anon_ptr->paragraph);
433 para->base_character = psiconv_clone_character_layout
434 (anon_ptr->character);
435 }
436 inline_count[i] = 0;
437 len += 0x01;
438 } else {
439 psiconv_debug(lev+4,off+len,"Type: %02x (not based on a paragraph type)"
440 ,temp);
441 len += 0x01;
442 if (parse_styles) {
443 psiconv_progress(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
444 "Going to read the paragraph element base style");
445 temp = psiconv_read_u8(buf,lev+4,
446 off+len+psiconv_read_u32(buf,lev+4,off+len));
447 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
448 "Style: %02x",temp);
449 } else
450 temp = 0x00;
451 psiconv_free_paragraph_layout(para->base_paragraph);
452 psiconv_free_character_layout(para->base_character);
453 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
454 (styles,temp)->paragraph);
455 para->base_character = psiconv_clone_character_layout(psiconv_get_style
456 (styles,temp)->character);
457 para->base_style = temp;
458 psiconv_progress(lev+4,off+len,"Going to read paragraph layout");
459 psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
460 para->base_paragraph);
461 len += leng;
462 if (parse_styles)
463 len += 1;
464 psiconv_progress(lev+4,off+len,"Going to read number of in-line "
465 "layout elements");
466 inline_count[i] = psiconv_read_u32(buf,lev+4,off+len);
467 psiconv_debug(lev+4,off+len,"Nr: %08x",inline_count[i]);
468 len += 4;
469 }
470 }
471
472 psiconv_progress(lev+2,off+len,"Going to read the text layout inline list");
473
474 psiconv_progress(lev+3,off+len,"Going to read the number of elements");
475 nr = psiconv_read_u32(buf,lev+3,off+len);
476 psiconv_debug(lev+3,off,"Elements: %08x",nr);
477 len += 0x04;
478
479 psiconv_progress(lev+3,off+len,
480 "Going to read the text layout inline elements");
481 total = 0;
482 for (i = 0; i < psiconv_list_length(result); i++) {
483 para = psiconv_list_get(result,i);
484 line_length = -1;
485 for (j = 0; j < inline_count[i]; j++) {
486 psiconv_progress(lev+3,off+len,"Element %d: Paragraph %d, element %d",
487 total,i,j);
488 if (total >= nr) {
489 psiconv_warn(lev+3,off+len,
490 "Layout section inlines: not enough element");
491 res = -1;
492 psiconv_debug(lev+3,off+len,"Can't read element!");
493 } else {
494 total ++;
495 in_line.layout = psiconv_clone_character_layout(para->base_character);
496 psiconv_progress(lev+4,off+len,"Going to read the element type");
497 temp = psiconv_read_u8(buf,lev+4,len+off);
498 len += 1;
499 psiconv_debug(lev+4,off,"Type: %02x",temp);
500 psiconv_progress(lev+4,off,
501 "Going to read the number of characters it applies to");
502 in_line.length = psiconv_read_u32(buf,lev+4,len+off);
503 psiconv_debug(lev+4,off+len,"Length: %02x",in_line.length);
504 len += 4;
505 psiconv_progress(lev+4,off+len,"Going to read the character layout");
506 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
507 in_line.layout);
508 len += leng;
509
510 if (temp == 0x01) {
511 psiconv_debug(lev+4,off+len,"Skipping object data");
512 len += 0x10;
513 } else if (temp != 0x00) {
514 psiconv_warn(lev+4,off+len,"Layout section unknown inline type");
515 res = -1;
516 }
517 if (line_length + in_line.length > strlen(para->text)) {
518 psiconv_warn(lev+4,off+len,
519 "Layout section inlines: line length mismatch");
520 res = -1;
521 in_line.length = strlen(para->text) - line_length;
522 }
523 line_length += in_line.length;
524 psiconv_list_add(para->in_lines,&in_line);
525 }
526 }
527 }
528
529 if (total != nr) {
530 psiconv_warn(lev+4,off+len,
531 "Layout section too many inlines, skipping remaining");
532 }
533
534 free(inline_count);
535
536 for (i = 0 ; i < psiconv_list_length(anon_styles); i ++) {
537 anon_ptr = psiconv_list_get(anon_styles,i);
538 psiconv_free_character_layout(anon_ptr->character);
539 psiconv_free_paragraph_layout(anon_ptr->paragraph);
540 }
541 psiconv_list_free(anon_styles);
542
543 if (length)
544 *length = len;
545
546 psiconv_progress(lev+1,off+len-1,"End of layout section (total length: %08x",
547 len);
548
549 return res;
550 }
551
552 int psiconv_parse_styled_layout_section(const psiconv_buffer buf,
553 int lev,psiconv_u32 off,
554 int *length,
555 psiconv_text_and_layout result,
556 psiconv_word_styles_section styles)
557 {
558 return psiconv_parse_layout_section(buf,lev,off,length,result,styles,1);
559 }
560
561 int psiconv_parse_styleless_layout_section(const psiconv_buffer buf,
562 int lev,psiconv_u32 off,
563 int *length,
564 psiconv_text_and_layout result,
565 psiconv_character_layout base_char,
566 psiconv_paragraph_layout base_para)
567 {
568 int res;
569 psiconv_word_styles_section styles_section;
570
571 styles_section = malloc(sizeof(*styles_section));
572 styles_section->normal = malloc(sizeof(*styles_section->normal));
573 styles_section->normal->character = psiconv_clone_character_layout(base_char);
574 styles_section->normal->paragraph = psiconv_clone_paragraph_layout(base_para);
575 styles_section->normal->hotkey = 0;
576 styles_section->normal->name = strdup("");
577 styles_section->styles = psiconv_list_new(sizeof(struct psiconv_word_style));
578
579 res = psiconv_parse_layout_section(buf,lev,off,length,result,
580 styles_section,0);
581
582 psiconv_free_word_styles_section(styles_section);
583 return res;
584 }
585

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26