/[public]/psiconv/trunk/lib/psiconv/parse_common.c
ViewVC logotype

Contents of /psiconv/trunk/lib/psiconv/parse_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (show annotations)
Wed Oct 27 13:09:40 1999 UTC (24 years, 5 months ago) by frodo
File MIME type: text/plain
File size: 21888 byte(s)
(Frodo) Several changes in header parsing

The definition of the header section has changed: it no longer includes the
long that holds the offset of the Section Table Section. This allows easier
integration of Data-like file formats.

psiconv_parse_{texted,word,mbm}_file no longer parse the header section.
Instead, they start at the long that holds the offset of the Section Table
Section. This will allow easier integration of objects-within-objects.

psiconv_file_type now returns the header section it has read, together with
its length.

1 /*
2 parse_common.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include "config.h"
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "data.h"
25 #include "parse_routines.h"
26
27 static int psiconv_parse_layout_section(const psiconv_buffer buf,
28 int lev,psiconv_u32 off,
29 int *length,
30 psiconv_text_and_layout result,
31 psiconv_word_styles_section styles,
32 int with_styles);
33
34 int psiconv_parse_header_section(const psiconv_buffer buf,int lev,
35 psiconv_u32 off, int *length,
36 psiconv_header_section *result)
37 {
38 int res=0;
39 int len=0;
40 psiconv_u32 temp;
41
42 psiconv_progress(lev+1,off+len,"Going to read the header section");
43 (*result) = malloc(sizeof(**result));
44
45 psiconv_progress(lev+2,off+len,"Going to read UID1 to UID3");
46 (*result)->uid1 = psiconv_read_u32(buf,lev+2,off+len);
47 psiconv_debug(lev+2,off+len,"UID1: %08x",(*result)->uid1);
48 if ((*result)->uid1 != PSICONV_ID_PSION5) {
49 psiconv_warn(lev+2,off+len,"UID1 has unknown value. This is probably "
50 "not a (parsable) Psion 5 file");
51 res = -1;
52 }
53 len += 4;
54 (*result)->uid2 = psiconv_read_u32(buf,lev+2,off+len);
55 psiconv_debug(lev+2,off+len,"UID2: %08x",(*result)->uid2);
56 len += 4;
57 (*result)->uid3 = psiconv_read_u32(buf,lev+2,off+len);
58 psiconv_debug(lev+2,off+len,"UID3: %08x",(*result)->uid3);
59 len += 4;
60
61 (*result)->file = psiconv_unknown_file;
62 if ((*result)->uid1 == PSICONV_ID_PSION5) {
63 if ((*result)->uid2 == PSICONV_ID_DATA_FILE) {
64 if ((*result)->uid3 == PSICONV_ID_WORD) {
65 (*result)->file = psiconv_word_file;
66 psiconv_debug(lev+2,off+len,"File is a Word file");
67 } else if ((*result)->uid3 == PSICONV_ID_TEXTED) {
68 (*result)->file = psiconv_texted_file;
69 psiconv_debug(lev+2,off+len,"File is a TextEd file");
70 }
71 } else if ((*result)->uid2 == PSICONV_ID_MBM_FILE) {
72 (*result)->file = psiconv_mbm_file;
73 if ((*result)->uid3 != 0x00)
74 psiconv_warn(lev+2,off+len,"UID3 set in MBM file?!?");
75 psiconv_debug(lev+2,off+len,"File is a MBM file");
76 }
77 }
78 if ((*result)->file == psiconv_unknown_file) {
79 psiconv_warn(lev+2,off+len,"Unknown file type");
80 (*result)->file = psiconv_unknown_file;
81 }
82
83 psiconv_progress(lev+2,off+len,"Checking UID4");
84 temp = psiconv_read_u32(buf,lev+2,off+len);
85 if (temp == psiconv_checkuid((*result)->uid1,(*result)->uid2,
86 (*result)->uid3))
87 psiconv_debug(lev+2,off+len,"Checksum %08x is correct",temp);
88 else {
89 psiconv_warn(lev+2,off+len,"Checksum failed, file corrupted!");
90 psiconv_debug(lev+2,off+len,"Expected checksum %08x, found %08x",
91 psiconv_checkuid((*result)->uid1,(*result)->uid2,
92 (*result)->uid3),temp);
93 res = -1;
94 }
95 len += 4;
96
97 if (length)
98 *length = len;
99
100 psiconv_progress(lev+1,off+len-1,
101 "End of Header Section (total length: %08x)",len);
102
103 return res;
104 }
105
106 int psiconv_parse_section_table_section(const psiconv_buffer buf, int lev,
107 psiconv_u32 off, int *length,
108 psiconv_section_table_section *result)
109 {
110 int res=0;
111 int len=0;
112 psiconv_section_table_entry entry;
113
114 int i;
115 psiconv_u8 nr;
116
117 psiconv_progress(lev+1,off+len,"Going to read the section table section");
118 *result = psiconv_list_new(sizeof(*entry));
119
120 psiconv_progress(lev+2,off+len,"Going to read the section table length");
121 nr = psiconv_read_u8(buf,lev+2,off+len);
122 psiconv_debug(lev+2,off+len,"Length: %08x",nr);
123 if (nr & 0x01) {
124 psiconv_warn(lev+2,off+len,
125 "Section table length odd - ignoring last entry");
126 res = -1;
127 }
128 len ++;
129
130 psiconv_progress(lev+2,off+len,"Going to read the section table entries");
131 entry = malloc(sizeof(*entry));
132 for (i = 0; i < nr / 2; i++) {
133 entry->id = psiconv_read_u32(buf,lev+2,off + len);
134 psiconv_debug(lev+2,off + len,"Entry %d: ID = %08x",i,entry->id);
135 len += 0x04;
136 entry->offset = psiconv_read_u32(buf,lev+2,off + len);
137 psiconv_debug(lev+2,off +len,"Entry %d: Offset = %08x",i,entry->offset);
138 len += 0x04;
139 psiconv_list_add(*result,entry);
140 }
141
142 free(entry);
143
144 if (length)
145 *length = len;
146
147 psiconv_progress(lev+1,off+len-1,"End of section table section "
148 "(total length: %08x", len);
149
150 return res;
151 }
152
153 int psiconv_parse_application_id_section(const psiconv_buffer buf, int lev,
154 psiconv_u32 off, int *length,
155 psiconv_application_id_section *result)
156 {
157 int res=0;
158 int len=0;
159 int leng;
160
161 psiconv_progress(lev+1,off,"Going to read the application id section");
162 (*result) = malloc(sizeof(**result));
163
164 psiconv_progress(lev+2,off+len,"Going to read the type identifier");
165 (*result)->id = psiconv_read_u32(buf,lev+2,off+len);
166 psiconv_debug(lev+2,off+len,"Identifier: %08x",(*result)->id);
167 len += 4;
168
169 psiconv_progress(lev+2,off+len,"Going to read the application id string");
170 (*result)->name = psiconv_read_string(buf,lev+2,off+len,&leng);
171 len += leng;
172
173 if (length)
174 *length = len;
175
176 psiconv_progress(lev+1,off+len-1,"End of application id section "
177 "(total length: %08x", len);
178
179 return res;
180 }
181
182 int psiconv_parse_text_section(const psiconv_buffer buf,int lev,psiconv_u32 off,
183 int *length,psiconv_text_and_layout *result)
184 {
185
186 int res = 0;
187 int len=0;
188
189 psiconv_u32 text_len;
190 psiconv_paragraph para;
191
192 int nr;
193 int i,j,start,leng;
194 char *str_copy;
195
196 psiconv_progress(lev+1,off,"Going to parse the text section");
197 psiconv_progress(lev+2,off,"Reading the text length");
198 text_len = psiconv_read_X(buf,lev+2,off,&leng);
199 psiconv_debug(lev+2,off,"Length: %08x",text_len);
200 len += leng;
201
202 *result = psiconv_list_new(sizeof(*para));
203 para = malloc(sizeof(*para));
204
205 psiconv_progress(lev+2,off+len,"Going to read all paragraph text");
206 nr = 0;
207 start = 0;
208 for (i = 0; i < text_len; i++)
209 if (psiconv_read_u8(buf,lev+2,off+len+i) == 0x06) {
210 para->text = malloc(i - start + 1);
211 for (j = 0; j < i - start; j++)
212 para->text[j] = psiconv_read_u8(buf,lev+1,off + len + start + j);
213 para->text[j] = 0;
214
215 psiconv_list_add(*result,para);
216
217 str_copy = psiconv_make_printable(para->text);
218 psiconv_debug(lev+2,off+i+len,"Line %d: %d characters",nr,
219 strlen(str_copy) +1);
220 psiconv_debug(lev+2,off+i+len,"Line %d: `%s'",nr,str_copy);
221 free(str_copy);
222
223 start = i + 1;
224 nr ++;
225 }
226
227 if (start != text_len) {
228 res = -1;
229 psiconv_warn(lev+2,off+start+len,
230 "Last line does not end on EOL (%d characters left)", len - start);
231 para->text = malloc(text_len - start + 1);
232 for (j = 0; j < text_len - start; j++)
233 para->text[j] = psiconv_read_u8(buf,lev+2,off + start + j + len);
234 para->text[text_len - start] = 0;
235 psiconv_list_add(*result,para);
236 str_copy = psiconv_make_printable(para->text);
237 psiconv_debug(lev+2,off+start+len,"Last line: %d characters",nr,
238 strlen(str_copy)+1);
239 psiconv_debug(lev+2,off+start+len,"Last line: `%s'",str_copy);
240 free(str_copy);
241 }
242
243 free(para);
244
245 /* Initialize the remaining parts of each paragraph */
246 for (i = 0; i < psiconv_list_length(*result); i ++) {
247 para = psiconv_list_get(*result,i);
248 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
249 para->replacements = psiconv_list_new(sizeof(struct psiconv_replacement));
250 para->base_style = 0;
251 para->base_character = psiconv_basic_character_layout();
252 para->base_paragraph = psiconv_basic_paragraph_layout();
253 }
254
255
256 len += text_len;
257
258 if (length)
259 *length = len;
260
261 psiconv_progress(lev+1,off+len-1,"End of text section (total length: %08x",
262 len);
263
264 return res;
265 }
266
267 /* First do a parse_text_section, or you will get into trouble here */
268 int psiconv_parse_layout_section(const psiconv_buffer buf,
269 int lev,psiconv_u32 off,
270 int *length,
271 psiconv_text_and_layout result,
272 psiconv_word_styles_section styles,
273 int with_styles)
274 {
275 int res = 0;
276 int len = 0;
277 psiconv_u32 temp;
278 int parse_styles,nr,i,j,total,leng,line_length;
279
280 typedef struct anon_style
281 {
282 int nr;
283 psiconv_s16 base_style;
284 psiconv_character_layout character;
285 psiconv_paragraph_layout paragraph;
286 } *anon_style;
287
288 typedef psiconv_list anon_style_list; /* of struct anon_style */
289
290 anon_style_list anon_styles;
291 struct anon_style anon;
292 anon_style anon_ptr=NULL;
293
294 psiconv_paragraph para;
295 struct psiconv_in_line_layout in_line;
296
297 int *inline_count;
298
299
300 psiconv_progress(lev+1,off,"Going to read the layout section");
301
302 psiconv_progress(lev+2,off,"Going to read the section type");
303 temp = psiconv_read_u16(buf,lev+2,off+len);
304 psiconv_debug(lev+2,off+len,"Type: %02x",temp);
305 parse_styles = with_styles;
306 if ((temp == 0x0001) && !with_styles) {
307 psiconv_warn(lev+2,off+len,"Styleless layout section expected, "
308 "but styled section found!");
309 parse_styles = 1;
310 res = -1;
311 } else if ((temp == 0x0000) && (with_styles)) {
312 psiconv_warn(lev+2,off+len,"Styled layout section expected, "
313 "but styleless section found!");
314 parse_styles = 0;
315 res = -1;
316 } else if ((temp != 0x0000) && (temp != 0x0001)) {
317 psiconv_warn(lev+2,off+len,
318 "Layout section type indicator has unknown value!");
319 res = -1;
320 }
321 len += 0x02;
322
323 psiconv_progress(lev+2,off+len,"Going to read paragraph type list");
324 anon_styles = psiconv_list_new(sizeof(anon));
325 psiconv_progress(lev+3,off+len,"Going to read paragraph type list length");
326 nr = psiconv_read_u8(buf,lev+3,off+len);
327 psiconv_debug(lev+3,off+len,"Length: %02x",nr);
328 len ++;
329
330 psiconv_progress(lev+3,off+len,
331 "Going to read the paragraph type list elements");
332 for (i = 0; i < nr; i ++) {
333 psiconv_progress(lev+3,off+len,"Element %d",i);
334 anon.nr = psiconv_read_u32(buf,lev+4,off+len);
335 psiconv_debug(lev+4,off+len,"Number: %08x",anon.nr);
336 len += 0x04;
337
338 psiconv_progress(lev+4,off,"Going to determine the base style");
339 if (parse_styles) {
340 anon.base_style = psiconv_read_u8(buf,lev+3,
341 off+len+4+psiconv_read_u32(buf,lev+4,
342 off+len));
343 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
344 "Style indicator: %02x",anon.base_style);
345 } else
346 anon.base_style = 0;
347 anon.paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
348 (styles,anon.base_style)->paragraph);
349 anon.character = psiconv_clone_character_layout(psiconv_get_style
350 (styles,anon.base_style)->character);
351
352 psiconv_progress(lev+4,off+len,"Going to read the paragraph layout");
353 res |= psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
354 anon.paragraph);
355 len += leng;
356 if (parse_styles)
357 len ++;
358
359 psiconv_progress(lev+4,off+len,"Going to read the character layout");
360 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
361 anon.character);
362 len += leng;
363 psiconv_list_add(anon_styles,&anon);
364 }
365
366 psiconv_progress(lev+2,off+len,"Going to parse the paragraph element list");
367 psiconv_progress(lev+3,off+len,"Going to read the number of paragraphs");
368 nr = psiconv_read_u32(buf,lev+3,off+len);
369 if (nr != psiconv_list_length(result)) {
370 psiconv_warn(lev+3,off+len,
371 "Number of text paragraphs and paragraph elements does not match");
372 psiconv_debug(lev+3,off+len,
373 "%d text paragraphs, %d paragraph elements",
374 psiconv_list_length(result),nr);
375 }
376 psiconv_debug(lev+3,off+len,"Number of paragraphs: %d",nr);
377 len += 4;
378 inline_count = malloc(nr * sizeof(*inline_count));
379
380 psiconv_progress(lev+3,off+len,"Going to read the paragraph elements");
381 for (i = 0; i < nr; i ++) {
382 psiconv_progress(lev+3,off+len,"Element %d",i);
383 if (i >= psiconv_list_length(result)) {
384 psiconv_debug(lev+4,off+len,"Going to allocate a new element");
385 para = malloc(sizeof(*para));
386 para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
387 para->base_style = 0;
388 para->base_character = psiconv_basic_character_layout();
389 para->base_paragraph = psiconv_basic_paragraph_layout();
390 free(para);
391 }
392 para = psiconv_list_get(result,i);
393
394 psiconv_progress(lev+4,off+len,"Going to read the paragraph length");
395 temp = psiconv_read_u32(buf,lev+4,off+len);
396 if (temp != strlen(para->text)+1) {
397 res = -1;
398 psiconv_warn(lev+4,off+len,
399 "Disagreement of the length of paragraph in layout section");
400 psiconv_debug(lev+4,off+len,
401 "Paragraph length: layout section says %d, counted %d",
402 temp,strlen(para->text)+1);
403 } else
404 psiconv_debug(lev+4,off+len,"Paragraph length: %d",temp);
405 len += 4;
406
407 psiconv_progress(lev+4,off+len,"Going to read the paragraph type");
408 temp = psiconv_read_u8(buf,lev+4,off+len);
409 if (temp != 0x00) {
410 psiconv_debug(lev+4,off+len,"Type: %02x",temp);
411 for (j = 0; j < psiconv_list_length(anon_styles); j++) {
412 anon_ptr = psiconv_list_get(anon_styles,j);
413 if (temp == anon_ptr->nr)
414 break;
415 }
416 if (j == psiconv_list_length(anon_styles)) {
417 psiconv_warn(lev+4,off+len,"Layout section paragraph type unknown");
418 psiconv_debug(lev+4,off+len,"Unknown type - using base styles instead");
419 para->base_style = 0;
420 psiconv_free_paragraph_layout(para->base_paragraph);
421 psiconv_free_character_layout(para->base_character);
422 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
423 (styles,0)->paragraph);
424 para->base_character = psiconv_clone_character_layout(psiconv_get_style
425 (styles,0)->character);
426 } else {
427 para->base_style = anon_ptr->base_style;
428 psiconv_free_paragraph_layout(para->base_paragraph);
429 psiconv_free_character_layout(para->base_character);
430 para->base_paragraph = psiconv_clone_paragraph_layout
431 (anon_ptr->paragraph);
432 para->base_character = psiconv_clone_character_layout
433 (anon_ptr->character);
434 }
435 inline_count[i] = 0;
436 len += 0x01;
437 } else {
438 psiconv_debug(lev+4,off+len,"Type: %02x (not based on a paragraph type)"
439 ,temp);
440 len += 0x01;
441 if (parse_styles) {
442 psiconv_progress(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
443 "Going to read the paragraph element base style");
444 temp = psiconv_read_u8(buf,lev+4,
445 off+len+psiconv_read_u32(buf,lev+4,off+len));
446 psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
447 "Style: %02x",temp);
448 } else
449 temp = 0x00;
450 psiconv_free_paragraph_layout(para->base_paragraph);
451 psiconv_free_character_layout(para->base_character);
452 para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
453 (styles,temp)->paragraph);
454 para->base_character = psiconv_clone_character_layout(psiconv_get_style
455 (styles,temp)->character);
456 para->base_style = temp;
457 psiconv_progress(lev+4,off+len,"Going to read paragraph layout");
458 psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
459 para->base_paragraph);
460 len += leng;
461 if (parse_styles)
462 len += 1;
463 psiconv_progress(lev+4,off+len,"Going to read number of in-line "
464 "layout elements");
465 inline_count[i] = psiconv_read_u32(buf,lev+4,off+len);
466 psiconv_debug(lev+4,off+len,"Nr: %08x",inline_count[i]);
467 len += 4;
468 }
469 }
470
471 psiconv_progress(lev+2,off+len,"Going to read the text layout inline list");
472
473 psiconv_progress(lev+3,off+len,"Going to read the number of elements");
474 nr = psiconv_read_u32(buf,lev+3,off+len);
475 psiconv_debug(lev+3,off,"Elements: %08x",nr);
476 len += 0x04;
477
478 psiconv_progress(lev+3,off+len,
479 "Going to read the text layout inline elements");
480 total = 0;
481 for (i = 0; i < psiconv_list_length(result); i++) {
482 para = psiconv_list_get(result,i);
483 line_length = -1;
484 for (j = 0; j < inline_count[i]; j++) {
485 psiconv_progress(lev+3,off+len,"Element %d: Paragraph %d, element %d",
486 total,i,j);
487 if (total >= nr) {
488 psiconv_warn(lev+3,off+len,
489 "Layout section inlines: not enough element");
490 res = -1;
491 psiconv_debug(lev+3,off+len,"Can't read element!");
492 } else {
493 total ++;
494 in_line.layout = psiconv_clone_character_layout(para->base_character);
495 psiconv_progress(lev+4,off+len,"Going to read the element type");
496 temp = psiconv_read_u8(buf,lev+4,len+off);
497 len += 1;
498 psiconv_debug(lev+4,off,"Type: %02x",temp);
499 psiconv_progress(lev+4,off,
500 "Going to read the number of characters it applies to");
501 in_line.length = psiconv_read_u32(buf,lev+4,len+off);
502 psiconv_debug(lev+4,off+len,"Length: %02x",in_line.length);
503 len += 4;
504 psiconv_progress(lev+4,off+len,"Going to read the character layout");
505 res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
506 in_line.layout);
507 len += leng;
508
509 if (temp == 0x01) {
510 psiconv_debug(lev+4,off+len,"Skipping object data");
511 len += 0x10;
512 } else if (temp != 0x00) {
513 psiconv_warn(lev+4,off+len,"Layout section unknown inline type");
514 res = -1;
515 }
516 if (line_length + in_line.length > strlen(para->text)) {
517 psiconv_warn(lev+4,off+len,
518 "Layout section inlines: line length mismatch");
519 res = -1;
520 in_line.length = strlen(para->text) - line_length;
521 }
522 line_length += in_line.length;
523 psiconv_list_add(para->in_lines,&in_line);
524 }
525 }
526 }
527
528 if (total != nr) {
529 psiconv_warn(lev+4,off+len,
530 "Layout section too many inlines, skipping remaining");
531 }
532
533 free(inline_count);
534
535 for (i = 0 ; i < psiconv_list_length(anon_styles); i ++) {
536 anon_ptr = psiconv_list_get(anon_styles,i);
537 psiconv_free_character_layout(anon_ptr->character);
538 psiconv_free_paragraph_layout(anon_ptr->paragraph);
539 }
540 psiconv_list_free(anon_styles);
541
542 if (length)
543 *length = len;
544
545 psiconv_progress(lev+1,off+len-1,"End of layout section (total length: %08x",
546 len);
547
548 return res;
549 }
550
551 int psiconv_parse_styled_layout_section(const psiconv_buffer buf,
552 int lev,psiconv_u32 off,
553 int *length,
554 psiconv_text_and_layout result,
555 psiconv_word_styles_section styles)
556 {
557 return psiconv_parse_layout_section(buf,lev,off,length,result,styles,1);
558 }
559
560 int psiconv_parse_styleless_layout_section(const psiconv_buffer buf,
561 int lev,psiconv_u32 off,
562 int *length,
563 psiconv_text_and_layout result,
564 psiconv_character_layout base_char,
565 psiconv_paragraph_layout base_para)
566 {
567 int res;
568 psiconv_word_styles_section styles_section;
569
570 styles_section = malloc(sizeof(*styles_section));
571 styles_section->normal = malloc(sizeof(*styles_section->normal));
572 styles_section->normal->character = psiconv_clone_character_layout(base_char);
573 styles_section->normal->paragraph = psiconv_clone_paragraph_layout(base_para);
574 styles_section->normal->hotkey = 0;
575 styles_section->normal->name = strdup("");
576 styles_section->styles = psiconv_list_new(sizeof(struct psiconv_word_style));
577
578 res = psiconv_parse_layout_section(buf,lev,off,length,result,
579 styles_section,0);
580
581 psiconv_free_word_styles_section(styles_section);
582 return res;
583 }
584

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26