/[public]/psiconv/trunk/lib/psiconv/parse_common.c
ViewVC logotype

Annotation of /psiconv/trunk/lib/psiconv/parse_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (hide annotations)
Wed Oct 27 13:09:40 1999 UTC (24 years, 5 months ago) by frodo
File MIME type: text/plain
File size: 21888 byte(s)
(Frodo) Several changes in header parsing

The definition of header section is changed to exclude the long with the
offset of the Section Table Section. This allows easier integration of
Data-like file formats.

psiconv_parse_{texted,word,mbm}_file now do not parse the header section.
This will allow easier integration for objects-within-objects. They start
at the long with the offset of the Section Table Section.

psiconv_file_type now returns the read header section, and its length.

1 frodo 2 /*
2     parse_common.c - Part of psiconv, a PSION 5 file formats converter
3     Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
4    
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14    
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18     */
19    
20     #include "config.h"
21     #include <stdlib.h>
22     #include <string.h>
23    
24     #include "data.h"
25     #include "parse_routines.h"
26    
27     static int psiconv_parse_layout_section(const psiconv_buffer buf,
28     int lev,psiconv_u32 off,
29     int *length,
30     psiconv_text_and_layout result,
31     psiconv_word_styles_section styles,
32     int with_styles);
33    
34     int psiconv_parse_header_section(const psiconv_buffer buf,int lev,
35     psiconv_u32 off, int *length,
36     psiconv_header_section *result)
37     {
38     int res=0;
39     int len=0;
40     psiconv_u32 temp;
41    
42     psiconv_progress(lev+1,off+len,"Going to read the header section");
43     (*result) = malloc(sizeof(**result));
44    
45     psiconv_progress(lev+2,off+len,"Going to read UID1 to UID3");
46     (*result)->uid1 = psiconv_read_u32(buf,lev+2,off+len);
47     psiconv_debug(lev+2,off+len,"UID1: %08x",(*result)->uid1);
48     if ((*result)->uid1 != PSICONV_ID_PSION5) {
49     psiconv_warn(lev+2,off+len,"UID1 has unknown value. This is probably "
50     "not a (parsable) Psion 5 file");
51     res = -1;
52     }
53     len += 4;
54     (*result)->uid2 = psiconv_read_u32(buf,lev+2,off+len);
55     psiconv_debug(lev+2,off+len,"UID2: %08x",(*result)->uid2);
56     len += 4;
57     (*result)->uid3 = psiconv_read_u32(buf,lev+2,off+len);
58     psiconv_debug(lev+2,off+len,"UID3: %08x",(*result)->uid3);
59     len += 4;
60    
61     (*result)->file = psiconv_unknown_file;
62     if ((*result)->uid1 == PSICONV_ID_PSION5) {
63     if ((*result)->uid2 == PSICONV_ID_DATA_FILE) {
64     if ((*result)->uid3 == PSICONV_ID_WORD) {
65     (*result)->file = psiconv_word_file;
66     psiconv_debug(lev+2,off+len,"File is a Word file");
67     } else if ((*result)->uid3 == PSICONV_ID_TEXTED) {
68     (*result)->file = psiconv_texted_file;
69     psiconv_debug(lev+2,off+len,"File is a TextEd file");
70     }
71 frodo 12 } else if ((*result)->uid2 == PSICONV_ID_MBM_FILE) {
72     (*result)->file = psiconv_mbm_file;
73     if ((*result)->uid3 != 0x00)
74     psiconv_warn(lev+2,off+len,"UID3 set in MBM file?!?");
75     psiconv_debug(lev+2,off+len,"File is a MBM file");
76     }
77 frodo 2 }
78     if ((*result)->file == psiconv_unknown_file) {
79     psiconv_warn(lev+2,off+len,"Unknown file type");
80     (*result)->file = psiconv_unknown_file;
81     }
82    
83     psiconv_progress(lev+2,off+len,"Checking UID4");
84     temp = psiconv_read_u32(buf,lev+2,off+len);
85     if (temp == psiconv_checkuid((*result)->uid1,(*result)->uid2,
86     (*result)->uid3))
87     psiconv_debug(lev+2,off+len,"Checksum %08x is correct",temp);
88     else {
89     psiconv_warn(lev+2,off+len,"Checksum failed, file corrupted!");
90     psiconv_debug(lev+2,off+len,"Expected checksum %08x, found %08x",
91     psiconv_checkuid((*result)->uid1,(*result)->uid2,
92     (*result)->uid3),temp);
93     res = -1;
94     }
95     len += 4;
96    
97     if (length)
98     *length = len;
99    
100     psiconv_progress(lev+1,off+len-1,
101     "End of Header Section (total length: %08x)",len);
102    
103     return res;
104     }
105    
106     int psiconv_parse_section_table_section(const psiconv_buffer buf, int lev,
107     psiconv_u32 off, int *length,
108     psiconv_section_table_section *result)
109     {
110     int res=0;
111     int len=0;
112     psiconv_section_table_entry entry;
113    
114     int i;
115     psiconv_u8 nr;
116    
117     psiconv_progress(lev+1,off+len,"Going to read the section table section");
118     *result = psiconv_list_new(sizeof(*entry));
119    
120     psiconv_progress(lev+2,off+len,"Going to read the section table length");
121     nr = psiconv_read_u8(buf,lev+2,off+len);
122     psiconv_debug(lev+2,off+len,"Length: %08x",nr);
123     if (nr & 0x01) {
124     psiconv_warn(lev+2,off+len,
125     "Section table length odd - ignoring last entry");
126     res = -1;
127     }
128     len ++;
129    
130     psiconv_progress(lev+2,off+len,"Going to read the section table entries");
131     entry = malloc(sizeof(*entry));
132     for (i = 0; i < nr / 2; i++) {
133     entry->id = psiconv_read_u32(buf,lev+2,off + len);
134     psiconv_debug(lev+2,off + len,"Entry %d: ID = %08x",i,entry->id);
135     len += 0x04;
136     entry->offset = psiconv_read_u32(buf,lev+2,off + len);
137     psiconv_debug(lev+2,off +len,"Entry %d: Offset = %08x",i,entry->offset);
138     len += 0x04;
139     psiconv_list_add(*result,entry);
140     }
141    
142     free(entry);
143    
144     if (length)
145     *length = len;
146    
147     psiconv_progress(lev+1,off+len-1,"End of section table section "
148     "(total length: %08x", len);
149    
150     return res;
151     }
152    
153     int psiconv_parse_application_id_section(const psiconv_buffer buf, int lev,
154     psiconv_u32 off, int *length,
155     psiconv_application_id_section *result)
156     {
157     int res=0;
158     int len=0;
159     int leng;
160    
161     psiconv_progress(lev+1,off,"Going to read the application id section");
162     (*result) = malloc(sizeof(**result));
163    
164     psiconv_progress(lev+2,off+len,"Going to read the type identifier");
165     (*result)->id = psiconv_read_u32(buf,lev+2,off+len);
166     psiconv_debug(lev+2,off+len,"Identifier: %08x",(*result)->id);
167     len += 4;
168    
169     psiconv_progress(lev+2,off+len,"Going to read the application id string");
170     (*result)->name = psiconv_read_string(buf,lev+2,off+len,&leng);
171     len += leng;
172    
173     if (length)
174     *length = len;
175    
176     psiconv_progress(lev+1,off+len-1,"End of application id section "
177     "(total length: %08x", len);
178    
179     return res;
180     }
181    
182     int psiconv_parse_text_section(const psiconv_buffer buf,int lev,psiconv_u32 off,
183     int *length,psiconv_text_and_layout *result)
184     {
185    
186     int res = 0;
187     int len=0;
188    
189     psiconv_u32 text_len;
190     psiconv_paragraph para;
191    
192     int nr;
193     int i,j,start,leng;
194     char *str_copy;
195    
196     psiconv_progress(lev+1,off,"Going to parse the text section");
197     psiconv_progress(lev+2,off,"Reading the text length");
198     text_len = psiconv_read_X(buf,lev+2,off,&leng);
199     psiconv_debug(lev+2,off,"Length: %08x",text_len);
200     len += leng;
201    
202     *result = psiconv_list_new(sizeof(*para));
203     para = malloc(sizeof(*para));
204    
205     psiconv_progress(lev+2,off+len,"Going to read all paragraph text");
206     nr = 0;
207     start = 0;
208     for (i = 0; i < text_len; i++)
209     if (psiconv_read_u8(buf,lev+2,off+len+i) == 0x06) {
210     para->text = malloc(i - start + 1);
211     for (j = 0; j < i - start; j++)
212     para->text[j] = psiconv_read_u8(buf,lev+1,off + len + start + j);
213     para->text[j] = 0;
214    
215     psiconv_list_add(*result,para);
216    
217     str_copy = psiconv_make_printable(para->text);
218     psiconv_debug(lev+2,off+i+len,"Line %d: %d characters",nr,
219     strlen(str_copy) +1);
220     psiconv_debug(lev+2,off+i+len,"Line %d: `%s'",nr,str_copy);
221     free(str_copy);
222    
223     start = i + 1;
224     nr ++;
225     }
226    
227     if (start != text_len) {
228     res = -1;
229     psiconv_warn(lev+2,off+start+len,
230     "Last line does not end on EOL (%d characters left)", len - start);
231     para->text = malloc(text_len - start + 1);
232     for (j = 0; j < text_len - start; j++)
233     para->text[j] = psiconv_read_u8(buf,lev+2,off + start + j + len);
234     para->text[text_len - start] = 0;
235     psiconv_list_add(*result,para);
236     str_copy = psiconv_make_printable(para->text);
237     psiconv_debug(lev+2,off+start+len,"Last line: %d characters",nr,
238     strlen(str_copy)+1);
239     psiconv_debug(lev+2,off+start+len,"Last line: `%s'",str_copy);
240     free(str_copy);
241     }
242    
243     free(para);
244    
245     /* Initialize the remaining parts of each paragraph */
246     for (i = 0; i < psiconv_list_length(*result); i ++) {
247     para = psiconv_list_get(*result,i);
248     para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
249     para->replacements = psiconv_list_new(sizeof(struct psiconv_replacement));
250     para->base_style = 0;
251     para->base_character = psiconv_basic_character_layout();
252     para->base_paragraph = psiconv_basic_paragraph_layout();
253     }
254    
255    
256     len += text_len;
257    
258     if (length)
259     *length = len;
260    
261     psiconv_progress(lev+1,off+len-1,"End of text section (total length: %08x",
262     len);
263    
264     return res;
265     }
266    
267     /* First do a parse_text_section, or you will get into trouble here */
268     int psiconv_parse_layout_section(const psiconv_buffer buf,
269     int lev,psiconv_u32 off,
270     int *length,
271     psiconv_text_and_layout result,
272     psiconv_word_styles_section styles,
273     int with_styles)
274     {
275     int res = 0;
276     int len = 0;
277     psiconv_u32 temp;
278     int parse_styles,nr,i,j,total,leng,line_length;
279    
280     typedef struct anon_style
281     {
282     int nr;
283     psiconv_s16 base_style;
284     psiconv_character_layout character;
285     psiconv_paragraph_layout paragraph;
286     } *anon_style;
287    
288     typedef psiconv_list anon_style_list; /* of struct anon_style */
289    
290     anon_style_list anon_styles;
291     struct anon_style anon;
292     anon_style anon_ptr=NULL;
293    
294     psiconv_paragraph para;
295     struct psiconv_in_line_layout in_line;
296    
297     int *inline_count;
298    
299    
300     psiconv_progress(lev+1,off,"Going to read the layout section");
301    
302     psiconv_progress(lev+2,off,"Going to read the section type");
303     temp = psiconv_read_u16(buf,lev+2,off+len);
304     psiconv_debug(lev+2,off+len,"Type: %02x",temp);
305     parse_styles = with_styles;
306     if ((temp == 0x0001) && !with_styles) {
307     psiconv_warn(lev+2,off+len,"Styleless layout section expected, "
308     "but styled section found!");
309     parse_styles = 1;
310     res = -1;
311     } else if ((temp == 0x0000) && (with_styles)) {
312     psiconv_warn(lev+2,off+len,"Styled layout section expected, "
313     "but styleless section found!");
314     parse_styles = 0;
315     res = -1;
316     } else if ((temp != 0x0000) && (temp != 0x0001)) {
317     psiconv_warn(lev+2,off+len,
318     "Layout section type indicator has unknown value!");
319     res = -1;
320     }
321     len += 0x02;
322    
323     psiconv_progress(lev+2,off+len,"Going to read paragraph type list");
324     anon_styles = psiconv_list_new(sizeof(anon));
325     psiconv_progress(lev+3,off+len,"Going to read paragraph type list length");
326     nr = psiconv_read_u8(buf,lev+3,off+len);
327     psiconv_debug(lev+3,off+len,"Length: %02x",nr);
328     len ++;
329    
330     psiconv_progress(lev+3,off+len,
331     "Going to read the paragraph type list elements");
332     for (i = 0; i < nr; i ++) {
333     psiconv_progress(lev+3,off+len,"Element %d",i);
334     anon.nr = psiconv_read_u32(buf,lev+4,off+len);
335     psiconv_debug(lev+4,off+len,"Number: %08x",anon.nr);
336     len += 0x04;
337    
338     psiconv_progress(lev+4,off,"Going to determine the base style");
339     if (parse_styles) {
340     anon.base_style = psiconv_read_u8(buf,lev+3,
341     off+len+4+psiconv_read_u32(buf,lev+4,
342     off+len));
343     psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
344     "Style indicator: %02x",anon.base_style);
345     } else
346     anon.base_style = 0;
347     anon.paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
348     (styles,anon.base_style)->paragraph);
349     anon.character = psiconv_clone_character_layout(psiconv_get_style
350     (styles,anon.base_style)->character);
351    
352     psiconv_progress(lev+4,off+len,"Going to read the paragraph layout");
353     res |= psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
354     anon.paragraph);
355     len += leng;
356     if (parse_styles)
357     len ++;
358    
359     psiconv_progress(lev+4,off+len,"Going to read the character layout");
360     res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
361     anon.character);
362     len += leng;
363     psiconv_list_add(anon_styles,&anon);
364     }
365    
366     psiconv_progress(lev+2,off+len,"Going to parse the paragraph element list");
367     psiconv_progress(lev+3,off+len,"Going to read the number of paragraphs");
368     nr = psiconv_read_u32(buf,lev+3,off+len);
369     if (nr != psiconv_list_length(result)) {
370     psiconv_warn(lev+3,off+len,
371     "Number of text paragraphs and paragraph elements does not match");
372     psiconv_debug(lev+3,off+len,
373     "%d text paragraphs, %d paragraph elements",
374     psiconv_list_length(result),nr);
375     }
376     psiconv_debug(lev+3,off+len,"Number of paragraphs: %d",nr);
377     len += 4;
378     inline_count = malloc(nr * sizeof(*inline_count));
379    
380     psiconv_progress(lev+3,off+len,"Going to read the paragraph elements");
381     for (i = 0; i < nr; i ++) {
382     psiconv_progress(lev+3,off+len,"Element %d",i);
383     if (i >= psiconv_list_length(result)) {
384     psiconv_debug(lev+4,off+len,"Going to allocate a new element");
385     para = malloc(sizeof(*para));
386     para->in_lines = psiconv_list_new(sizeof(struct psiconv_in_line_layout));
387     para->base_style = 0;
388     para->base_character = psiconv_basic_character_layout();
389     para->base_paragraph = psiconv_basic_paragraph_layout();
390     free(para);
391     }
392     para = psiconv_list_get(result,i);
393    
394     psiconv_progress(lev+4,off+len,"Going to read the paragraph length");
395     temp = psiconv_read_u32(buf,lev+4,off+len);
396     if (temp != strlen(para->text)+1) {
397     res = -1;
398     psiconv_warn(lev+4,off+len,
399     "Disagreement of the length of paragraph in layout section");
400     psiconv_debug(lev+4,off+len,
401     "Paragraph length: layout section says %d, counted %d",
402     temp,strlen(para->text)+1);
403     } else
404     psiconv_debug(lev+4,off+len,"Paragraph length: %d",temp);
405     len += 4;
406    
407     psiconv_progress(lev+4,off+len,"Going to read the paragraph type");
408     temp = psiconv_read_u8(buf,lev+4,off+len);
409     if (temp != 0x00) {
410     psiconv_debug(lev+4,off+len,"Type: %02x",temp);
411     for (j = 0; j < psiconv_list_length(anon_styles); j++) {
412     anon_ptr = psiconv_list_get(anon_styles,j);
413     if (temp == anon_ptr->nr)
414     break;
415     }
416     if (j == psiconv_list_length(anon_styles)) {
417     psiconv_warn(lev+4,off+len,"Layout section paragraph type unknown");
418     psiconv_debug(lev+4,off+len,"Unknown type - using base styles instead");
419     para->base_style = 0;
420     psiconv_free_paragraph_layout(para->base_paragraph);
421     psiconv_free_character_layout(para->base_character);
422     para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
423     (styles,0)->paragraph);
424     para->base_character = psiconv_clone_character_layout(psiconv_get_style
425     (styles,0)->character);
426     } else {
427     para->base_style = anon_ptr->base_style;
428     psiconv_free_paragraph_layout(para->base_paragraph);
429     psiconv_free_character_layout(para->base_character);
430     para->base_paragraph = psiconv_clone_paragraph_layout
431     (anon_ptr->paragraph);
432     para->base_character = psiconv_clone_character_layout
433     (anon_ptr->character);
434     }
435     inline_count[i] = 0;
436     len += 0x01;
437     } else {
438     psiconv_debug(lev+4,off+len,"Type: %02x (not based on a paragraph type)"
439     ,temp);
440     len += 0x01;
441     if (parse_styles) {
442     psiconv_progress(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
443     "Going to read the paragraph element base style");
444     temp = psiconv_read_u8(buf,lev+4,
445     off+len+psiconv_read_u32(buf,lev+4,off+len));
446     psiconv_debug(lev+4,off+len+psiconv_read_u32(buf,lev+4,off+len),
447     "Style: %02x",temp);
448     } else
449     temp = 0x00;
450     psiconv_free_paragraph_layout(para->base_paragraph);
451     psiconv_free_character_layout(para->base_character);
452     para->base_paragraph = psiconv_clone_paragraph_layout(psiconv_get_style
453     (styles,temp)->paragraph);
454     para->base_character = psiconv_clone_character_layout(psiconv_get_style
455     (styles,temp)->character);
456     para->base_style = temp;
457     psiconv_progress(lev+4,off+len,"Going to read paragraph layout");
458     psiconv_parse_paragraph_layout_list(buf,lev+4,off+len,&leng,
459     para->base_paragraph);
460     len += leng;
461     if (parse_styles)
462     len += 1;
463     psiconv_progress(lev+4,off+len,"Going to read number of in-line "
464     "layout elements");
465     inline_count[i] = psiconv_read_u32(buf,lev+4,off+len);
466     psiconv_debug(lev+4,off+len,"Nr: %08x",inline_count[i]);
467     len += 4;
468     }
469     }
470    
471     psiconv_progress(lev+2,off+len,"Going to read the text layout inline list");
472    
473     psiconv_progress(lev+3,off+len,"Going to read the number of elements");
474     nr = psiconv_read_u32(buf,lev+3,off+len);
475     psiconv_debug(lev+3,off,"Elements: %08x",nr);
476     len += 0x04;
477    
478     psiconv_progress(lev+3,off+len,
479     "Going to read the text layout inline elements");
480     total = 0;
481     for (i = 0; i < psiconv_list_length(result); i++) {
482     para = psiconv_list_get(result,i);
483     line_length = -1;
484     for (j = 0; j < inline_count[i]; j++) {
485     psiconv_progress(lev+3,off+len,"Element %d: Paragraph %d, element %d",
486     total,i,j);
487     if (total >= nr) {
488     psiconv_warn(lev+3,off+len,
489     "Layout section inlines: not enough element");
490     res = -1;
491     psiconv_debug(lev+3,off+len,"Can't read element!");
492     } else {
493     total ++;
494     in_line.layout = psiconv_clone_character_layout(para->base_character);
495     psiconv_progress(lev+4,off+len,"Going to read the element type");
496     temp = psiconv_read_u8(buf,lev+4,len+off);
497     len += 1;
498     psiconv_debug(lev+4,off,"Type: %02x",temp);
499     psiconv_progress(lev+4,off,
500     "Going to read the number of characters it applies to");
501     in_line.length = psiconv_read_u32(buf,lev+4,len+off);
502     psiconv_debug(lev+4,off+len,"Length: %02x",in_line.length);
503     len += 4;
504     psiconv_progress(lev+4,off+len,"Going to read the character layout");
505     res |= psiconv_parse_character_layout_list(buf,lev+4,off+len,&leng,
506     in_line.layout);
507     len += leng;
508    
509     if (temp == 0x01) {
510     psiconv_debug(lev+4,off+len,"Skipping object data");
511     len += 0x10;
512     } else if (temp != 0x00) {
513     psiconv_warn(lev+4,off+len,"Layout section unknown inline type");
514     res = -1;
515     }
516     if (line_length + in_line.length > strlen(para->text)) {
517     psiconv_warn(lev+4,off+len,
518     "Layout section inlines: line length mismatch");
519     res = -1;
520     in_line.length = strlen(para->text) - line_length;
521     }
522     line_length += in_line.length;
523     psiconv_list_add(para->in_lines,&in_line);
524     }
525     }
526     }
527    
528     if (total != nr) {
529     psiconv_warn(lev+4,off+len,
530     "Layout section too many inlines, skipping remaining");
531     }
532    
533     free(inline_count);
534    
535     for (i = 0 ; i < psiconv_list_length(anon_styles); i ++) {
536     anon_ptr = psiconv_list_get(anon_styles,i);
537     psiconv_free_character_layout(anon_ptr->character);
538     psiconv_free_paragraph_layout(anon_ptr->paragraph);
539     }
540     psiconv_list_free(anon_styles);
541    
542     if (length)
543     *length = len;
544    
545     psiconv_progress(lev+1,off+len-1,"End of layout section (total length: %08x",
546     len);
547    
548     return res;
549     }
550    
551     int psiconv_parse_styled_layout_section(const psiconv_buffer buf,
552     int lev,psiconv_u32 off,
553     int *length,
554     psiconv_text_and_layout result,
555     psiconv_word_styles_section styles)
556     {
557     return psiconv_parse_layout_section(buf,lev,off,length,result,styles,1);
558     }
559    
560     int psiconv_parse_styleless_layout_section(const psiconv_buffer buf,
561     int lev,psiconv_u32 off,
562     int *length,
563     psiconv_text_and_layout result,
564     psiconv_character_layout base_char,
565     psiconv_paragraph_layout base_para)
566     {
567     int res;
568     psiconv_word_styles_section styles_section;
569    
570     styles_section = malloc(sizeof(*styles_section));
571     styles_section->normal = malloc(sizeof(*styles_section->normal));
572     styles_section->normal->character = psiconv_clone_character_layout(base_char);
573     styles_section->normal->paragraph = psiconv_clone_paragraph_layout(base_para);
574     styles_section->normal->hotkey = 0;
575     styles_section->normal->name = strdup("");
576     styles_section->styles = psiconv_list_new(sizeof(struct psiconv_word_style));
577    
578     res = psiconv_parse_layout_section(buf,lev,off,length,result,
579     styles_section,0);
580    
581     psiconv_free_word_styles_section(styles_section);
582     return res;
583     }
584    

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26