1 | /* |
1 | /* |
2 | * gen_text.c - Part of psiconv, a PSION 5 file formats converter |
2 | * gen_text.c - Part of psiconv, a PSION 5 file formats converter |
3 | * Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org> |
3 | * Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org> |
4 | * Portions Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl> |
4 | * Portions Copyright (c) 1999-2014 Frodo Looijaard <frodo@frodo.looijaard.name> |
5 | * |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify |
6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
9 | * (at your option) any later version. |
… | |
… | |
19 | */ |
19 | */ |
20 | |
20 | |
21 | #include "config.h" |
21 | #include "config.h" |
22 | #include <stdio.h> |
22 | #include <stdio.h> |
23 | #include <string.h> |
23 | #include <string.h> |
24 | #include "psiconv/data.h" |
24 | #include <psiconv/data.h> |
25 | #include "psiconv/list.h" |
25 | #include <psiconv/list.h> |
|
|
26 | #include <psiconv/unicode.h> |
|
|
27 | #include "general.h" |
26 | #include "gen.h" |
28 | #include "gen.h" |
27 | #include "psiconv.h" |
29 | #include "psiconv.h" |
28 | |
30 | |
|
|
31 | #ifdef DMALLOC |
|
|
32 | #include "dmalloc.h" |
|
|
33 | #endif |
29 | |
34 | |
30 | /* |
35 | static void output_para(const psiconv_config config,psiconv_list list, |
31 | * String tables used for plain-text output |
36 | const psiconv_paragraph para,encoding encoding_type); |
32 | */ |
37 | static void gen_word(const psiconv_config config, psiconv_list list, |
|
|
38 | psiconv_word_f wf, encoding encoding_type); |
|
|
39 | static void gen_texted(const psiconv_config config, psiconv_list list, |
|
|
40 | psiconv_texted_f tf, encoding encoding_type); |
|
|
41 | static int gen_txt(const psiconv_config config, psiconv_list list, |
|
|
42 | const psiconv_file file, const char *dest, |
|
|
43 | const encoding encoding_type); |
33 | |
44 | |
/*
 * Character conversion table.
 *
 * Maps every 8-bit character code to the string that is written to the
 * output; an empty string drops the character.
 *   - 0x06, 0x07 and 0x08 become a newline, 0x09 a tab and 0x10 a space;
 *     all other codes below 0x20 are dropped.
 *   - 0x20-0x7e are copied unchanged; 0x7f is dropped.
 *   - 0x80-0x9f are replaced by ASCII approximations (or dropped).
 *   - 0xa0-0xff are passed through as the same single byte.
 */
static const char *char_table[0x100] = {
  /* 0x00 */ "", "", "", "", "", "", "\n", "\n",
  /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
  /* 0x80 */ "", "", ",", "f", ",,", "...", "+", "#",
  /*
   * 0x89 approximates the per-mille sign as degree-sign + "/oo". This entry
   * used to be written "\176/oo", but \176 is an OCTAL escape (0x7e, '~');
   * decimal 176 is 0xb0, the degree sign, which is what is emitted now.
   */
  /* 0x88 */ "^", "\xb0/oo", "S", "<", "OE", "", "", "",
  /* 0x90 */ "", "`", "'", "``", "''", "*", "-", "--",
  /* 0x98 */ "~", "(TM)", "s", ">", "oe", "", "", "Y",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
};
|
|
69 | |
|
|
70 | |
|
|
71 | /* Output a string, doing character conversions */ |
|
|
72 | static void fput_text(FILE * of, const char *text, int length) { |
|
|
73 | int j; |
|
|
74 | |
|
|
75 | for (j = 0; j < length; j++) { |
|
|
76 | fputs(char_table[(unsigned char) (text[j])], of); |
|
|
77 | } |
|
|
78 | } |
|
|
79 | |
|
|
80 | |
|
|
81 | /* Output a paragraph */ |
|
|
82 | static void fput_para(FILE * of, |
|
|
83 | const psiconv_paragraph para,int extra_nl) |
|
|
84 | { |
|
|
85 | if (para->base_paragraph->bullet->on) { |
|
|
86 | fprintf(of, "%s ", char_table[para->base_paragraph->bullet->character]); |
|
|
87 | } |
|
|
88 | fput_text(of, para->text, strlen(para->text)); |
|
|
89 | fputs("\n", of); |
|
|
90 | if (extra_nl) |
|
|
91 | fputs("\n", of); |
|
|
92 | } |
|
|
93 | |
|
|
94 | static void psiconv_gen_txt_texted(FILE * of, psiconv_texted_f tf) |
|
|
95 | { |
|
|
96 | int i; |
|
|
97 | psiconv_paragraph para; |
|
|
98 | |
|
|
99 | if (tf->page_sec->header->text) { |
|
|
100 | if (tf->page_sec->header->on_first_page) { |
|
|
101 | for (i=0; i < psiconv_list_length(tf->page_sec->header->text->paragraphs); i++) { |
|
|
102 | para = psiconv_list_get(tf->page_sec->header->text->paragraphs, i); |
|
|
103 | fput_text(of, para->text, strlen(para->text)); |
|
|
104 | fputs("\n", of); |
|
|
105 | } |
|
|
106 | } |
|
|
107 | } |
|
|
108 | fputs("\n",of); |
|
|
109 | |
|
|
110 | for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) { |
|
|
111 | para = psiconv_list_get(tf->texted_sec->paragraphs, i); |
|
|
112 | fput_para(of, para,0); |
|
|
113 | } |
|
|
114 | |
|
|
115 | fputs("\n",of); |
|
|
116 | if (tf->page_sec->header->text) { |
|
|
117 | for (i=0; i < psiconv_list_length(tf->page_sec->footer->text->paragraphs); i++) { |
|
|
118 | para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i); |
|
|
119 | fput_text(of, para->text, strlen(para->text)); |
|
|
120 | fputs("\n", of); |
|
|
121 | } |
|
|
122 | } |
|
|
123 | } |
|
|
124 | |
|
|
125 | static void psiconv_gen_txt_word(FILE * of, psiconv_word_f wf) |
|
|
126 | { |
|
|
127 | int i; |
|
|
128 | psiconv_paragraph para; |
|
|
129 | |
|
|
130 | if (wf->page_sec->header->on_first_page) { |
|
|
131 | for (i=0; i < psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) { |
|
|
132 | para = psiconv_list_get(wf->page_sec->header->text->paragraphs, i); |
|
|
133 | fput_text(of, para->text, strlen(para->text)); |
|
|
134 | fputs("\n\n", of); |
|
|
135 | } |
|
|
136 | } |
|
|
137 | fputs("\n\n", of); |
|
|
138 | |
|
|
139 | for (i=0; i < psiconv_list_length(wf->paragraphs); i++) { |
|
|
140 | para = psiconv_list_get(wf->paragraphs, i); |
|
|
141 | fput_para(of, para,1); |
|
|
142 | } |
|
|
143 | |
|
|
144 | fputs("\n\n", of); |
|
|
145 | for (i=0; i < psiconv_list_length(wf->page_sec->footer->text->paragraphs); i++) { |
|
|
146 | para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i); |
|
|
147 | fput_text(of, para->text, strlen(para->text)); |
|
|
148 | fputs("\n\n", of); |
|
|
149 | } |
|
|
150 | } |
|
|
151 | |
|
|
152 | static int psiconv_gen_txt(const char *filename, const psiconv_file file, |
|
|
153 | const char *dest) |
|
|
154 | { |
|
|
155 | FILE *of = fopen(filename,"w"); |
|
|
156 | if (! of) |
|
|
157 | return -1; |
|
|
158 | |
|
|
159 | if (file->type == psiconv_word_file) { |
|
|
160 | psiconv_gen_txt_word(of,(psiconv_word_f) file->file); |
|
|
161 | } else if (file->type == psiconv_texted_file) { |
|
|
162 | psiconv_gen_txt_texted(of,(psiconv_texted_f) file->file); |
|
|
163 | } else { |
|
|
164 | fclose(of); |
|
|
165 | return -1; |
|
|
166 | } |
|
|
167 | return fclose(of); |
|
|
168 | } |
|
|
169 | |
|
|
/*
 * Descriptor for the "ASCII" plain-text output format; init_txt() adds it to
 * fileformat_list.
 * NOTE(review): the rows below appear to interleave two revisions of this
 * initializer (one naming psiconv_gen_txt, one naming gen_txt together with
 * FORMAT_WORD | FORMAT_TEXTED) -- confirm which revision is wanted before
 * compiling.
 */
170 | static struct psiconv_fileformat_s ff = |
45 | static struct fileformat_s ff = |
171 | { |
46 | { |
172 | "ASCII", |
47 | "ASCII", |
173 | "Plain text without much layout", |
48 | "Plain text without much layout", |
174 | psiconv_gen_txt |
49 | FORMAT_WORD | FORMAT_TEXTED, |


50 | gen_txt |
175 | }; |
51 | }; |
176 | |
52 | |
|
|
53 | |
|
|
54 | void output_para(const psiconv_config config,psiconv_list list, |
|
|
55 | const psiconv_paragraph para,encoding encoding_type) |
|
|
56 | { |
|
|
57 | int i; |
|
|
58 | if (para && para->base_paragraph && para->base_paragraph->bullet && |
|
|
59 | para->base_paragraph->bullet->on) { |
|
|
60 | output_char(config,list,para->base_paragraph->bullet->character, |
|
|
61 | encoding_type); |
|
|
62 | output_char(config,list,' ', encoding_type); |
|
|
63 | output_char(config,list,' ', encoding_type); |
|
|
64 | output_char(config,list,' ', encoding_type); |
|
|
65 | } |
|
|
66 | if (para && para->text) { |
|
|
67 | for (i = 0; i < psiconv_unicode_strlen(para->text); i++) |
|
|
68 | switch (para->text[i]) { |
|
|
69 | case 0x06: |
|
|
70 | case 0x07: |
|
|
71 | case 0x08: |
|
|
72 | output_char(config,list,'\n',encoding_type); |
|
|
73 | break; |
|
|
74 | case 0x09: |
|
|
75 | case 0x0a: |
|
|
76 | output_char(config,list,'\t',encoding_type); |
|
|
77 | break; |
|
|
78 | case 0x0b: |
|
|
79 | case 0x0c: |
|
|
80 | output_char(config,list,'-',encoding_type); |
|
|
81 | break; |
|
|
82 | case 0x0f: |
|
|
83 | output_char(config,list,' ',encoding_type); |
|
|
84 | break; |
|
|
85 | case 0x00: |
|
|
86 | case 0x01: |
|
|
87 | case 0x02: |
|
|
88 | case 0x03: |
|
|
89 | case 0x04: |
|
|
90 | case 0x05: |
|
|
91 | case 0x0e: |
|
|
92 | case 0x10: |
|
|
93 | case 0x11: |
|
|
94 | case 0x12: |
|
|
95 | case 0x13: |
|
|
96 | case 0x14: |
|
|
97 | case 0x15: |
|
|
98 | case 0x16: |
|
|
99 | case 0x17: |
|
|
100 | case 0x18: |
|
|
101 | case 0x19: |
|
|
102 | case 0x1a: |
|
|
103 | case 0x1c: |
|
|
104 | case 0x1d: |
|
|
105 | case 0x1e: |
|
|
106 | case 0x1f: |
|
|
107 | break; |
|
|
108 | default: |
|
|
109 | output_char(config,list,para->text[i],encoding_type); |
|
|
110 | break; |
|
|
111 | } |
|
|
112 | output_char(config,list,'\n',encoding_type); |
|
|
113 | } |
|
|
114 | } |
|
|
115 | |
|
|
116 | void gen_word(const psiconv_config config, psiconv_list list, |
|
|
117 | psiconv_word_f wf, encoding encoding_type) |
|
|
118 | { |
|
|
119 | int i; |
|
|
120 | psiconv_paragraph para; |
|
|
121 | |
|
|
122 | if (wf && wf->page_sec && wf->page_sec->header && |
|
|
123 | wf->page_sec->header->text && wf->page_sec->header->text->paragraphs) { |
|
|
124 | for (i=0; |
|
|
125 | i < psiconv_list_length(wf->page_sec->header-> |
|
|
126 | text->paragraphs); i++) { |
|
|
127 | para = psiconv_list_get(wf->page_sec->header->text->paragraphs, |
|
|
128 | i); |
|
|
129 | output_para(config,list,para,encoding_type); |
|
|
130 | } |
|
|
131 | } |
|
|
132 | output_char(config,list,'\n',encoding_type); |
|
|
133 | |
|
|
134 | if (wf && wf->paragraphs) |
|
|
135 | for (i=0; i < psiconv_list_length(wf->paragraphs); i++) { |
|
|
136 | para = psiconv_list_get(wf->paragraphs, i); |
|
|
137 | output_para(config, list,para,encoding_type); |
|
|
138 | } |
|
|
139 | |
|
|
140 | output_char(config,list,'\n',encoding_type); |
|
|
141 | |
|
|
142 | if (wf && wf->page_sec && wf->page_sec->footer && |
|
|
143 | wf->page_sec->footer->text && wf->page_sec->footer->text->paragraphs) { |
|
|
144 | for (i=0; |
|
|
145 | i < psiconv_list_length(wf->page_sec->footer-> |
|
|
146 | text->paragraphs); i++) { |
|
|
147 | para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i); |
|
|
148 | output_para(config,list,para,encoding_type); |
|
|
149 | } |
|
|
150 | } |
|
|
151 | } |
|
|
152 | |
|
|
153 | void gen_texted(const psiconv_config config, psiconv_list list, |
|
|
154 | psiconv_texted_f tf, encoding encoding_type) |
|
|
155 | { |
|
|
156 | int i; |
|
|
157 | psiconv_paragraph para; |
|
|
158 | |
|
|
159 | if (tf && tf->page_sec && tf->page_sec->header && |
|
|
160 | tf->page_sec->header->text && tf->page_sec->header->text->paragraphs) { |
|
|
161 | for (i=0; |
|
|
162 | i < psiconv_list_length(tf->page_sec->header-> |
|
|
163 | text->paragraphs); i++) { |
|
|
164 | para = psiconv_list_get(tf->page_sec->header->text->paragraphs, |
|
|
165 | i); |
|
|
166 | output_para(config,list,para,encoding_type); |
|
|
167 | } |
|
|
168 | } |
|
|
169 | output_char(config,list,'\n',encoding_type); |
|
|
170 | |
|
|
171 | if (tf && tf->texted_sec && tf->texted_sec->paragraphs) |
|
|
172 | for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) { |
|
|
173 | para = psiconv_list_get(tf->texted_sec->paragraphs, i); |
|
|
174 | output_para(config, list,para,encoding_type); |
|
|
175 | } |
|
|
176 | |
|
|
177 | output_char(config,list,'\n',encoding_type); |
|
|
178 | |
|
|
179 | if (tf && tf->page_sec && tf->page_sec->footer && |
|
|
180 | tf->page_sec->footer->text && tf->page_sec->footer->text->paragraphs) { |
|
|
181 | for (i=0; |
|
|
182 | i < psiconv_list_length(tf->page_sec->footer-> |
|
|
183 | text->paragraphs); i++) { |
|
|
184 | para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i); |
|
|
185 | output_para(config,list,para,encoding_type); |
|
|
186 | } |
|
|
187 | } |
|
|
188 | } |
|
|
189 | |
|
|
190 | int gen_txt(const psiconv_config config, psiconv_list list, |
|
|
191 | const psiconv_file file, const char *dest, |
|
|
192 | const encoding encoding_type) |
|
|
193 | { |
|
|
194 | if (file->type == psiconv_word_file) { |
|
|
195 | gen_word(config,list,(psiconv_word_f) file->file,encoding_type); |
|
|
196 | return 0; |
|
|
197 | } else if (file->type == psiconv_texted_file) { |
|
|
198 | gen_texted(config,list,(psiconv_texted_f) file->file,encoding_type); |
|
|
199 | return 0; |
|
|
200 | } else |
|
|
201 | return -1; |
|
|
202 | } |
|
|
203 | |
/*
 * Register the plain-text output format by adding the descriptor `ff` to
 * fileformat_list. Any result of psiconv_list_add() is ignored.
 */
177 | void init_txt(void) |
204 | void init_txt(void) |
178 | { |
205 | { |
179 | psiconv_list_add(fileformat_list,&ff); |
206 | psiconv_list_add(fileformat_list,&ff); |
180 | } |
207 | } |
181 | |
208 | |