1 |
/* |
2 |
* gen_text.c - Part of psiconv, a PSION 5 file formats converter |
3 |
* Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org> |
4 |
* Portions Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl> |
5 |
* |
6 |
* This program is free software; you can redistribute it and/or modify |
7 |
* it under the terms of the GNU General Public License as published by |
8 |
* the Free Software Foundation; either version 2 of the License, or |
9 |
* (at your option) any later version. |
10 |
* |
11 |
* This program is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
* |
16 |
* You should have received a copy of the GNU General Public License |
17 |
* along with this program; if not, write to the Free Software |
18 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
*/ |
20 |
|
21 |
/* |
22 |
2002/Apr. Keita KAWABE |
23 |
Support for narrow build Asian Psions added. |
24 |
|
25 |
If the encoding_type is PSICONV_ENCODING_UTF8, use utf_table for |
26 |
character conversion. Otherwise use char_table. |
27 |
*/ |
28 |
|
29 |
#include "config.h" |
30 |
#include <stdio.h> |
31 |
#include <string.h> |
32 |
#include "psiconv/data.h" |
33 |
#include "psiconv/list.h" |
34 |
#include "gen.h" |
35 |
#include "psiconv.h" |
36 |
|
37 |
#ifdef DMALLOC |
38 |
#include "dmalloc.h" |
39 |
#endif |
40 |
|
41 |
|
42 |
/* |
43 |
* Various string tables for plain-text output |
44 |
*/ |
45 |
|
46 |
/*
 * Character conversion table used when the output encoding is not UTF-8.
 *
 * Indexed by the raw byte taken from the Psion text; each entry is the
 * string written to the output in place of that byte:
 *   0x00-0x1f  control codes: 0x06-0x08 become a newline, 0x09 a tab,
 *              0x10 a space, everything else is dropped
 *   0x20-0x7e  emitted unchanged
 *   0x7f       dropped
 *   0x80-0x9f  replaced by plain-ASCII approximations (or dropped)
 *   0xa0-0xff  emitted unchanged as a single byte
 *
 * The array itself is const so the table cannot be modified at run time.
 */
static const char *const char_table[0x100] = {
  /* 0x00 */ "", "", "", "", "", "", "\n", "\n",
  /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
  /* 0x80 */ "", "", ",", "f", ",,", "...", "+", "#",
  /*
   * 0x89: the previous "\176/oo" used an octal escape, which yields '~'.
   * Decimal 176 (0xb0, the degree sign) was presumably intended, so that
   * the output reads like a per-mille sign.
   */
  /* 0x88 */ "^", "\xb0/oo", "S", "<", "OE", "", "", "",
  /* 0x90 */ "", "`", "'", "``", "''", "*", "-", "--",
  /* 0x98 */ "~", "(TM)", "s", ">", "oe", "", "", "Y",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
};
81 |
|
82 |
/*
 * Character conversion table used when the output encoding is UTF-8.
 *
 * Indexed by the raw byte taken from the Psion text; each entry is the
 * string written to the output in place of that byte.  Control codes
 * 0x00-0x1f are treated as in char_table (newline, tab, space or
 * nothing); every byte from 0x20 up to 0xff is emitted unchanged, so
 * multi-byte sequences in the input pass through byte for byte.
 *
 * The array itself is const so the table cannot be modified at run time.
 */
static const char *const utf_table[0x100] = {
  /* 0x00 */ "", "", "", "", "", "", "\n", "\n",
  /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "\x7f",
  /* 0x80 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
  /* 0x88 */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
  /* 0x90 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
  /* 0x98 */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
};
116 |
|
117 |
/* a flag to indicate the use of UTF8 */ |
118 |
static psiconv_encoding encoding = PSICONV_ENCODING_CP1252; |
119 |
|
120 |
/* Output a string, doing character conversions */ |
121 |
static void fput_text(FILE * of, const char *text, int length) { |
122 |
int j; |
123 |
char **table; |
124 |
|
125 |
if (encoding == PSICONV_ENCODING_UTF8){ |
126 |
table=(char**)utf_table; |
127 |
}else{ |
128 |
table=(char**)char_table; |
129 |
} |
130 |
|
131 |
for (j = 0; j < length; j++) { |
132 |
fputs(table[(unsigned char) (text[j])], of); |
133 |
} |
134 |
} |
135 |
|
136 |
|
137 |
/* Output a paragraph */ |
138 |
static void fput_para(FILE * of, |
139 |
const psiconv_paragraph para,int extra_nl) |
140 |
{ |
141 |
if (para->base_paragraph->bullet->on) { |
142 |
fprintf(of, "%s ", char_table[para->base_paragraph->bullet->character]); |
143 |
} |
144 |
fput_text(of, para->text, strlen(para->text)); |
145 |
fputs("\n", of); |
146 |
if (extra_nl) |
147 |
fputs("\n", of); |
148 |
} |
149 |
|
150 |
/*
 * Write a TextEd file as plain text.
 *
 * Output order: the header paragraphs (only when a header text exists
 * and the header is flagged on_first_page), a blank line, every body
 * paragraph via fput_para() without extra spacing, a blank line, and
 * finally the footer paragraphs when a footer text exists.
 */
static void psiconv_gen_txt_texted(FILE * of, psiconv_texted_f tf)
{
  int i;
  psiconv_paragraph para;

  if (tf->page_sec->header->text && tf->page_sec->header->on_first_page) {
    for (i=0; i < psiconv_list_length(tf->page_sec->header->text->paragraphs); i++) {
      para = psiconv_list_get(tf->page_sec->header->text->paragraphs, i);
      fput_text(of, para->text, strlen(para->text));
      fputs("\n", of);
    }
  }
  fputs("\n",of);

  for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
    para = psiconv_list_get(tf->texted_sec->paragraphs, i);
    fput_para(of, para,0);
  }

  fputs("\n",of);
  /* Guard on the footer's own text pointer.  The previous code tested
     header->text here, which dereferenced footer->text unchecked and
     dropped the footer whenever the header was absent. */
  if (tf->page_sec->footer->text) {
    for (i=0; i < psiconv_list_length(tf->page_sec->footer->text->paragraphs); i++) {
      para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i);
      fput_text(of, para->text, strlen(para->text));
      fputs("\n", of);
    }
  }
}
180 |
|
181 |
/*
 * Write a Word file as plain text.
 *
 * Output order: the header paragraphs (only when a header text exists
 * and the header is flagged on_first_page), two newlines, every body
 * paragraph via fput_para() with an extra newline after each, two
 * newlines, and finally the footer paragraphs when a footer text
 * exists.  Header and footer paragraphs are each followed by two
 * newlines.
 */
static void psiconv_gen_txt_word(FILE * of, psiconv_word_f wf)
{
  int i;
  psiconv_paragraph para;

  /* The text pointers are checked before use, matching the TextEd
     generator; a missing header or footer is simply skipped. */
  if (wf->page_sec->header->text && wf->page_sec->header->on_first_page) {
    for (i=0; i < psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) {
      para = psiconv_list_get(wf->page_sec->header->text->paragraphs, i);
      fput_text(of, para->text, strlen(para->text));
      fputs("\n\n", of);
    }
  }
  fputs("\n\n", of);

  for (i=0; i < psiconv_list_length(wf->paragraphs); i++) {
    para = psiconv_list_get(wf->paragraphs, i);
    fput_para(of, para,1);
  }

  fputs("\n\n", of);
  if (wf->page_sec->footer->text) {
    for (i=0; i < psiconv_list_length(wf->page_sec->footer->text->paragraphs); i++) {
      para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i);
      fput_text(of, para->text, strlen(para->text));
      fputs("\n\n", of);
    }
  }
}
207 |
|
208 |
static int psiconv_gen_txt(const char *filename, const psiconv_file file, |
209 |
const char *dest, |
210 |
const psiconv_encoding encoding_type) |
211 |
{ |
212 |
FILE *of = fopen(filename,"w"); |
213 |
if (! of) |
214 |
return -1; |
215 |
|
216 |
encoding=encoding_type; |
217 |
|
218 |
if (file->type == psiconv_word_file) { |
219 |
psiconv_gen_txt_word(of,(psiconv_word_f) file->file); |
220 |
} else if (file->type == psiconv_texted_file) { |
221 |
psiconv_gen_txt_texted(of,(psiconv_texted_f) file->file); |
222 |
} else { |
223 |
fclose(of); |
224 |
return -1; |
225 |
} |
226 |
return fclose(of); |
227 |
} |
228 |
|
229 |
static struct psiconv_fileformat_s ff = |
230 |
{ |
231 |
"ASCII", |
232 |
"Plain text without much layout", |
233 |
psiconv_gen_txt |
234 |
}; |
235 |
|
236 |
void init_txt(void) |
237 |
{ |
238 |
psiconv_list_add(fileformat_list,&ff); |
239 |
} |
240 |
|