/[public]/psiconv/trunk/program/psiconv/gen_txt.c
ViewVC logotype

Contents of /psiconv/trunk/program/psiconv/gen_txt.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 147 - (show annotations)
Fri May 10 15:55:55 2002 UTC (21 years, 10 months ago) by frodo
File MIME type: text/plain
File size: 9626 byte(s)
(Frodo) UTF-8 support (Keita Kawabe, keite.kawabe@mpq.mpg.de)

1 /*
2 * gen_txt.c - Part of psiconv, a PSION 5 file formats converter
3 * Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org>
4 * Portions Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21 /*
22 2002/Apr. Keita KAWABE
23 Support for narrow build Asian Psions added.
24
25 If the encoding_type is PSICONV_ENCODING_UTF8, use utf_table for
26 character conversion. Otherwise use char_table.
27 */
28
29 #include "config.h"
30 #include <stdio.h>
31 #include <string.h>
32 #include "psiconv/data.h"
33 #include "psiconv/list.h"
34 #include "gen.h"
35 #include "psiconv.h"
36
37 #ifdef DMALLOC
38 #include "dmalloc.h"
39 #endif
40
41
42 /*
43 * Various string tables for plain-text output
44 */
45
/*
 * Character conversion table, indexed by input byte value.
 *
 * Each entry is the string written to the output for that byte:
 *   0x00-0x1f  mostly dropped; 0x06-0x08 become a newline, 0x09 a tab and
 *              0x10 a space
 *   0x20-0x7e  copied unchanged
 *   0x7f-0x9f  dropped or replaced by a printable approximation
 *   0xa0-0xff  copied unchanged
 *
 * Entry 0x89 (the per mille sign in CP1252) is approximated as a degree
 * sign followed by "/oo".  The degree sign is spelled with the octal escape
 * \260 (0xb0); an octal escape is used because it is limited to three
 * digits and so cannot absorb the characters that follow it.
 */
static const char *char_table[0x100] = {
  /* 0x00 */ "", "", "", "", "", "", "\n", "\n",
  /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
  /* 0x80 */ "", "", ",", "f", ",,", "...", "+", "#",
  /* 0x88 */ "^", "\260/oo","S", "<", "OE", "", "", "",
  /* 0x90 */ "", "`", "'", "``", "''", "*", "-", "--",
  /* 0x98 */ "~", "(TM)", "s", ">", "oe", "", "", "Y",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
};
81
/*
 * Conversion table used when the encoding is PSICONV_ENCODING_UTF8,
 * indexed by input byte value.
 *
 * Bytes below 0x20 are dropped, except that 0x06-0x08 become a newline,
 * 0x09 a tab and 0x10 a space.  Every byte from 0x20 upward is written
 * out unchanged.
 */
static const char *utf_table[0x100] = {
  /* 0x00-0x07 */ "", "", "", "", "", "", "\n", "\n",
  /* 0x08-0x0f */ "\n", "\t", "", "", "", "", "", "",
  /* 0x10-0x17 */ " ", "", "", "", "", "", "", "",
  /* 0x18-0x1f */ "", "", "", "", "", "", "", "",
  /* 0x20-0x27 */ " ", "!", "\"", "#", "$", "%", "&", "'",
  /* 0x28-0x2f */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30-0x37 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38-0x3f */ "8", "9", ":", ";", "<", "=", ">", "?",
  /* 0x40-0x47 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48-0x4f */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50-0x57 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58-0x5f */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60-0x67 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68-0x6f */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70-0x77 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78-0x7f */ "x", "y", "z", "{", "|", "}", "~", "\x7f",
  /* 0x80-0x87 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
  /* 0x88-0x8f */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
  /* 0x90-0x97 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
  /* 0x98-0x9f */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
  /* 0xa0-0xa7 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8-0xaf */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0-0xb7 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8-0xbf */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0-0xc7 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8-0xcf */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0-0xd7 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8-0xdf */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0-0xe7 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8-0xef */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0-0xf7 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8-0xff */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
};
116
117 /* a flag to indicate the use of UTF8 */
118 static psiconv_encoding encoding = PSICONV_ENCODING_CP1252;
119
120 /* Output a string, doing character conversions */
121 static void fput_text(FILE * of, const char *text, int length) {
122 int j;
123 char **table;
124
125 if (encoding == PSICONV_ENCODING_UTF8){
126 table=(char**)utf_table;
127 }else{
128 table=(char**)char_table;
129 }
130
131 for (j = 0; j < length; j++) {
132 fputs(table[(unsigned char) (text[j])], of);
133 }
134 }
135
136
137 /* Output a paragraph */
138 static void fput_para(FILE * of,
139 const psiconv_paragraph para,int extra_nl)
140 {
141 if (para->base_paragraph->bullet->on) {
142 fprintf(of, "%s ", char_table[para->base_paragraph->bullet->character]);
143 }
144 fput_text(of, para->text, strlen(para->text));
145 fputs("\n", of);
146 if (extra_nl)
147 fputs("\n", of);
148 }
149
150 static void psiconv_gen_txt_texted(FILE * of, psiconv_texted_f tf)
151 {
152 int i;
153 psiconv_paragraph para;
154
155 if (tf->page_sec->header->text) {
156 if (tf->page_sec->header->on_first_page) {
157 for (i=0; i < psiconv_list_length(tf->page_sec->header->text->paragraphs); i++) {
158 para = psiconv_list_get(tf->page_sec->header->text->paragraphs, i);
159 fput_text(of, para->text, strlen(para->text));
160 fputs("\n", of);
161 }
162 }
163 }
164 fputs("\n",of);
165
166 for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
167 para = psiconv_list_get(tf->texted_sec->paragraphs, i);
168 fput_para(of, para,0);
169 }
170
171 fputs("\n",of);
172 if (tf->page_sec->header->text) {
173 for (i=0; i < psiconv_list_length(tf->page_sec->footer->text->paragraphs); i++) {
174 para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i);
175 fput_text(of, para->text, strlen(para->text));
176 fputs("\n", of);
177 }
178 }
179 }
180
181 static void psiconv_gen_txt_word(FILE * of, psiconv_word_f wf)
182 {
183 int i;
184 psiconv_paragraph para;
185
186 if (wf->page_sec->header->on_first_page) {
187 for (i=0; i < psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) {
188 para = psiconv_list_get(wf->page_sec->header->text->paragraphs, i);
189 fput_text(of, para->text, strlen(para->text));
190 fputs("\n\n", of);
191 }
192 }
193 fputs("\n\n", of);
194
195 for (i=0; i < psiconv_list_length(wf->paragraphs); i++) {
196 para = psiconv_list_get(wf->paragraphs, i);
197 fput_para(of, para,1);
198 }
199
200 fputs("\n\n", of);
201 for (i=0; i < psiconv_list_length(wf->page_sec->footer->text->paragraphs); i++) {
202 para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i);
203 fput_text(of, para->text, strlen(para->text));
204 fputs("\n\n", of);
205 }
206 }
207
208 static int psiconv_gen_txt(const char *filename, const psiconv_file file,
209 const char *dest,
210 const psiconv_encoding encoding_type)
211 {
212 FILE *of = fopen(filename,"w");
213 if (! of)
214 return -1;
215
216 encoding=encoding_type;
217
218 if (file->type == psiconv_word_file) {
219 psiconv_gen_txt_word(of,(psiconv_word_f) file->file);
220 } else if (file->type == psiconv_texted_file) {
221 psiconv_gen_txt_texted(of,(psiconv_texted_f) file->file);
222 } else {
223 fclose(of);
224 return -1;
225 }
226 return fclose(of);
227 }
228
229 static struct psiconv_fileformat_s ff =
230 {
231 "ASCII",
232 "Plain text without much layout",
233 psiconv_gen_txt
234 };
235
236 void init_txt(void)
237 {
238 psiconv_list_add(fileformat_list,&ff);
239 }
240

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26