/[public]/psiconv/trunk/program/psiconv/gen_txt.c
ViewVC logotype

Diff of /psiconv/trunk/program/psiconv/gen_txt.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

Revision 184 Revision 185
1/* 1/*
2 * gen_txt.c - Part of psiconv, a PSION 5 file formats converter 2 * gen_txt.c - Part of psiconv, a PSION 5 file formats converter
3 * Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org> 3 * Copyright (c) 1999 Andrew Johnson <anjohnson@iee.org>
4 * Portions Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl> 4 * Portions Copyright (c) 1999,2003 Frodo Looijaard <frodol@dds.nl>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */ 19 */
20 20
21/*
22 2002/Apr. Keita KAWABE
23 Support for narrow build Asian Psions added.
24
25 If the encoding_type is PSICONV_ENCODING_UTF8, use utf_table for
26 character conversion. Otherwise use char_table.
27*/
28
29#include "config.h" 21#include "config.h"
30#include <stdio.h> 22#include <stdio.h>
31#include <string.h> 23#include <string.h>
32#include "psiconv/data.h" 24#include <psiconv/data.h>
33#include "psiconv/list.h" 25#include <psiconv/list.h>
26#include <psiconv/unicode.h>
27#include "general.h"
34#include "gen.h" 28#include "gen.h"
35#include "psiconv.h" 29#include "psiconv.h"
36 30
37#ifdef DMALLOC 31#ifdef DMALLOC
38#include "dmalloc.h" 32#include "dmalloc.h"
39#endif 33#endif
40 34
41 35static void output_para(const psiconv_config config,psiconv_list list,
42/* 36 const psiconv_paragraph para,encoding encoding_type);
43 * Various string tables for HTML4 settings 37static void gen_txt_word(const psiconv_config config, psiconv_list list,
44 */ 38 psiconv_word_f wf, encoding encoding_type);
45 39static void gen_txt_texted(const psiconv_config config, psiconv_list list,
46/* Character conversion table */ 40 psiconv_texted_f tf, encoding encoding_type);
47static const char *char_table[0x100] = { 41static int gen_txt(const psiconv_config config, psiconv_list list,
48 /* 0x00 */ "", "", "", "", "", "", "\n", "\n", 42 const psiconv_file file, const char *dest,
49 /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
50 /* 0x10 */ " ", "", "", "", "", "", "", "",
51 /* 0x18 */ "", "", "", "", "", "", "", "",
52 /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
53 /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
54 /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
55 /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
56 /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
57 /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
58 /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
59 /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
60 /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
61 /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
62 /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
63 /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
64 /* 0x80 */ "", "", ",", "f", ",,", "...", "+", "#",
65 /* 0x88 */ "^", "\176/oo","S", "<", "OE", "", "", "",
66 /* 0x90 */ "", "`", "'", "``", "''", "*", "-", "--",
67 /* 0x98 */ "~", "(TM)", "s", ">", "oe", "", "", "Y",
68 /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
69 /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
70 /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
71 /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
72 /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
73 /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
74 /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
75 /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
76 /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
77 /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
78 /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
79 /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
80};
81
/*
 * Translation table selected by fput_text() when the output encoding is
 * PSICONV_ENCODING_UTF8. It is indexed by the input byte value.
 *
 * Bytes 0x20-0xff map to a one-byte string holding the same byte, so they
 * are written out unchanged (this includes 0x7f-0x9f, which char_table
 * replaces or drops). Below 0x20 everything maps to the empty string and
 * is therefore dropped, except 0x06-0x08 (newline), 0x09 (tab) and
 * 0x10 (space).
 */
82static const char *utf_table[0x100] = {
83 /* 0x00 */ "", "", "", "", "", "", "\n", "\n",
84 /* 0x08 */ "\n", "\t", "", "", "", "", "", "",
85 /* 0x10 */ " ", "", "", "", "", "", "", "",
86 /* 0x18 */ "", "", "", "", "", "", "", "",
87 /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
88 /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
89 /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
90 /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
91 /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
92 /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
93 /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
94 /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
95 /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
96 /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
97 /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
98 /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "\x7f",
99 /* 0x80 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
100 /* 0x88 */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
101 /* 0x90 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
102 /* 0x98 */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
103 /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
104 /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
105 /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
106 /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
107 /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
108 /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
109 /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
110 /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
111 /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
112 /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
113 /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
114 /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
115};
116
117/* Output encoding consulted by fput_text() to choose between utf_table
   (PSICONV_ENCODING_UTF8) and char_table (any other value). Starts out as
   PSICONV_ENCODING_CP1252 and is overwritten by psiconv_gen_txt() with
   the encoding_type it was called with. */
118static psiconv_encoding encoding = PSICONV_ENCODING_CP1252;
119
120/* Output a string, doing character conversions */
121static void fput_text(FILE * of, const char *text, int length) {
122 int j;
123 char **table;
124
125 if (encoding == PSICONV_ENCODING_UTF8){
126 table=(char**)utf_table;
127 }else{
128 table=(char**)char_table;
129 }
130
131 for (j = 0; j < length; j++) {
132 fputs(table[(unsigned char) (text[j])], of);
133 }
134}
135
136
137/* Output a paragraph */
138static void fput_para(FILE * of,
139 const psiconv_paragraph para,int extra_nl)
140{
141 if (para->base_paragraph->bullet->on) {
142 fprintf(of, "%s ", char_table[para->base_paragraph->bullet->character]);
143 }
144 fput_text(of, para->text, strlen(para->text));
145 fputs("\n", of);
146 if (extra_nl)
147 fputs("\n", of);
148}
149
150static void psiconv_gen_txt_texted(FILE * of, psiconv_texted_f tf)
151{
152 int i;
153 psiconv_paragraph para;
154
155 if (tf->page_sec->header->text) {
156 if (tf->page_sec->header->on_first_page) {
157 for (i=0; i < psiconv_list_length(tf->page_sec->header->text->paragraphs); i++) {
158 para = psiconv_list_get(tf->page_sec->header->text->paragraphs, i);
159 fput_text(of, para->text, strlen(para->text));
160 fputs("\n", of);
161 }
162 }
163 }
164 fputs("\n",of);
165
166 for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
167 para = psiconv_list_get(tf->texted_sec->paragraphs, i);
168 fput_para(of, para,0);
169 }
170
171 fputs("\n",of);
172 if (tf->page_sec->header->text) {
173 for (i=0; i < psiconv_list_length(tf->page_sec->footer->text->paragraphs); i++) {
174 para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i);
175 fput_text(of, para->text, strlen(para->text));
176 fputs("\n", of);
177 }
178 }
179}
180
181static void psiconv_gen_txt_word(FILE * of, psiconv_word_f wf)
182{
183 int i;
184 psiconv_paragraph para;
185
186 if (wf->page_sec->header->on_first_page) {
187 for (i=0; i < psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) {
188 para = psiconv_list_get(wf->page_sec->header->text->paragraphs, i);
189 fput_text(of, para->text, strlen(para->text));
190 fputs("\n\n", of);
191 }
192 }
193 fputs("\n\n", of);
194
195 for (i=0; i < psiconv_list_length(wf->paragraphs); i++) {
196 para = psiconv_list_get(wf->paragraphs, i);
197 fput_para(of, para,1);
198 }
199
200 fputs("\n\n", of);
201 for (i=0; i < psiconv_list_length(wf->page_sec->footer->text->paragraphs); i++) {
202 para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i);
203 fput_text(of, para->text, strlen(para->text));
204 fputs("\n\n", of);
205 }
206}
207
208static int psiconv_gen_txt(const char *filename, const psiconv_file file,
209 const char *dest,
210 const psiconv_encoding encoding_type) 43 const encoding encoding_type);
211{
212 FILE *of = fopen(filename,"w");
213 if (! of)
214 return -1;
215
216 encoding=encoding_type;
217 44
218 if (file->type == psiconv_word_file) {
219 psiconv_gen_txt_word(of,(psiconv_word_f) file->file);
220 } else if (file->type == psiconv_texted_file) {
221 psiconv_gen_txt_texted(of,(psiconv_texted_f) file->file);
222 } else {
223 fclose(of);
224 return -1;
225 }
226 return fclose(of);
227}
228
229static struct psiconv_fileformat_s ff = 45static struct fileformat_s ff =
230 { 46 {
231 "ASCII", 47 "ASCII",
232 "Plain text without much layout", 48 "Plain text without much layout",
233 psiconv_gen_txt 49 gen_txt
234 }; 50 };
235 51
52
53void output_para(const psiconv_config config,psiconv_list list,
54 const psiconv_paragraph para,encoding encoding_type)
55{
56 int i;
57 if (para && para->base_paragraph && para->base_paragraph->bullet &&
58 para->base_paragraph->bullet->on) {
59 output_char(config,list,para->base_paragraph->bullet->character,
60 encoding_type);
61 output_char(config,list,' ', encoding_type);
62 output_char(config,list,' ', encoding_type);
63 output_char(config,list,' ', encoding_type);
64 }
65 if (para && para->text) {
66 for (i = 0; i < psiconv_unicode_strlen(para->text); i++)
67 switch (para->text[i]) {
68 case 0x06:
69 case 0x07:
70 case 0x08:
71 output_char(config,list,'\n',encoding_type);
72 break;
73 case 0x09:
74 case 0x0a:
75 output_char(config,list,'\t',encoding_type);
76 break;
77 case 0x0b:
78 case 0x0c:
79 output_char(config,list,'-',encoding_type);
80 break;
81 case 0x0f:
82 output_char(config,list,' ',encoding_type);
83 break;
84 case 0x00:
85 case 0x01:
86 case 0x02:
87 case 0x03:
88 case 0x04:
89 case 0x05:
90 case 0x0e:
91 case 0x10:
92 case 0x11:
93 case 0x12:
94 case 0x13:
95 case 0x14:
96 case 0x15:
97 case 0x16:
98 case 0x17:
99 case 0x18:
100 case 0x19:
101 case 0x1a:
102 case 0x1c:
103 case 0x1d:
104 case 0x1e:
105 case 0x1f:
106 break;
107 default:
108 output_char(config,list,para->text[i],encoding_type);
109 break;
110 }
111 output_char(config,list,'\n',encoding_type);
112 }
113}
114
115void gen_txt_word(const psiconv_config config, psiconv_list list,
116 psiconv_word_f wf, encoding encoding_type)
117{
118 int i;
119 psiconv_paragraph para;
120
121 if (wf && wf->page_sec && wf->page_sec->header &&
122 wf->page_sec->header->text && wf->page_sec->header->text->paragraphs) {
123 for (i=0;
124 i < psiconv_list_length(wf->page_sec->header->
125 text->paragraphs); i++) {
126 para = psiconv_list_get(wf->page_sec->header->text->paragraphs,
127 i);
128 output_para(config,list,para,encoding_type);
129 }
130 }
131 output_char(config,list,'\n',encoding_type);
132
133 if (wf && wf->paragraphs)
134 for (i=0; i < psiconv_list_length(wf->paragraphs); i++) {
135 para = psiconv_list_get(wf->paragraphs, i);
136 output_para(config, list,para,encoding_type);
137 }
138
139 output_char(config,list,'\n',encoding_type);
140
141 if (wf && wf->page_sec && wf->page_sec->footer &&
142 wf->page_sec->footer->text && wf->page_sec->footer->text->paragraphs) {
143 for (i=0;
144 i < psiconv_list_length(wf->page_sec->footer->
145 text->paragraphs); i++) {
146 para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i);
147 output_para(config,list,para,encoding_type);
148 }
149 }
150}
151
152void gen_txt_texted(const psiconv_config config, psiconv_list list,
153 psiconv_texted_f tf, encoding encoding_type)
154{
155 int i;
156 psiconv_paragraph para;
157
158 if (tf && tf->page_sec && tf->page_sec->header &&
159 tf->page_sec->header->text && tf->page_sec->header->text->paragraphs) {
160 for (i=0;
161 i < psiconv_list_length(tf->page_sec->header->
162 text->paragraphs); i++) {
163 para = psiconv_list_get(tf->page_sec->header->text->paragraphs,
164 i);
165 output_para(config,list,para,encoding_type);
166 }
167 }
168 output_char(config,list,'\n',encoding_type);
169
170 if (tf && tf->texted_sec && tf->texted_sec->paragraphs)
171 for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
172 para = psiconv_list_get(tf->texted_sec->paragraphs, i);
173 output_para(config, list,para,encoding_type);
174 }
175
176 output_char(config,list,'\n',encoding_type);
177
178 if (tf && tf->page_sec && tf->page_sec->footer &&
179 tf->page_sec->footer->text && tf->page_sec->footer->text->paragraphs) {
180 for (i=0;
181 i < psiconv_list_length(tf->page_sec->footer->
182 text->paragraphs); i++) {
183 para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i);
184 output_para(config,list,para,encoding_type);
185 }
186 }
187}
188
189int gen_txt(const psiconv_config config, psiconv_list list,
190 const psiconv_file file, const char *dest,
191 const encoding encoding_type)
192{
193 if (file->type == psiconv_word_file) {
194 gen_txt_word(config,list,(psiconv_word_f) file->file,encoding_type);
195 return 0;
196 } else if (file->type == psiconv_texted_file) {
197 gen_txt_texted(config,list,(psiconv_texted_f) file->file,encoding_type);
198 return 0;
199 } else
200 return -1;
201}
202
236void init_txt(void) 203void init_txt(void)
237{ 204{
238 psiconv_list_add(fileformat_list,&ff); 205 psiconv_list_add(fileformat_list,&ff);
239} 206}
240 207

Legend:
Removed from v.184  
changed lines
  Added in v.185

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26