/[public]/psiconv/trunk/program/psiconv/gen_latex.c
ViewVC logotype

Contents of /psiconv/trunk/program/psiconv/gen_latex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 147 - (show annotations)
Fri May 10 15:55:55 2002 UTC (21 years, 10 months ago) by frodo
File MIME type: text/plain
File size: 15729 byte(s)
(Frodo) UTF-8 support (Keita Kawabe, keite.kawabe@mpq.mpg.de)

1 /*
2 gen_latex.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 2001 Jim Ottaway <j.ottaway@lse.ac.uk>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 /* gen_latex.c
21
22 Sun Feb 11 12:16:29 2001
23
24 Jim Ottaway <j.ottaway@lse.ac.uk>
25
26 Conversion to LaTeX:
27
28 * Converts to the article class
29
30 * If the file is a Word file and has outline levels (i.e. is not a
31 Psion 5 file), outline levels 1 to 5 are converted to sectioning
32 commands, otherwise headings are just formatted
33
34 * If there is a style with the name 'quotation', the paragraph is
35 converted to a quotation environment (other styles/environments
36 could be added)
37
38 * Also does formatting commands (italics -> \emph, bold -> \textbf,
39 underline -> \underline), and character translation
40
41 */
42
43 /*
44 2002/Apr. Keita KAWABE
45 Support for narrow build Asian Psions added (experimental).
46
47 If the encoding_type is PSICONV_ENCODING_UTF8, use utf_table for
48 character conversion. Otherwise use char_table.
49
50 When using UTF8 for LaTeX type, note that the resulting latex source
51 is not at all usable unless the user him/herself converts it to
52 appropriate encoding. This is because the encoding in localized TeXs
53 differ from system to system.
54 For example, if the user wants to generate a Japanese LaTeX source
55 from a UTF8 Psion Word, he/she first has to convert the Word document by
56
57 psiconv -TLaTeX -u word | iconv --from-code=utf8 --to-code=EUC-JP > test.tex
58
59 or something similar. After that, just proceed as normal.
60
61 If you want to use Japanese-localized documentclass "jarticle" rather
62 than "article", please uncomment all the commented-out lines that are
63 explicitly stated as "Uncomment if you'd like to use jarticle" in this
64 source.
65 */
66
67 #include "config.h"
68 #include <stdio.h>
69 #include <string.h>
70 #include <stdlib.h>
71 #include "psiconv/data.h"
72 #include "psiconv/list.h"
73 #include "gen.h"
74 #include "psiconv.h"
75
76 #ifdef DMALLOC
77 #include "dmalloc.h"
78 #endif
79
/* Translation table used for non-UTF8 output: indexed by the byte value of
   a character in the paragraph text, it yields the LaTeX source emitted for
   that character.  An empty string drops the character.

   This is incomplete at the moment.

   Most of the translation/faking of chars is borrowed from the
   HTML::Latex.pm perl module.

   Control words (\OE, \oe, \AA, \AE, ...) are wrapped in braces so that a
   following letter cannot be absorbed into the command name ("\oe" + "uvre"
   would otherwise read as the undefined command \oeuvre).  A literal tilde
   is written as \~{} because a bare ~ is LaTeX's non-breaking space. */
static const char *char_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "\n\n", "\\\\",
  /* 0x08 */ "\n\n", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "\\#", "\\$", "\\%", "\\&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "$<$", "=", "$>$", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "$\\backslash$", "]", "\\^{}", "\\_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "\\{", "$|$", "\\}", "\\~{}", "",
  /* 0x80 */ "", "", "", "$f$", "", "{\\ldots}", "$\\dagger$", "$\\ddagger$",
  /* 0x88 */ "\\^{}", "", "", "", "{\\OE}", "", "", "",
  /* 0x90 */ "", "`", "'", "``", "''", "$\\bullet$", "--", "---",
  /* 0x98 */ "\\~{}", "{\\textsc{tm}}", "", "", "{\\oe}", "", "", "\\\"Y",
  /* 0xa0 */ "", "!`", "c", "{\\pounds}",
             "", "{Y\\hspace*{-1.4ex}--}", "$|$", "{\\S}",
  /* 0xa8 */ "\"", "{\\copyright}", "$^{\\underline{a}}$", "",
             "$\\neg$", "$-$", "", "$^-$",
  /* 0xb0 */ "$^{\\circ}$", "$\\pm$", "$^2$", "$^3$",
             "$^\\prime$", "$\\mu$", "{\\P}", "$\\cdot$",
  /* 0xb8 */ ",", "$^1$", "$^{\\underline{\\circ}}$", "",
             "$\\frac{1}{4}$", "$\\frac{1}{2}$", "$\\frac{3}{4}$", "?`",
  /* 0xc0 */ "\\`A", "\\'A", "\\^A", "\\~A",
             "\\\"A", "{\\AA}", "{\\AE}", "\\c{C}",
  /* 0xc8 */ "\\`E", "\\'E", "\\^E", "\\\"E",
             "\\`I", "\\'I", "\\^I", "\\\"I",
  /* 0xd0 */ "{D\\hspace*{-1.7ex}-\\hspace{.9ex}}", "\\~N", "\\`O", "\\'O",
             "\\^O", "\\~O", "\\\"O", "$\\times$",
  /* 0xd8 */ "{\\O}", "\\`U", "\\'U", "\\^U",
             "\\\"U", "\\'Y", "", "",
  /* 0xe0 */ "\\`a", "\\'a", "\\^a", "\\~a",
             "\\\"a", "\\r{a}", "{\\ae}", "\\c{c}",
  /* 0xe8 */ "\\`e", "\\'e", "\\^e", "\\\"e",
             "\\`{\\i}", "\\'{\\i}", "\\^{\\i}", "\\\"{\\i}",
  /* 0xf0 */ "\\v{o}", "\\~n", "\\`o", "\\'o",
             "\\^o", "\\~o", "\\\"o", "$\\div$",
  /* 0xf8 */ "{\\o}", "\\`u", "\\'u", "\\^u",
             "\\\"u", "\\'y", "", "\\\"y"
};
141
/* Translation table used for UTF8 output: indexed by the byte value of a
   character in the paragraph text.  The lower half escapes the characters
   that are special to LaTeX; every byte from 0x80 upwards maps to itself, so
   multi-byte sequences pass through unchanged.

   A literal tilde is written as \~{} because a bare ~ is LaTeX's
   non-breaking space. */
static const char *utf_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "\n\n", "\\\\",
  /* 0x08 */ "\n\n", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "\\#", "\\$", "\\%", "\\&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "$<$", "=", "$>$", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "$\\backslash$", "]", "\\^{}", "\\_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "\\{", "$|$", "\\}", "\\~{}", "",
  /* 0x80 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
  /* 0x88 */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
  /* 0x90 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
  /* 0x98 */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff"
};
192
193 /* a flag to indicate the use of UTF8 */
194 static psiconv_encoding encoding = PSICONV_ENCODING_CP1252;
195
196 static psiconv_character_layout gen_base_char(const psiconv_font font,
197 const psiconv_color color,
198 const psiconv_color
199 back_color);
200 static void diff_char(FILE *of, const psiconv_character_layout old,
201 const psiconv_character_layout new, int *flags);
202 static void gen_para(FILE *of, const psiconv_paragraph para,
203 const psiconv_character_layout base_char,
204 psiconv_word_f wf);
205
206 static void psiconv_gen_latex_word(FILE *of,psiconv_word_f wf);
207 static void psiconv_gen_latex_texted(FILE *of,psiconv_texted_f tf);
208
209 psiconv_character_layout gen_base_char(const psiconv_font font,
210 const psiconv_color color,
211 const psiconv_color back_color)
212 {
213 struct psiconv_character_layout_s base_char_struct =
214 {
215 NULL, /* color */
216 NULL, /* back_color */
217 13.0, /* font_size */
218 psiconv_bool_false, /* italic */
219 psiconv_bool_false, /* bold */
220 psiconv_normalscript, /* super_sub */
221 psiconv_bool_false, /* underline */
222 psiconv_bool_false, /* strikethrough */
223 NULL, /* font */
224 };
225 base_char_struct.color = color;
226 base_char_struct.back_color = back_color;
227 base_char_struct.font = font;
228 return psiconv_clone_character_layout(&base_char_struct);
229 }
230
231 /* flags & 1: 1 if in a section
232 flags & 2: 1 if at end-of-paragraph
233 */
234 void diff_char(FILE *of, const psiconv_character_layout old,
235 const psiconv_character_layout new,
236 int *flags)
237 {
238 if ((*flags & 3) == 3) { /* end of section command argument */
239 putc('}',of);
240 return;
241 }
242 if (old->italic != new->italic) {
243 if (old->italic)
244 putc('}',of);
245 else
246 fputs("\\emph{",of);
247 }
248 if (old->bold != new->bold) {
249 if (old->bold)
250 putc('}',of);
251 else
252 fputs("\\textbf{",of);
253 }
254 if (old->underline != new->underline) {
255 if (old->underline)
256 putc('}',of);
257 else
258 fputs("\\underline{",of);
259 }
260 }
261
262
/* LaTeX sectioning commands, indexed by outline level minus one. */
static const char *sections[] = {
  "section", "subsection", "subsubsection", "paragraph", "subparagraph"
};
270
/* Associates a Word style name with the LaTeX environment that wraps
   paragraphs of that style. */
struct environment {
  char *style_name;
  char *environment_name;
};

/* Known style-to-environment mappings.  The list is terminated by an entry
   whose environment_name is NULL. */
static const struct environment environments[] = {
  {"quotation", "quotation"},
  {"", NULL}
};

/* Look up the LaTeX environment for a style name.

   style: style name to look up; may be NULL.

   Returns the environment name of the first entry whose style_name equals
   `style', or NULL when there is no such entry or `style' is NULL.  The
   returned string belongs to the table and must not be modified or freed. */
char *find_env(char *style) {
  const struct environment *entry;

  /* strcmp() on a NULL pointer is undefined; treat it as "no match". */
  if (style == NULL)
    return NULL;

  for (entry = environments; entry->environment_name != NULL; entry++) {
    if (strcmp(style, entry->style_name) == 0)
      return entry->environment_name;
  }
  return NULL;
}
291
292 psiconv_bool_t bullet_switch_on = psiconv_bool_false;
293
294 void gen_para(FILE *of, const psiconv_paragraph para,
295 const psiconv_character_layout base_char,
296 psiconv_word_f wf)
297 {
298 int i,j,loc;
299 psiconv_character_layout cur_char;
300 psiconv_in_line_layout inl;
301 int flags = 0;
302 psiconv_word_style sty;
303 char *env = NULL;
304
305 char ** table;
306 if (encoding == PSICONV_ENCODING_UTF8){
307 table = (char**)utf_table;
308 }else{
309 table = (char**)char_table;
310 }
311
312 if (para->base_paragraph->bullet->on) {
313 if (! bullet_switch_on) {
314 fputs("\\begin{itemize}\n\n", of);
315 bullet_switch_on = psiconv_bool_true;
316 }
317 fputs("\\item ",of);
318 } else {
319 if (bullet_switch_on) {
320 fputs("\\end{itemize}\n\n", of);
321 bullet_switch_on = psiconv_bool_false;
322 }
323 }
324
325 cur_char = base_char;
326
327 if (wf) {
328 sty = psiconv_get_style(wf->styles_sec, para->base_style);
329 if (sty->name && (env = find_env(sty->name))) {
330 fputs("\\begin{",of);
331 fputs(env,of);
332 fputs("}\n",of);
333 } else {
334 if (sty->outline_level &&
335 (sty->outline_level > 0) && (sty->outline_level < 6)) {
336 putc('\\', of);
337 fputs(sections[(sty->outline_level - 1)], of);
338 putc('{', of);
339 cur_char = para->base_character; /* ignore initial formatting */
340 flags |= 1;
341 }
342 }
343 }
344
345 if (psiconv_list_length(para->in_lines) == 0) {
346 diff_char(of,cur_char,para->base_character,&flags);
347 cur_char = para->base_character;
348 }
349 loc = 0;
350
351 for (i = 0; i < psiconv_list_length(para->in_lines); i++) {
352 inl = psiconv_list_get(para->in_lines,i);
353 diff_char(of,cur_char,inl->layout,&flags);
354 cur_char = inl->layout;
355 for (j = loc; j < inl->length + loc; j ++) {
356 fputs(table[(unsigned char) (para->text[j])],of);
357 }
358 loc = j;
359 }
360
361 if (loc < strlen(para->text)) {
362 diff_char(of,cur_char,para->base_character,&flags);
363 cur_char = para->base_character;
364 for (j = loc; j < strlen(para->text); j ++) {
365 fputs(table[(unsigned char) (para->text[j])],of);
366 }
367 }
368
369 flags |= 2;
370 diff_char(of,cur_char,base_char,&flags);
371
372 if (env) {
373 fputs("\n\\end{",of);
374 fputs(env,of);
375 putc('}',of);
376 }
377
378 fputs("\n\n", of);
379 }
380
381 int psiconv_gen_latex(const char * filename,const psiconv_file file,
382 const char *dest, const psiconv_encoding encoding_type)
383 {
384 FILE *of = fopen(filename,"w");
385 if (! of)
386 return -1;
387
388 encoding = encoding_type;
389
390 if (file->type == psiconv_word_file) {
391 psiconv_gen_latex_word(of,(psiconv_word_f) file->file);
392 } else if (file->type == psiconv_texted_file) {
393 psiconv_gen_latex_texted(of,(psiconv_texted_f) file->file);
394 } else {
395 fclose(of);
396 return -1;
397 }
398 return fclose(of);
399 }
400
401 /* This isn't tested !!! */
402 void psiconv_gen_latex_texted(FILE *of,psiconv_texted_f tf)
403 {
404 psiconv_character_layout base_char;
405 psiconv_paragraph para;
406 int i;
407
408 /* We have nothing better */
409 base_char = psiconv_basic_character_layout();
410
411 /* Uncomment if you'd like to use jarticle...
412 if (encoding == PSICONV_ENCODING_UTF8){
413 fputs("\\documentclass{jarticle}\n\n\\begin{document}\n\n", of);
414 }else{
415 */
416 fputs("\\documentclass{article}\n\n\\begin{document}\n\n", of);
417 /* Uncomment if you'd like to use jarticle...
418 }
419 */
420 for (i = 0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
421 para = psiconv_list_get(tf->texted_sec->paragraphs,i);
422 gen_para(of,para,base_char, NULL);
423 }
424 fputs("\\end{document}",of);
425 psiconv_free_character_layout(base_char);
426 }
427
428 void psiconv_gen_latex_word(FILE *of,psiconv_word_f wf)
429 {
430 int i;
431 psiconv_paragraph para;
432 psiconv_color white,black;
433 psiconv_character_layout base_char;
434
435 white = malloc(sizeof(*white));
436 black = malloc(sizeof(*black));
437 white->red = 0x00;
438 white->green = 0x00;
439 white->blue = 0x00;
440 black->red = 0xff;
441 black->green = 0xff;
442 black->blue = 0xff;
443
444 /* To keep from generating a font desc for each line */
445 base_char = gen_base_char(wf->styles_sec->normal->character->font,
446 black,white);
447
448 psiconv_free_color(black);
449 psiconv_free_color(white);
450
451 /* Uncomment if you'd like to use jarticle
452 if (encoding == PSICONV_ENCODING_UTF8){
453 fputs("\\documentclass{jarticle}\n\n\\begin{document}\n\n", of);
454 }else{
455 */
456 fputs("\\documentclass{article}\n\n\\begin{document}\n\n", of);
457 /* Uncomment if you'd like to use jarticle
458 }
459 */
460
461 for (i = 0; i < psiconv_list_length(wf->paragraphs); i++) {
462 para = psiconv_list_get(wf->paragraphs,i);
463 gen_para(of,para,base_char,wf);
464 }
465 fputs("\\end{document}\n",of);
466 for (i = 0; i <
467 psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) {
468 para = psiconv_list_get(wf->page_sec->header->text->paragraphs,i);
469 gen_para(of,para,base_char,wf);
470 }
471
472 psiconv_free_character_layout(base_char);
473 }
474
475 static struct psiconv_fileformat_s ff =
476 {
477 "LaTeX",
478 "LaTeX conversion to article class",
479 &psiconv_gen_latex
480 };
481
482 void init_latex(void)
483 {
484 psiconv_list_add(fileformat_list,&ff);
485 }
486

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26