/[public]/psiconv/trunk/program/psiconv/gen_latex.c
ViewVC logotype

Annotation of /psiconv/trunk/program/psiconv/gen_latex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 147 - (hide annotations)
Fri May 10 15:55:55 2002 UTC (21 years, 11 months ago) by frodo
File MIME type: text/plain
File size: 15729 byte(s)
(Frodo) UTF-8 support (Keita Kawabe, keite.kawabe@mpq.mpg.de)

1 frodo 109 /*
2     gen_latex.c - Part of psiconv, a PSION 5 file formats converter
3     Copyright (c) 2001 Jim Ottaway <j.ottaway@lse.ac.uk>
4    
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14    
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18     */
19    
20     /* gen_latex.c
21    
22     Sun Feb 11 12:16:29 2001
23    
24     Jim Ottaway <j.ottaway@lse.ac.uk>
25    
26     Conversion to LaTeX:
27    
28     * Converts to the article class
29    
30     * If the file is a Word file and has outline levels (i.e. is not a
31     Psion 5 file), outline levels 1 to 5 are converted to sectioning
32     commands, otherwise headings are just formatted
33    
34     * If there is a style with the name 'quotation', the paragraph is
35     converted to a quotation environment (other styles/environments
36     could be added)
37    
38     * Also does formatting commands (italics -> \emph, bold -> \textbf,
39     underline -> \underline), and character translation
40    
41     */
42    
43 frodo 147 /*
44     2002/Apr. Keita KAWABE
45     Support for narrow build Asian Psions added (experimental).
46    
47     If the encoding_type is PSICONV_ENCODING_UTF8, use utf_table for
48     character conversion. Otherwise use char_table.
49    
50     When using UTF8 for the LaTeX type, note that the resulting LaTeX source
51     is not usable until the user converts it to the appropriate
52     encoding. This is because the encodings used by localized TeXs
53     differ from system to system.
54     For example, if the user wants to generate a Japanese LaTeX source
55     from a UTF8 Psion Word, he/she first has to convert the Word document by
56    
57     psiconv -TLaTeX -u word | iconv --from-code=utf8 --to-code=EUC-JP > test.tex
58    
59     or something similar. After that, just proceed as normal.
60    
61     If you want to use Japanese-localized documentclass "jarticle" rather
62     than "article", please uncomment all the commented-out lines that are
63     explicitly stated as "Uncomment if you'd like to use jarticle" in this
64     source.
65     */
66    
67 frodo 109 #include "config.h"
68     #include <stdio.h>
69     #include <string.h>
70     #include <stdlib.h>
71     #include "psiconv/data.h"
72     #include "psiconv/list.h"
73     #include "gen.h"
74     #include "psiconv.h"
75    
76 frodo 142 #ifdef DMALLOC
77     #include "dmalloc.h"
78     #endif
79    
/* Translation table for single-byte (CP1252) text: entry N is the LaTeX
   source emitted for input byte N.  An empty string drops the character.

   This is incomplete at the moment.  Most of the translation/faking of
   chars is borrowed from the HTML::Latex.pm perl module.

   Notes on entries that are easy to get wrong:
   - Control words that stand for a letter (\OE, \oe, \AA, \AE, \ss, ...)
     are wrapped in braces.  Unbraced, they swallow a following space and
     fuse with following letters into an undefined control sequence.
   - A literal tilde is written as \~{}; a bare ~ is LaTeX's non-breaking
     space and would silently vanish from the output. */
static const char *char_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "\n\n", "\\\\",
  /* 0x08 */ "\n\n", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "\\#", "\\$", "\\%", "\\&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "$<$", "=", "$>$", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "$\\backslash$", "]", "\\^{}", "\\_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "\\{", "$|$", "\\}", "\\~{}", "",
  /* 0x80 */ "", "", "", "$f$", "", "{\\ldots}", "$\\dagger$", "$\\ddagger$",
  /* 0x88 */ "\\^{}", "", "", "", "{\\OE}", "", "", "",
  /* 0x90 */ "", "`", "'", "``", "''", "$\\bullet$", "--", "---",
  /* 0x98 */ "\\~{}", "{\\textsc{tm}}", "", "", "{\\oe}", "", "", "\\\"Y",
  /* 0xa0 */ "", "!`", "c", "{\\pounds}",
             "", "{Y\\hspace*{-1.4ex}--}", "$|$", "{\\S}",
  /* 0xa8 */ "\"", "{\\copyright}", "$^{\\underline{a}}$", "",
             "$\\neg$", "$-$", "", "$^-$",
  /* 0xb0 */ "$^{\\circ}$", "$\\pm$", "$^2$", "$^3$",
             "$^\\prime$", "$\\mu$", "{\\P}", "$\\cdot$",
  /* 0xb8 */ ",", "$^1$", "$^{\\underline{\\circ}}$", "",
             "$\\frac{1}{4}$", "$\\frac{1}{2}$", "$\\frac{3}{4}$", "?`",
  /* 0xc0 */ "\\`A", "\\'A", "\\^A", "\\~A",
             "\\\"A", "{\\AA}", "{\\AE}", "\\c{C}",
  /* 0xc8 */ "\\`E", "\\'E", "\\^E", "\\\"E",
             "\\`I", "\\'I", "\\^I", "\\\"I",
  /* 0xd0 */ "{D\\hspace*{-1.7ex}-\\hspace{.9ex}}", "\\~N", "\\`O", "\\'O",
             "\\^O", "\\~O", "\\\"O", "$\\times$",
  /* 0xd8 */ "{\\O}", "\\`U", "\\'U", "\\^U",
             "\\\"U", "\\'Y", "", "{\\ss}",
  /* 0xe0 */ "\\`a", "\\'a", "\\^a", "\\~a",
             "\\\"a", "\\r{a}", "{\\ae}", "\\c{c}",
  /* 0xe8 */ "\\`e", "\\'e", "\\^e", "\\\"e",
             "\\`{\\i}", "\\'{\\i}", "\\^{\\i}", "\\\"{\\i}",
  /* 0xf0 */ "\\v{o}", "\\~n", "\\`o", "\\'o",
             "\\^o", "\\~o", "\\\"o", "$\\div$",
  /* 0xf8 */ "{\\o}", "\\`u", "\\'u", "\\^u",
             "\\\"u", "\\'y", "", "\\\"y"
};
141    
/* Translation table used when the encoding is PSICONV_ENCODING_UTF8:
   entry N is the output for input byte N.

   The ASCII half escapes LaTeX's special characters exactly like the
   single-byte table does.  Bytes 0x80-0xff are copied through unchanged so
   that multi-byte sequences survive intact.

   A literal tilde is written as \~{}; a bare ~ is LaTeX's non-breaking
   space and would silently vanish from the output. */
static const char *utf_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "\n\n", "\\\\",
  /* 0x08 */ "\n\n", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "\"", "\\#", "\\$", "\\%", "\\&", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "$<$", "=", "$>$", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "$\\backslash$", "]", "\\^{}", "\\_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "\\{", "$|$", "\\}", "\\~{}", "",
  /* 0x80 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
  /* 0x88 */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
  /* 0x90 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
  /* 0x98 */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
  /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
  /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
  /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
  /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
  /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
  /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
  /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
  /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
  /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
  /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
  /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
  /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff"
};
192    
193     /* a flag to indicate the use of UTF8 */
194     static psiconv_encoding encoding = PSICONV_ENCODING_CP1252;
195    
196 frodo 109 static psiconv_character_layout gen_base_char(const psiconv_font font,
197     const psiconv_color color,
198     const psiconv_color
199     back_color);
200     static void diff_char(FILE *of, const psiconv_character_layout old,
201     const psiconv_character_layout new, int *flags);
202     static void gen_para(FILE *of, const psiconv_paragraph para,
203     const psiconv_character_layout base_char,
204     psiconv_word_f wf);
205    
206     static void psiconv_gen_latex_word(FILE *of,psiconv_word_f wf);
207     static void psiconv_gen_latex_texted(FILE *of,psiconv_texted_f tf);
208    
209     psiconv_character_layout gen_base_char(const psiconv_font font,
210     const psiconv_color color,
211     const psiconv_color back_color)
212     {
213     struct psiconv_character_layout_s base_char_struct =
214     {
215     NULL, /* color */
216     NULL, /* back_color */
217     13.0, /* font_size */
218     psiconv_bool_false, /* italic */
219     psiconv_bool_false, /* bold */
220     psiconv_normalscript, /* super_sub */
221     psiconv_bool_false, /* underline */
222     psiconv_bool_false, /* strikethrough */
223     NULL, /* font */
224     };
225     base_char_struct.color = color;
226     base_char_struct.back_color = back_color;
227     base_char_struct.font = font;
228     return psiconv_clone_character_layout(&base_char_struct);
229     }
230    
231     /* flags & 1: 1 if in a section
232     flags & 2: 1 if at end-of-paragraph
233     */
234     void diff_char(FILE *of, const psiconv_character_layout old,
235     const psiconv_character_layout new,
236     int *flags)
237     {
238     if ((*flags & 3) == 3) { /* end of section command argument */
239     putc('}',of);
240     return;
241     }
242     if (old->italic != new->italic) {
243     if (old->italic)
244     putc('}',of);
245     else
246     fputs("\\emph{",of);
247     }
248     if (old->bold != new->bold) {
249     if (old->bold)
250     putc('}',of);
251     else
252     fputs("\\textbf{",of);
253     }
254     if (old->underline != new->underline) {
255     if (old->underline)
256     putc('}',of);
257     else
258     fputs("\\underline{",of);
259     }
260     }
261    
262    
/* LaTeX sectioning command names; gen_para() indexes this with
   (outline level - 1) for outline levels 1 to 5. */
static const char *sections[] = {
  "section", "subsection", "subsubsection", "paragraph", "subparagraph"
};
270    
/* Pairs a paragraph style name with the LaTeX environment that wraps
   paragraphs carrying that style. */
struct environment {
  char *style_name;
  char *environment_name;
};

/* Known style-to-environment mappings.  The entry whose environment_name is
   NULL terminates the list. */
static const struct environment environments[] = {
  {"quotation", "quotation"},
  {"", NULL}
};

/* Look up the LaTeX environment for a style name.

   Returns the environment name of the first entry whose style_name equals
   `style', or NULL when no entry matches.  `style' must not be NULL. */
char *find_env(char *style) {
  const struct environment *entry;

  for (entry = environments; entry->environment_name != NULL; entry++) {
    if (strcmp(style, entry->style_name) != 0)
      continue;
    return entry->environment_name;
  }
  return NULL;
}
291    
292     psiconv_bool_t bullet_switch_on = psiconv_bool_false;
293    
294     void gen_para(FILE *of, const psiconv_paragraph para,
295     const psiconv_character_layout base_char,
296     psiconv_word_f wf)
297     {
298     int i,j,loc;
299     psiconv_character_layout cur_char;
300     psiconv_in_line_layout inl;
301     int flags = 0;
302     psiconv_word_style sty;
303     char *env = NULL;
304    
305 frodo 147 char ** table;
306     if (encoding == PSICONV_ENCODING_UTF8){
307     table = (char**)utf_table;
308     }else{
309     table = (char**)char_table;
310     }
311    
312 frodo 109 if (para->base_paragraph->bullet->on) {
313     if (! bullet_switch_on) {
314     fputs("\\begin{itemize}\n\n", of);
315     bullet_switch_on = psiconv_bool_true;
316     }
317     fputs("\\item ",of);
318     } else {
319     if (bullet_switch_on) {
320     fputs("\\end{itemize}\n\n", of);
321     bullet_switch_on = psiconv_bool_false;
322     }
323     }
324    
325     cur_char = base_char;
326    
327     if (wf) {
328     sty = psiconv_get_style(wf->styles_sec, para->base_style);
329     if (sty->name && (env = find_env(sty->name))) {
330     fputs("\\begin{",of);
331     fputs(env,of);
332     fputs("}\n",of);
333     } else {
334     if (sty->outline_level &&
335     (sty->outline_level > 0) && (sty->outline_level < 6)) {
336     putc('\\', of);
337     fputs(sections[(sty->outline_level - 1)], of);
338     putc('{', of);
339     cur_char = para->base_character; /* ignore initial formatting */
340     flags |= 1;
341     }
342     }
343     }
344    
345     if (psiconv_list_length(para->in_lines) == 0) {
346     diff_char(of,cur_char,para->base_character,&flags);
347     cur_char = para->base_character;
348     }
349     loc = 0;
350    
351     for (i = 0; i < psiconv_list_length(para->in_lines); i++) {
352     inl = psiconv_list_get(para->in_lines,i);
353     diff_char(of,cur_char,inl->layout,&flags);
354     cur_char = inl->layout;
355     for (j = loc; j < inl->length + loc; j ++) {
356 frodo 147 fputs(table[(unsigned char) (para->text[j])],of);
357 frodo 109 }
358     loc = j;
359     }
360    
361     if (loc < strlen(para->text)) {
362     diff_char(of,cur_char,para->base_character,&flags);
363     cur_char = para->base_character;
364     for (j = loc; j < strlen(para->text); j ++) {
365 frodo 147 fputs(table[(unsigned char) (para->text[j])],of);
366 frodo 109 }
367     }
368    
369     flags |= 2;
370     diff_char(of,cur_char,base_char,&flags);
371    
372     if (env) {
373     fputs("\n\\end{",of);
374     fputs(env,of);
375     putc('}',of);
376     }
377    
378     fputs("\n\n", of);
379     }
380    
381     int psiconv_gen_latex(const char * filename,const psiconv_file file,
382 frodo 147 const char *dest, const psiconv_encoding encoding_type)
383 frodo 109 {
384     FILE *of = fopen(filename,"w");
385     if (! of)
386     return -1;
387    
388 frodo 147 encoding = encoding_type;
389    
390 frodo 109 if (file->type == psiconv_word_file) {
391     psiconv_gen_latex_word(of,(psiconv_word_f) file->file);
392     } else if (file->type == psiconv_texted_file) {
393     psiconv_gen_latex_texted(of,(psiconv_texted_f) file->file);
394     } else {
395     fclose(of);
396     return -1;
397     }
398     return fclose(of);
399     }
400    
401     /* This isn't tested !!! */
402     void psiconv_gen_latex_texted(FILE *of,psiconv_texted_f tf)
403     {
404     psiconv_character_layout base_char;
405     psiconv_paragraph para;
406     int i;
407    
408     /* We have nothing better */
409     base_char = psiconv_basic_character_layout();
410    
411 frodo 147 /* Uncomment if you'd like to use jarticle...
412     if (encoding == PSICONV_ENCODING_UTF8){
413     fputs("\\documentclass{jarticle}\n\n\\begin{document}\n\n", of);
414     }else{
415     */
416     fputs("\\documentclass{article}\n\n\\begin{document}\n\n", of);
417     /* Uncomment if you'd like to use jarticle...
418     }
419     */
420 frodo 109 for (i = 0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
421     para = psiconv_list_get(tf->texted_sec->paragraphs,i);
422     gen_para(of,para,base_char, NULL);
423     }
424     fputs("\\end{document}",of);
425     psiconv_free_character_layout(base_char);
426     }
427    
428     void psiconv_gen_latex_word(FILE *of,psiconv_word_f wf)
429     {
430     int i;
431     psiconv_paragraph para;
432     psiconv_color white,black;
433     psiconv_character_layout base_char;
434    
435     white = malloc(sizeof(*white));
436     black = malloc(sizeof(*black));
437     white->red = 0x00;
438     white->green = 0x00;
439     white->blue = 0x00;
440     black->red = 0xff;
441     black->green = 0xff;
442     black->blue = 0xff;
443    
444     /* To keep from generating a font desc for each line */
445     base_char = gen_base_char(wf->styles_sec->normal->character->font,
446     black,white);
447    
448     psiconv_free_color(black);
449     psiconv_free_color(white);
450    
451 frodo 147 /* Uncomment if you'd like to use jarticle
452     if (encoding == PSICONV_ENCODING_UTF8){
453     fputs("\\documentclass{jarticle}\n\n\\begin{document}\n\n", of);
454     }else{
455     */
456     fputs("\\documentclass{article}\n\n\\begin{document}\n\n", of);
457     /* Uncomment if you'd like to use jarticle
458     }
459     */
460 frodo 109
461     for (i = 0; i < psiconv_list_length(wf->paragraphs); i++) {
462     para = psiconv_list_get(wf->paragraphs,i);
463     gen_para(of,para,base_char,wf);
464     }
465     fputs("\\end{document}\n",of);
466     for (i = 0; i <
467     psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) {
468     para = psiconv_list_get(wf->page_sec->header->text->paragraphs,i);
469     gen_para(of,para,base_char,wf);
470     }
471    
472     psiconv_free_character_layout(base_char);
473     }
474    
475     static struct psiconv_fileformat_s ff =
476     {
477     "LaTeX",
478     "LaTeX conversion to article class",
479     &psiconv_gen_latex
480     };
481    
482     void init_latex(void)
483     {
484     psiconv_list_add(fileformat_list,&ff);
485     }
486    

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26