/[public]/psiconv/trunk/program/psiconv/gen_html.c
ViewVC logotype

Contents of /psiconv/trunk/program/psiconv/gen_html.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 147 - (show annotations)
Fri May 10 15:55:55 2002 UTC (21 years, 10 months ago) by frodo
File MIME type: text/plain
File size: 17032 byte(s)
(Frodo) UTF-8 support (Keita Kawabe, keite.kawabe@mpq.mpg.de)

1 /*
2 gen_html.c - Part of psiconv, a PSION 5 file formats converter
3 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 /*
20 2002/Apr. Keita KAWABE
21 A: <META HTTP-EQUIV="Content-Type" CONTENT="text/html"> header
22 was added.
23
24 B: Support for narrow build Asian Psions added.
25 If the encoding_type is PSICONV_ENCODING_UTF8,
26 B-1: use utf_table for character conversion.
27 B-2: add the "Charset=UTF-8" in the Content-Type header.
28 Otherwise proceed as normal.
29 */
30
31 #include "config.h"
32 #include <stdio.h>
33 #include <string.h>
34 #include <stdlib.h>
35 #include "psiconv/data.h"
36 #include "psiconv/list.h"
37 #include "gen.h"
38 #include "psiconv.h"
39
40 #ifdef DMALLOC
41 #include "dmalloc.h"
42 #endif
43
/*
 * Output text for every 8-bit input character when the encoding is not
 * UTF-8 (gen_para selects this table in that case). Entry N is written to
 * the HTML file for input byte N; an empty string drops the byte.
 *
 * Bytes 0x06-0x10 are control codes rendered as markup or white space, the
 * HTML meta characters (", &, <, >) are escaped, and 0x80-0xff are mapped to
 * named character entities. Every entity reference must be a defined name
 * and must end in ';', otherwise browsers show the raw text.
 */
static const char *char_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "<P> ", "<BR>",
  /* 0x08 */ "<P>", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "&quot;", "#", "$", "%", "&amp;", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "&lt;", "=", "&gt;", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
  /* 0x80 */ "", "", "&sbquo;", "&fnof;", "&bdquo;", "&hellip;",
             "&dagger;", "&Dagger;",
  /* 0x88 */ "^", "&permil;", "&Scaron;", "&lang;", "&OElig;", "", "", "",
  /* 0x90 */ "", "&lsquo;", "&rsquo;", "&ldquo;", "&rdquo;",
             "&middot;", "&ndash;", "&mdash;",
  /* 0x98 */ "&tilde;", "&trade;", "&scaron;", "&rang;", "&oelig;", "", "",
             "&Yuml;",
  /* 0xa0 */ "", "&iexcl;", "&cent;", "&pound;",
             "&curren;", "&yen;", "&brvbar;", "&sect;",
  /* 0xa8 */ "&quot;", "&copy;", "a", "&laquo;", "&not;", "-", "&reg;",
             "&macr;",
  /* 0xb0 */ "&deg;", "&plusmn;", "&sup2;", "&sup3;",
             "&acute;", "&micro;", "&para;", "&middot;",
  /* 0xb8 */ "&cedil;", "&sup1;", "&deg;", "&raquo;",
             "&frac14;", "&frac12;", "&frac34;", "&iquest;",
  /* 0xc0 */ "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;",
             "&Auml;", "&Aring;", "&AElig;", "&Ccedil;",
  /* 0xc8 */ "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;",
             "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;",
  /* 0xd0 */ "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;",
             "&Ocirc;", "&Otilde;", "&Ouml;", "&times;",
  /* 0xd8 */ "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;",
             "&Uuml;", "&Yacute;", "&THORN;", "&szlig;",
  /* 0xe0 */ "&agrave;", "&aacute;", "&acirc;", "&atilde;",
             "&auml;", "&aring;", "&aelig;", "&ccedil;",
  /* 0xe8 */ "&egrave;", "&eacute;", "&ecirc;", "&euml;",
             "&igrave;", "&iacute;", "&icirc;", "&iuml;",
  /* 0xf0 */ "&eth;", "&ntilde;", "&ograve;", "&oacute;",
             "&ocirc;", "&otilde;", "&ouml;", "&divide;",
  /* 0xf8 */ "&oslash;", "&ugrave;", "&uacute;", "&ucirc;",
             "&uuml;", "&yacute;", "&thorn;", "&yuml;"
};
93
/*
 * Output text for every input byte when the encoding is
 * PSICONV_ENCODING_UTF8 (gen_para selects this table in that case).
 * The lower half (0x00-0x7f) carries the same markup and escapes as
 * char_table; every byte from 0x80 upwards is copied to the output
 * unchanged as a one-byte string.
 */
static const char *utf_table[0x100] =
{
  /* 0x00 */ "", "", "", "", "", "", "<P> ", "<BR>",
  /* 0x08 */ "<P>", " ", "", "", "", "", "", "",
  /* 0x10 */ " ", "", "", "", "", "", "", "",
  /* 0x18 */ "", "", "", "", "", "", "", "",
  /* 0x20 */ " ", "!", "&quot;", "#", "$", "%", "&amp;", "'",
  /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
  /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
  /* 0x38 */ "8", "9", ":", ";", "&lt;", "=", "&gt;", "?",
  /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
  /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
  /* 0x80 */ "\x80","\x81","\x82","\x83","\x84","\x85","\x86","\x87",
  /* 0x88 */ "\x88","\x89","\x8a","\x8b","\x8c","\x8d","\x8e","\x8f",
  /* 0x90 */ "\x90","\x91","\x92","\x93","\x94","\x95","\x96","\x97",
  /* 0x98 */ "\x98","\x99","\x9a","\x9b","\x9c","\x9d","\x9e","\x9f",
  /* 0xa0 */ "\xa0","\xa1","\xa2","\xa3","\xa4","\xa5","\xa6","\xa7",
  /* 0xa8 */ "\xa8","\xa9","\xaa","\xab","\xac","\xad","\xae","\xaf",
  /* 0xb0 */ "\xb0","\xb1","\xb2","\xb3","\xb4","\xb5","\xb6","\xb7",
  /* 0xb8 */ "\xb8","\xb9","\xba","\xbb","\xbc","\xbd","\xbe","\xbf",
  /* 0xc0 */ "\xc0","\xc1","\xc2","\xc3","\xc4","\xc5","\xc6","\xc7",
  /* 0xc8 */ "\xc8","\xc9","\xca","\xcb","\xcc","\xcd","\xce","\xcf",
  /* 0xd0 */ "\xd0","\xd1","\xd2","\xd3","\xd4","\xd5","\xd6","\xd7",
  /* 0xd8 */ "\xd8","\xd9","\xda","\xdb","\xdc","\xdd","\xde","\xdf",
  /* 0xe0 */ "\xe0","\xe1","\xe2","\xe3","\xe4","\xe5","\xe6","\xe7",
  /* 0xe8 */ "\xe8","\xe9","\xea","\xeb","\xec","\xed","\xee","\xef",
  /* 0xf0 */ "\xf0","\xf1","\xf2","\xf3","\xf4","\xf5","\xf6","\xf7",
  /* 0xf8 */ "\xf8","\xf9","\xfa","\xfb","\xfc","\xfd","\xfe","\xff"
};
129
130 /* a flag to indicate the use of UTF8 */
131 static psiconv_encoding encoding= PSICONV_ENCODING_CP1252;
132
133 static psiconv_character_layout gen_base_char(const psiconv_font font,
134 const psiconv_color color,
135 const psiconv_color back_color);
136 static void diff_char(FILE *of, const psiconv_character_layout old,
137 const psiconv_character_layout new, int *flags);
138 static void gen_para(FILE *of, const psiconv_paragraph para,
139 const psiconv_character_layout base_char);
140
141 static void psiconv_gen_html_word(FILE *of,psiconv_word_f wf);
142 static void psiconv_gen_html_texted(FILE *of,psiconv_texted_f tf);
143
144 /* This is not necessarily the same as returned by basic_character_layout_status
145 This one is specific for the base point of HTML */
146 psiconv_character_layout gen_base_char(const psiconv_font font,
147 const psiconv_color color,
148 const psiconv_color back_color)
149 {
150 struct psiconv_character_layout_s base_char_struct =
151 {
152 NULL, /* color */
153 NULL, /* back_color */
154 13.0, /* font_size */
155 psiconv_bool_false, /* italic */
156 psiconv_bool_false, /* bold */
157 psiconv_normalscript, /* super_sub */
158 psiconv_bool_false, /* underline */
159 psiconv_bool_false, /* strikethrough */
160 NULL, /* font */
161 };
162 base_char_struct.color = color;
163 base_char_struct.back_color = back_color;
164 base_char_struct.font = font;
165 return psiconv_clone_character_layout(&base_char_struct);
166 }
167
168 /* flags & 1: 0 if no <FONT> was yet generated.
169 flags & 2: 1 if at end-of-paragraph
170 */
171 void diff_char(FILE *of, const psiconv_character_layout old,
172 const psiconv_character_layout new,
173 int *flags)
174 {
175 int font_set = 0;
176
177 if ((old->font_size != new->font_size) ||
178 (old->color->red != new->color->red) ||
179 (old->color->green != new->color->green) ||
180 (old->color->blue != new->color->blue) ||
181 (strcmp(old->font->name,new->font->name)) ||
182 (old->font->screenfont != new->font->screenfont) ||
183 ((*flags & 0x03) == 3)) {
184 if (old->italic)
185 fputs("</I>",of);
186 if (old->bold)
187 fputs("</B>",of);
188 if (old->underline)
189 fputs("</U>",of);
190 if (old->strikethrough)
191 fputs("</STRIKE>",of);
192 if (old->super_sub == psiconv_superscript)
193 fputs("</SUP>",of);
194 if (old->super_sub == psiconv_subscript)
195 fputs("</SUB>",of);
196 if ((*flags & 1) == 1)
197 fputs("</FONT>",of);
198 if ((*flags & 2) == 0) {
199 *flags |= 1;
200 fputs("<FONT SIZE=",of);
201 if (new->font_size <= 8.0)
202 fputs("1",of);
203 else if (new->font_size <= 10.0)
204 fputs("2",of);
205 else if (new->font_size <= 12.0)
206 fputs("3",of);
207 else if (new->font_size <= 14.0)
208 fputs("4",of);
209 else if (new->font_size <= 18.0)
210 fputs("5",of);
211 else if (new->font_size <= 24.0)
212 fputs("6",of);
213 else
214 fputs("7",of);
215 fprintf(of," COLOR=#%02x%02x%02x",new->color->red,new->color->green,
216 new->color->blue);
217 if (new->font->screenfont == psiconv_font_sansserif)
218 fprintf(of," FACE=\"%s, Sans-Serif\">",new->font->name);
219 else if (new->font->screenfont == psiconv_font_nonprop)
220 fprintf(of," FACE=\"%s, Monospace\">",new->font-> name);
221 else if (new->font->screenfont == psiconv_font_serif)
222 fprintf(of," FACE=\"%s, Serif\">",new->font-> name);
223 else
224 fprintf(of," FACE=\"%s, Serif\">",new->font-> name);
225 }
226 if (new->italic)
227 fputs("<I>",of);
228 if (new->bold)
229 fputs("<B>",of);
230 if (new->underline)
231 fputs("<U>",of);
232 if (new->strikethrough)
233 fputs("<STRIKE>",of);
234 if (new->super_sub == psiconv_superscript)
235 fputs("<SUP>",of);
236 if (new->super_sub == psiconv_subscript)
237 fputs("<SUB>",of);
238 } else {
239 if (font_set || (old->italic != new->italic)) {
240 if (old->italic)
241 fputs("</I>",of);
242 else
243 fputs("<I>",of);
244 }
245 if (old->bold != new->bold) {
246 if (old->bold)
247 fputs("</B>",of);
248 else
249 fputs("<B>",of);
250 }
251 if (old->underline != new->underline) {
252 if (old->underline)
253 fputs("</U>",of);
254 else
255 fputs("<U>",of);
256 }
257 if (old->strikethrough != new->strikethrough) {
258 if (old->strikethrough)
259 fputs("</STRIKE>",of);
260 else
261 fputs("<STRIKE>",of);
262 }
263 if (old->super_sub != new->super_sub) {
264 if (old->super_sub == psiconv_superscript)
265 fputs("</SUP>",of);
266 else if (old->super_sub == psiconv_subscript)
267 fputs("</SUB>",of);
268 if (new->super_sub == psiconv_superscript)
269 fputs("<SUP>",of);
270 else if (new->super_sub == psiconv_subscript)
271 fputs("<SUB>",of);
272 }
273 }
274 }
275
276 void gen_para(FILE *of, const psiconv_paragraph para,
277 const psiconv_character_layout base_char)
278 {
279 int i,j,loc;
280 psiconv_character_layout cur_char;
281 psiconv_in_line_layout inl;
282 int flags = 0;
283
284 char **table;
285 if (encoding == PSICONV_ENCODING_UTF8){
286 table=(char**)utf_table;
287 }else{
288 table=(char**)char_table;
289 }
290
291
292
293 fputs("<P",of);
294 if (para->base_paragraph->justify_hor == psiconv_justify_left)
295 fputs(" ALIGN=left",of);
296 else if (para->base_paragraph->justify_hor == psiconv_justify_right)
297 fputs(" ALIGN=right",of);
298 else if (para->base_paragraph->justify_hor == psiconv_justify_centre)
299 fputs(" ALIGN=center",of);
300 else if (para->base_paragraph->justify_hor == psiconv_justify_full)
301 fputs(" ALIGN=left",of);
302 fputs(">",of);
303 if (para->base_paragraph->bullet->on)
304 fputs("<UL><LI>",of);
305
306 cur_char = base_char;
307
308 if (psiconv_list_length(para->in_lines) == 0) {
309 diff_char(of,cur_char,para->base_character,&flags);
310 cur_char = para->base_character;
311 }
312 loc = 0;
313
314 for (i = 0; i < psiconv_list_length(para->in_lines); i++) {
315 inl = psiconv_list_get(para->in_lines,i);
316 diff_char(of,cur_char,inl->layout,&flags);
317 cur_char = inl->layout;
318 for (j = loc; j < inl->length + loc; j ++) {
319 fputs(table[(unsigned char) (para->text[j])],of);
320 }
321 loc = j;
322 }
323
324 if (loc < strlen(para->text)) {
325 diff_char(of,cur_char,para->base_character,&flags);
326 cur_char = para->base_character;
327 for (j = loc; j < strlen(para->text); j ++) {
328 fputs(table[(unsigned char) (para->text[j])],of);
329 }
330 }
331
332 if (strlen(para->text) == 0)
333 fputs("<BR>",of);
334
335 flags |= 2;
336 diff_char(of,cur_char,base_char,&flags);
337
338 if (para->base_paragraph->bullet->on)
339 fputs("</UL>",of);
340
341 fputs("</P>\n",of);
342 }
343
344 int psiconv_gen_html(const char * filename,const psiconv_file file,
345 const char *dest, const psiconv_encoding encoding_type)
346 {
347 FILE *of = fopen(filename,"w");
348 if (! of)
349 return -1;
350
351 encoding = encoding_type;
352
353 if (file->type == psiconv_word_file) {
354 psiconv_gen_html_word(of,(psiconv_word_f) file->file);
355 } else if (file->type == psiconv_texted_file) {
356 psiconv_gen_html_texted(of,(psiconv_texted_f) file->file);
357 } else {
358 fclose(of);
359 return -1;
360 }
361 return fclose(of);
362 }
363
364 void psiconv_gen_html_texted(FILE *of,psiconv_texted_f tf)
365 {
366 psiconv_character_layout base_char;
367 psiconv_paragraph para;
368 int i;
369
370 /* We have nothing better */
371 base_char = psiconv_basic_character_layout();
372
373 fputs("<!doctype html public \"-//W3C//DTD HTML 3.2 Final//EN\">", of);
374 fputs("\n<HTML>\n<HEAD>\n <META NAME=\"GENERATOR\"", of);
375 fputs(" CONTENT=\"psiconv-" VERSION "\">\n", of);
376
377 fputs(" <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html", of);
378 if (encoding == PSICONV_ENCODING_UTF8){
379 fputs("; CHARSET=UTF-8", of);
380 }
381 fputs("\">\n", of);
382
383 fputs("</HEAD>\n",of);
384 fputs("<BODY>\n",of);
385 for (i = 0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) {
386 para = psiconv_list_get(tf->texted_sec->paragraphs,i);
387 gen_para(of,para,base_char);
388 }
389 fputs("</BODY>\n</HTML>\n",of);
390 psiconv_free_character_layout(base_char);
391 }
392
393
394 void psiconv_gen_html_word(FILE *of,psiconv_word_f wf)
395 {
396 int i;
397 psiconv_paragraph para;
398 psiconv_color white,black;
399 psiconv_character_layout base_char;
400
401 white = malloc(sizeof(*white));
402 black = malloc(sizeof(*black));
403 white->red = 0x00;
404 white->green = 0x00;
405 white->blue = 0x00;
406 black->red = 0xff;
407 black->green = 0xff;
408 black->blue = 0xff;
409
410 /* To keep from generating a font desc for each line */
411 base_char = gen_base_char(wf->styles_sec->normal->character->font,
412 black,white);
413
414 psiconv_free_color(black);
415 psiconv_free_color(white);
416
417 fputs("<!doctype html public \"-//W3C//DTD HTML 3.2 Final//EN\">", of);
418 fputs("\n<HTML>\n<HEAD>\n <META NAME=\"GENERATOR\"", of);
419 fputs(" CONTENT=\"psiconv-" VERSION "\">\n", of);
420
421 fputs(" <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html", of);
422 if (encoding == PSICONV_ENCODING_UTF8)
423 fputs("; CHARSET=UTF-8", of);
424 fputs("\">\n", of);
425
426 fputs("</HEAD>\n",of);
427 fputs("<BODY>\n",of);
428
429 for (i = 0; i < psiconv_list_length(wf->paragraphs); i++) {
430 para = psiconv_list_get(wf->paragraphs,i);
431 gen_para(of,para,base_char);
432 }
433 fputs("</BODY>\n</HTML>\n",of);
434 psiconv_free_character_layout(base_char);
435 }
436
437 static struct psiconv_fileformat_s ff =
438 {
439 "HTML3",
440 "HTML 3.2, not verified so probably not completely compliant",
441 &psiconv_gen_html
442 };
443
444 void init_html(void)
445 {
446 psiconv_list_add(fileformat_list,&ff);
447 }

frodo@frodo.looijaard.name
ViewVC Help
Powered by ViewVC 1.1.26