--- psiconv/trunk/program/psiconv/gen_txt.c 2004/01/06 20:15:01 184 +++ psiconv/trunk/program/psiconv/gen_txt.c 2004/01/09 22:20:03 185 @@ -1,7 +1,7 @@ /* * gen_text.c - Part of psiconv, a PSION 5 file formats converter * Copyright (c) 1999 Andrew Johnson - * Portions Copyright (c) 1999 Frodo Looijaard + * Portions Copyright (c) 1999,2003 Frodo Looijaard * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,19 +18,13 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* - 2002/Apr. Keita KAWABE - Support for narrow build Asian Psions added. - - If the encoding_type is PSICONV_ENCODING_UTF8, use utf8_table for - character conversion. Otherwise use char_table. -*/ - #include "config.h" #include #include -#include "psiconv/data.h" -#include "psiconv/list.h" +#include +#include +#include +#include "general.h" #include "gen.h" #include "psiconv.h" @@ -38,201 +32,174 @@ #include "dmalloc.h" #endif +static void output_para(const psiconv_config config,psiconv_list list, + const psiconv_paragraph para,encoding encoding_type); +static void gen_txt_word(const psiconv_config config, psiconv_list list, + psiconv_word_f wf, encoding encoding_type); +static void gen_txt_texted(const psiconv_config config, psiconv_list list, + psiconv_texted_f tf, encoding encoding_type); +static int gen_txt(const psiconv_config config, psiconv_list list, + const psiconv_file file, const char *dest, + const encoding encoding_type); -/* - * Various string tables for HTML4 settings - */ - -/* Character conversion table */ -static const char *char_table[0x100] = { - /* 0x00 */ "", "", "", "", "", "", "\n", "\n", - /* 0x08 */ "\n", "\t", "", "", "", "", "", "", - /* 0x10 */ " ", "", "", "", "", "", "", "", - /* 0x18 */ "", "", "", "", "", "", "", "", - /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'", - /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/", - /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", - /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?", - /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G", - /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O", - /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W", - /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_", - /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g", - /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o", - /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", - /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "", - /* 0x80 */ "", "", ",", "f", ",,", "...", "+", "#", - /* 0x88 */ "^", "\176/oo","S", "<", "OE", "", "", "", - /* 0x90 */ "", "`", "'", "``", "''", "*", "-", "--", - /* 0x98 */ "~", "(TM)", "s", ">", "oe", "", "", "Y", - /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7", - /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf", - /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7", - /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf", - /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7", - /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf", - /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7", - /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf", - /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7", - /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef", - /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7", - /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff", -}; - -static const char *utf_table[0x100] = { - /* 0x00 */ "", "", "", "", "", "", "\n", "\n", - /* 0x08 */ "\n", "\t", "", "", "", "", "", "", - /* 0x10 */ " ", "", "", "", "", "", "", "", - /* 0x18 */ "", "", "", "", "", "", "", "", - /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'", - /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/", - /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", - /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?", - /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G", - /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O", - /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W", - /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_", - /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g", - /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o", - /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", - /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "\x7f", - /* 0x80 */ "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87", - /* 0x88 */ "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f", - /* 0x90 */ "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97", - /* 0x98 */ "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f", - /* 0xa0 */ "\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7", - /* 0xa8 */ "\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf", - /* 0xb0 */ "\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7", - /* 0xb8 */ "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf", - /* 0xc0 */ "\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7", - /* 0xc8 */ "\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf", - /* 0xd0 */ "\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7", - /* 0xd8 */ "\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf", - /* 0xe0 */ "\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7", - /* 0xe8 */ "\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef", - /* 0xf0 */ "\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7", - /* 0xf8 */ "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff", -}; - -/* a flag to indicate the use of UTF8 */ -static psiconv_encoding encoding = PSICONV_ENCODING_CP1252; - -/* Output a string, doing character conversions */ -static void fput_text(FILE * of, const char *text, int length) { - int j; - char **table; - - if (encoding == PSICONV_ENCODING_UTF8){ - table=(char**)utf_table; - }else{ - table=(char**)char_table; - } - - for (j = 0; j < length; j++) { - fputs(table[(unsigned char) (text[j])], of); - } -} +static struct fileformat_s ff = + { + "ASCII", + "Plain text without much layout", + gen_txt + }; -/* Output a paragraph */ -static void fput_para(FILE * of, - const psiconv_paragraph para,int extra_nl) +void output_para(const psiconv_config config,psiconv_list list, + const psiconv_paragraph para,encoding encoding_type) { - if (para->base_paragraph->bullet->on) { - fprintf(of, "%s ", char_table[para->base_paragraph->bullet->character]); - } - fput_text(of, para->text, strlen(para->text)); - fputs("\n", of); - if (extra_nl) - fputs("\n", of); + int i; + if (para && para->base_paragraph && para->base_paragraph->bullet && + para->base_paragraph->bullet->on) { + output_char(config,list,para->base_paragraph->bullet->character, + encoding_type); + output_char(config,list,' ', encoding_type); + output_char(config,list,' ', encoding_type); + output_char(config,list,' ', encoding_type); + } + if (para && para->text) { + for (i = 0; i < psiconv_unicode_strlen(para->text); i++) + switch (para->text[i]) { + case 0x06: + case 0x07: + case 0x08: + output_char(config,list,'\n',encoding_type); + break; + case 0x09: + case 0x0a: + output_char(config,list,'\t',encoding_type); + break; + case 0x0b: + case 0x0c: + output_char(config,list,'-',encoding_type); + break; + case 0x0f: + output_char(config,list,' ',encoding_type); + break; + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x0e: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + break; + default: + output_char(config,list,para->text[i],encoding_type); + break; + } + output_char(config,list,'\n',encoding_type); + } } -static void psiconv_gen_txt_texted(FILE * of, psiconv_texted_f tf) +void gen_txt_word(const psiconv_config config, psiconv_list list, + psiconv_word_f wf, encoding encoding_type) { - int i; - psiconv_paragraph para; + int i; + psiconv_paragraph para; - if (tf->page_sec->header->text) { - if (tf->page_sec->header->on_first_page) { - for (i=0; i < psiconv_list_length(tf->page_sec->header->text->paragraphs); i++) { - para = psiconv_list_get(tf->page_sec->header->text->paragraphs, i); - fput_text(of, para->text, strlen(para->text)); - fputs("\n", of); - } - } + if (wf && wf->page_sec && wf->page_sec->header && + wf->page_sec->header->text && wf->page_sec->header->text->paragraphs) { + for (i=0; + i < psiconv_list_length(wf->page_sec->header-> + text->paragraphs); i++) { + para = psiconv_list_get(wf->page_sec->header->text->paragraphs, + i); + output_para(config,list,para,encoding_type); } - fputs("\n",of); + } + output_char(config,list,'\n',encoding_type); - for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) { - para = psiconv_list_get(tf->texted_sec->paragraphs, i); - fput_para(of, para,0); - } + if (wf && wf->paragraphs) + for (i=0; i < psiconv_list_length(wf->paragraphs); i++) { + para = psiconv_list_get(wf->paragraphs, i); + output_para(config, list,para,encoding_type); + } - fputs("\n",of); - if (tf->page_sec->header->text) { - for (i=0; i < psiconv_list_length(tf->page_sec->footer->text->paragraphs); i++) { - para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i); - fput_text(of, para->text, strlen(para->text)); - fputs("\n", of); - } + output_char(config,list,'\n',encoding_type); + + if (wf && wf->page_sec && wf->page_sec->footer && + wf->page_sec->footer->text && wf->page_sec->footer->text->paragraphs) { + for (i=0; + i < psiconv_list_length(wf->page_sec->footer-> + text->paragraphs); i++) { + para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i); + output_para(config,list,para,encoding_type); } + } } -static void psiconv_gen_txt_word(FILE * of, psiconv_word_f wf) +void gen_txt_texted(const psiconv_config config, psiconv_list list, + psiconv_texted_f tf, encoding encoding_type) { - int i; - psiconv_paragraph para; - - if (wf->page_sec->header->on_first_page) { - for (i=0; i < psiconv_list_length(wf->page_sec->header->text->paragraphs); i++) { - para = psiconv_list_get(wf->page_sec->header->text->paragraphs, i); - fput_text(of, para->text, strlen(para->text)); - fputs("\n\n", of); - } + int i; + psiconv_paragraph para; + + if (tf && tf->page_sec && tf->page_sec->header && + tf->page_sec->header->text && tf->page_sec->header->text->paragraphs) { + for (i=0; + i < psiconv_list_length(tf->page_sec->header-> + text->paragraphs); i++) { + para = psiconv_list_get(tf->page_sec->header->text->paragraphs, + i); + output_para(config,list,para,encoding_type); } - fputs("\n\n", of); + } + output_char(config,list,'\n',encoding_type); - for (i=0; i < psiconv_list_length(wf->paragraphs); i++) { - para = psiconv_list_get(wf->paragraphs, i); - fput_para(of, para,1); - } + if (tf && tf->texted_sec && tf->texted_sec->paragraphs) + for (i=0; i < psiconv_list_length(tf->texted_sec->paragraphs); i++) { + para = psiconv_list_get(tf->texted_sec->paragraphs, i); + output_para(config, list,para,encoding_type); + } - fputs("\n\n", of); - for (i=0; i < psiconv_list_length(wf->page_sec->footer->text->paragraphs); i++) { - para = psiconv_list_get(wf->page_sec->footer->text->paragraphs, i); - fput_text(of, para->text, strlen(para->text)); - fputs("\n\n", of); + output_char(config,list,'\n',encoding_type); + + if (tf && tf->page_sec && tf->page_sec->footer && + tf->page_sec->footer->text && tf->page_sec->footer->text->paragraphs) { + for (i=0; + i < psiconv_list_length(tf->page_sec->footer-> + text->paragraphs); i++) { + para = psiconv_list_get(tf->page_sec->footer->text->paragraphs, i); + output_para(config,list,para,encoding_type); } + } } -static int psiconv_gen_txt(const char *filename, const psiconv_file file, - const char *dest, - const psiconv_encoding encoding_type) +int gen_txt(const psiconv_config config, psiconv_list list, + const psiconv_file file, const char *dest, + const encoding encoding_type) { - FILE *of = fopen(filename,"w"); - if (! of) - return -1; - - encoding=encoding_type; - if (file->type == psiconv_word_file) { - psiconv_gen_txt_word(of,(psiconv_word_f) file->file); + gen_txt_word(config,list,(psiconv_word_f) file->file,encoding_type); + return 0; } else if (file->type == psiconv_texted_file) { - psiconv_gen_txt_texted(of,(psiconv_texted_f) file->file); - } else { - fclose(of); + gen_txt_texted(config,list,(psiconv_texted_f) file->file,encoding_type); + return 0; + } else return -1; - } - return fclose(of); } -static struct psiconv_fileformat_s ff = - { - "ASCII", - "Plain text without much layout", - psiconv_gen_txt - }; - void init_txt(void) { psiconv_list_add(fileformat_list,&ff);