diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-01 15:22:25 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-01 15:22:25 +0000 |
commit | 7d847f7c377b6dbf85371c3e03599881b76f1954 (patch) | |
tree | 620178b6bee52e11f5ba2790031123443aecbb33 /ext/nkf/nkf-utf8/nkf.c | |
parent | 5f41f87d2ba568cb0ad1fdb523a4c60b15a40613 (diff) | |
download | ruby-7d847f7c377b6dbf85371c3e03599881b76f1954.tar.gz |
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c}: Update nkf.
* ext/nkf/nkf.c: fix documents.
* ext/nkf/lib/kconv.rb: fix documents.
(Kconv.is*): use valid_encoding?.
(Kconv.isjis): defined.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14833 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf/nkf-utf8/nkf.c')
-rw-r--r-- | ext/nkf/nkf-utf8/nkf.c | 266 |
1 files changed, 97 insertions, 169 deletions
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 66f01896d6..94deb13ed1 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -32,7 +32,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2007-12-23" +#define NKF_RELEASE_DATE "2007-01-02" #define COPY_RIGHT \ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \ "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon" @@ -205,11 +205,12 @@ void djgpp_setbinmode(FILE *fp) /* byte order */ - -#define ENDIAN_BIG 1234 -#define ENDIAN_LITTLE 4321 -#define ENDIAN_2143 2143 -#define ENDIAN_3412 3412 +enum byte_order { + ENDIAN_BIG = 1, + ENDIAN_LITTLE = 2, + ENDIAN_2143 = 3, + ENDIAN_3412 = 4 +}; /* ASCII CODE */ @@ -266,10 +267,10 @@ enum nkf_encodings { UTF_32LE, UTF_32LE_BOM, JIS_X_0201=0x1000, - JIS_X_0208, - JIS_X_0212, - JIS_X_0213_1, - JIS_X_0213_2, + JIS_X_0208=0x1001, + JIS_X_0212=0x1002, + JIS_X_0213_1=0x1003, + JIS_X_0213_2=0x1004, BINARY }; @@ -286,9 +287,9 @@ void w_oconv16(nkf_char c2, nkf_char c1); void w_oconv32(nkf_char c2, nkf_char c1); typedef struct { - char *name; - nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); - void (*oconv_func)(nkf_char c2, nkf_char c1); + const char *name; + nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0); + void (*oconv)(nkf_char c2, nkf_char c1); } nkf_native_encoding; nkf_native_encoding NkfEncodingASCII = { "US_ASCII", e_iconv, e_oconv }; @@ -300,21 +301,21 @@ nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; typedef struct { - int id; - char *name; - nkf_native_encoding *based_encoding; + const int id; + const char *name; + const nkf_native_encoding *base_encoding; } nkf_encoding; nkf_encoding nkf_encoding_table[] = { {ASCII, "ASCII", &NkfEncodingASCII}, {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, - {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingASCII}, + {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP}, {CP50220, "CP50220", &NkfEncodingISO_2022_JP}, {CP50221, "CP50221", &NkfEncodingISO_2022_JP}, {CP50222, "CP50222", &NkfEncodingISO_2022_JP}, {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP}, {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP}, {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS}, - {WINDOWS_31J, "WINDOWS-31J", &NkfEncodingShift_JIS}, + {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS}, {CP10001, "CP10001", &NkfEncodingShift_JIS}, {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP}, {CP51932, "CP51932", &NkfEncodingEUC_JP}, @@ -476,7 +477,8 @@ struct input_code{ }; static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ -static nkf_encoding *output_encoding; +static nkf_encoding *input_encoding = NULL; +static nkf_encoding *output_encoding = NULL; #if !defined(PERL_XS) && !defined(WIN32DLL) static nkf_char noconvert(FILE *f); @@ -601,7 +603,6 @@ static int nop_f = FALSE; static int binmode_f = TRUE; /* binary mode */ static int rot_f = FALSE; /* rot14/43 mode */ static int hira_f = FALSE; /* hira/kata henkan */ -static int input_f = FALSE; /* non fixed input code */ static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */ static int mime_decode_f = FALSE; /* mime decode is explicitly on */ @@ -753,11 +754,8 @@ static int fold_margin = FOLD_MARGIN; #endif /* process default */ -static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV; - -static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection; -/* s_iconv or oconv */ static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2; +static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection; static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection; static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection; @@ -948,7 +946,20 @@ static nkf_encoding *nkf_enc_find(const char *name) #define nkf_enc_name(enc) (enc)->name #define nkf_enc_to_index(enc) (enc)->id -#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding +#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding +#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv +#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv +#define nkf_enc_asciicompat(enc) (\ + nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\ + nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP) +#define nkf_enc_unicode_p(enc) (\ + nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\ + nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\ + nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32) +#define nkf_enc_cp5022x_p(enc) (\ + nkf_enc_to_index(enc) == CP50220 ||\ + nkf_enc_to_index(enc) == CP50221 ||\ + nkf_enc_to_index(enc) == CP50222) #ifdef WIN32DLL #include "nkf32dll.c" @@ -1294,7 +1305,7 @@ static const struct { {"katakana","h2"}, {"katakana-hiragana","h3"}, {"guess=", ""}, - {"guess", "g1"}, + {"guess", "g2"}, {"cp932", ""}, {"no-cp932", ""}, #ifdef X0212_ENABLE @@ -1358,7 +1369,6 @@ void options(unsigned char *cp) char codeset[32]; nkf_encoding *enc; - if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING); if (option_mode==1) return; while(*cp && *cp++!='-'); @@ -1396,14 +1406,12 @@ void options(unsigned char *cp) if (strcmp(long_option[i].name, "ic=") == 0){ nkf_str_upcase((char *)p, codeset, 32); enc = nkf_enc_find(codeset); - switch (nkf_enc_to_index(enc)) { - case ISO_2022_JP: - input_f = JIS_INPUT; - break; + if (!enc) continue; + input_encoding = enc; + switch (nkf_enc_to_index(input_encoding)) { case CP50220: case CP50221: case CP50222: - input_f = JIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; #endif @@ -1412,23 +1420,17 @@ void options(unsigned char *cp) #endif break; case ISO_2022_JP_1: - input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif break; case ISO_2022_JP_3: - input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif x0213_f = TRUE; break; - case SHIFT_JIS: - input_f = SJIS_INPUT; - break; case WINDOWS_31J: - input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; #endif @@ -1437,7 +1439,6 @@ void options(unsigned char *cp) #endif break; case CP10001: - input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; #endif @@ -1445,11 +1446,7 @@ void options(unsigned char *cp) ms_ucs_map_f = UCS_MAP_CP10001; #endif break; - case EUC_JP: - input_f = EUC_INPUT; - break; case CP51932: - input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; #endif @@ -1458,7 +1455,6 @@ void options(unsigned char *cp) #endif break; case EUCJP_MS: - input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif @@ -1467,7 +1463,6 @@ void options(unsigned char *cp) #endif break; case EUCJP_ASCII: - input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif @@ -1477,7 +1472,6 @@ void options(unsigned char *cp) break; case SHIFT_JISX0213: case SHIFT_JIS_2004: - input_f = SJIS_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; @@ -1485,50 +1479,36 @@ void options(unsigned char *cp) break; case EUC_JISX0213: case EUC_JIS_2004: - input_f = EUC_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif break; #ifdef UTF8_INPUT_ENABLE - case UTF_8: - case UTF_8N: - case UTF_8_BOM: - input_f = UTF8_INPUT; - break; #ifdef UNICODE_NORMALIZATION case UTF8_MAC: - input_f = UTF8_INPUT; nfc_f = TRUE; break; #endif case UTF_16: case UTF_16BE: case UTF_16BE_BOM: - input_f = UTF16_INPUT; input_endian = ENDIAN_BIG; break; case UTF_16LE: case UTF_16LE_BOM: - input_f = UTF16_INPUT; input_endian = ENDIAN_LITTLE; break; case UTF_32: case UTF_32BE: case UTF_32BE_BOM: - input_f = UTF32_INPUT; input_endian = ENDIAN_BIG; break; case UTF_32LE: case UTF_32LE_BOM: - input_f = UTF32_INPUT; input_endian = ENDIAN_LITTLE; break; #endif - default: - fprintf(stderr, "unknown input encoding: %s\n", codeset); - break; } continue; } @@ -1539,21 +1519,16 @@ void options(unsigned char *cp) if (enc <= 0) continue; output_encoding = enc; switch (nkf_enc_to_index(output_encoding)) { - case ISO_2022_JP: - output_conv = j_oconv; - break; case CP50220: - output_conv = j_oconv; - x0201_f = TRUE; + x0201_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif #ifdef UTF8_OUTPUT_ENABLE - ms_ucs_map_f = UCS_MAP_CP932; + ms_ucs_map_f = UCS_MAP_CP932; #endif break; case CP50221: - output_conv = j_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif @@ -1562,7 +1537,6 @@ void options(unsigned char *cp) #endif break; case ISO_2022_JP_1: - output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; #endif @@ -1571,7 +1545,6 @@ void options(unsigned char *cp) #endif break; case ISO_2022_JP_3: - output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; #endif @@ -1580,26 +1553,17 @@ void options(unsigned char *cp) cp932inv_f = FALSE; #endif break; - case SHIFT_JIS: - output_conv = s_oconv; - break; case WINDOWS_31J: - output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif break; case CP10001: - output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP10001; #endif break; - case EUC_JP: - output_conv = e_oconv; - break; case CP51932: - output_conv = e_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif @@ -1608,7 +1572,6 @@ void options(unsigned char *cp) #endif break; case EUCJP_MS: - output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; #endif @@ -1617,7 +1580,6 @@ void options(unsigned char *cp) #endif break; case EUCJP_ASCII: - output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; #endif @@ -1627,15 +1589,13 @@ void options(unsigned char *cp) break; case SHIFT_JISX0213: case SHIFT_JIS_2004: - output_conv = s_oconv; - x0213_f = TRUE; + x0213_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif break; case EUC_JISX0213: case EUC_JIS_2004: - output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; #endif @@ -1645,60 +1605,41 @@ void options(unsigned char *cp) #endif break; #ifdef UTF8_OUTPUT_ENABLE - case UTF_8: - case UTF_8N: - output_conv = w_oconv; - break; case UTF_8_BOM: - output_conv = w_oconv; output_bom_f = TRUE; break; - case UTF_16BE: - output_conv = w_oconv16; - break; case UTF_16: case UTF_16BE_BOM: - output_conv = w_oconv16; output_bom_f = TRUE; break; case UTF_16LE: - output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; + output_bom_f = FALSE; break; case UTF_16LE_BOM: - output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; break; - case UTF_32: - case UTF_32BE: - output_conv = w_oconv32; - break; case UTF_32BE_BOM: - output_conv = w_oconv32; output_bom_f = TRUE; break; case UTF_32LE: - output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; + output_bom_f = FALSE; break; case UTF_32LE_BOM: - output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; break; #endif - default: - fprintf(stderr, "unknown output encoding: %s\n", codeset); - break; } continue; } if (strcmp(long_option[i].name, "guess=") == 0){ - if (p[0] == '1') { - guess_f = 2; - } else { + if (p[0] == '0' || p[0] == '1') { guess_f = 1; + } else { + guess_f = 2; } continue; } @@ -1872,7 +1813,6 @@ void options(unsigned char *cp) #endif #ifdef UNICODE_NORMALIZATION if (strcmp(long_option[i].name, "utf8mac-input") == 0){ - input_f = UTF8_INPUT; nfc_f = TRUE; continue; } @@ -1912,21 +1852,18 @@ void options(unsigned char *cp) continue; case 'j': /* JIS output */ case 'n': - output_conv = j_oconv; output_encoding = nkf_enc_from_index(ISO_2022_JP); continue; case 'e': /* AT&T EUC output */ - output_conv = e_oconv; cp932inv_f = FALSE; output_encoding = nkf_enc_from_index(EUC_JP); continue; case 's': /* SJIS output */ - output_conv = s_oconv; - output_encoding = nkf_enc_from_index(SHIFT_JIS); + output_encoding = nkf_enc_from_index(WINDOWS_31J); continue; case 'l': /* ISO8859 Latin-1 support, no conversion */ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ - input_f = LATIN1_INPUT; + input_encoding = nkf_enc_from_index(ISO_8859_1); continue; case 'i': /* Kanji IN ESC-$-@/B */ if (*cp=='@'||*cp=='B') @@ -1967,7 +1904,7 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE case 'w': /* UTF-8 output */ if (cp[0] == '8') { - output_conv = w_oconv; cp++; + cp++; if (cp[0] == '0'){ cp++; output_encoding = nkf_enc_from_index(UTF_8N); @@ -1978,13 +1915,12 @@ void options(unsigned char *cp) } else { int enc_idx; if ('1'== cp[0] && '6'==cp[1]) { - output_conv = w_oconv16; cp+=2; + cp += 2; enc_idx = UTF_16; } else if ('3'== cp[0] && '2'==cp[1]) { - output_conv = w_oconv32; cp+=2; + cp += 2; enc_idx = UTF_32; } else { - output_conv = w_oconv; output_encoding = nkf_enc_from_index(UTF_8); continue; } @@ -2016,18 +1952,19 @@ void options(unsigned char *cp) case 'W': /* UTF input */ if (cp[0] == '8') { cp++; - input_f = UTF8_INPUT; + input_encoding = nkf_enc_from_index(UTF_8); }else{ + int enc_idx; if ('1'== cp[0] && '6'==cp[1]) { cp += 2; - input_f = UTF16_INPUT; input_endian = ENDIAN_BIG; + enc_idx = UTF_16; } else if ('3'== cp[0] && '2'==cp[1]) { cp += 2; - input_f = UTF32_INPUT; input_endian = ENDIAN_BIG; + enc_idx = UTF_32; } else { - input_f = UTF8_INPUT; + input_encoding = nkf_enc_from_index(UTF_8); continue; } if (cp[0]=='L') { @@ -2035,20 +1972,25 @@ void options(unsigned char *cp) input_endian = ENDIAN_LITTLE; } else if (cp[0] == 'B') { cp++; + input_endian = ENDIAN_BIG; } + enc_idx = enc_idx == UTF_16 + ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE) + : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE); + input_encoding = nkf_enc_from_index(enc_idx); } continue; #endif /* Input code assumption */ - case 'J': /* JIS input */ - input_f = JIS_INPUT; - continue; - case 'E': /* AT&T EUC input */ - input_f = EUC_INPUT; - continue; - case 'S': /* MS Kanji input */ - input_f = SJIS_INPUT; - continue; + case 'J': /* ISO-2022-JP input */ + input_encoding = nkf_enc_from_index(ISO_2022_JP); + continue; + case 'E': /* EUC-JP input */ + input_encoding = nkf_enc_from_index(EUC_JP); + continue; + case 'S': /* Windows-31J input */ + input_encoding = nkf_enc_from_index(WINDOWS_31J); + continue; case 'Z': /* Convert X0208 alphabet to asii */ /* alpha_f bit:0 Convert JIS X 0208 Alphabet to ASCII @@ -2160,10 +2102,10 @@ void options(unsigned char *cp) continue; #ifndef PERL_XS case 'g': - if (*cp == '1') { + if ('2' <= *cp && *cp <= '9') { guess_f = 2; cp++; - } else if (*cp == '0') { + } else if (*cp == '0' || *cp == '1') { guess_f = 1; cp++; } else { @@ -2200,7 +2142,7 @@ struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) { #ifdef INPUT_CODE_FIX - if (f || !input_f) + if (f || !input_encoding) #endif if (estab_f != f){ estab_f = f; @@ -2208,7 +2150,7 @@ void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_ch if (iconv_func #ifdef INPUT_CODE_FIX - && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */ + && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */ #endif ){ iconv = iconv_func; @@ -2588,7 +2530,8 @@ nkf_char noconvert(FILE *f) void module_connection(void) { - oconv = output_conv; + if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING); + oconv = nkf_enc_to_oconv(output_encoding); o_putc = std_putc; /* replace continucation module, from output side */ @@ -2648,7 +2591,7 @@ void module_connection(void) } #endif #ifdef UNICODE_NORMALIZATION - if (nfc_f && input_f == UTF8_INPUT){ + if (nfc_f){ i_nfc_getc = i_getc; i_getc = nfc_getc; i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc; } @@ -2661,18 +2604,8 @@ void module_connection(void) i_bgetc = i_getc; i_getc = broken_getc; i_bungetc = i_ungetc; i_ungetc = broken_ungetc; } - if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) { - set_iconv(-TRUE, e_iconv); - } else if (input_f == SJIS_INPUT) { - set_iconv(-TRUE, s_iconv); -#ifdef UTF8_INPUT_ENABLE - } else if (input_f == UTF8_INPUT) { - set_iconv(-TRUE, w_iconv); - } else if (input_f == UTF16_INPUT) { - set_iconv(-TRUE, w_iconv16); - } else if (input_f == UTF32_INPUT) { - set_iconv(-TRUE, w_iconv32); -#endif + if (input_encoding) { + set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding)); } else { set_iconv(FALSE, e_iconv); } @@ -2696,7 +2629,7 @@ void check_bom(FILE *f) if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0xFE){ if((c2 = (*i_getc)(f)) == 0xFF){ - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv32); } if (iconv == w_iconv32) { @@ -2708,7 +2641,7 @@ void check_bom(FILE *f) (*i_ungetc)(0xFE,f); }else if(c2 == 0xFF){ if((c2 = (*i_getc)(f)) == 0xFE){ - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv32); } if (iconv == w_iconv32) { @@ -2726,7 +2659,7 @@ void check_bom(FILE *f) case 0xEF: if((c2 = (*i_getc)(f)) == 0xBB){ if((c2 = (*i_getc)(f)) == 0xBF){ - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv); } if (iconv == w_iconv) { @@ -2742,7 +2675,7 @@ void check_bom(FILE *f) if((c2 = (*i_getc)(f)) == 0xFF){ if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){ - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv32); } if (iconv == w_iconv32) { @@ -2753,7 +2686,7 @@ void check_bom(FILE *f) }else (*i_ungetc)(c2,f); (*i_ungetc)(0x00,f); }else (*i_ungetc)(c2,f); - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv16); } if (iconv == w_iconv16) { @@ -2768,7 +2701,7 @@ void check_bom(FILE *f) if((c2 = (*i_getc)(f)) == 0xFE){ if((c2 = (*i_getc)(f)) == 0x00){ if((c2 = (*i_getc)(f)) == 0x00){ - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv32); } if (iconv == w_iconv32) { @@ -2779,7 +2712,7 @@ void check_bom(FILE *f) }else (*i_ungetc)(c2,f); (*i_ungetc)(0x00,f); }else (*i_ungetc)(c2,f); - if(!input_f){ + if(!input_encoding){ set_iconv(TRUE, w_iconv16); } if (iconv == w_iconv16) { @@ -2805,11 +2738,7 @@ nkf_char kanji_convert(FILE *f) nkf_char c3, c2=0, c1, c0=0; int is_8bit = FALSE; - if(input_f == SJIS_INPUT || input_f == EUC_INPUT -#ifdef UTF8_INPUT_ENABLE - || input_f == UTF8_INPUT || input_f == UTF16_INPUT -#endif - ){ + if (input_encoding && !nkf_enc_asciicompat(input_encoding)) { is_8bit = TRUE; } @@ -2826,12 +2755,12 @@ nkf_char kanji_convert(FILE *f) while ((c1 = (*i_getc)(f)) != EOF) { #ifdef INPUT_CODE_FIX - if (!input_f) + if (!input_encoding) #endif code_status(c1); if (c2) { /* second byte */ - if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) { + if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) { /* in case of 8th bit is on */ if (!estab_f&&!mime_decode_mode) { /* in case of not established yet */ @@ -2920,7 +2849,7 @@ nkf_char kanji_convert(FILE *f) SEND; } else #endif - if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) { + if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) { /* 8 bit code */ if (!estab_f && !iso8859_f) { /* not established yet */ @@ -5019,7 +4948,7 @@ void hira_conv(nkf_char c2, nkf_char c1) c2 = 0x24; (*o_hira_conv)(c2,c1); return; - } else if (c1 == 0x74 && (output_conv == w_oconv || output_conv == w_oconv16)) { + } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) { c2 = 0; c1 = CLASS_UNICODE | 0x3094; (*o_hira_conv)(c2,c1); @@ -6252,7 +6181,6 @@ void reinit(void) binmode_f = TRUE; rot_f = FALSE; hira_f = FALSE; - input_f = FALSE; alpha_f = FALSE; mime_f = MIME_DECODE_DEFAULT; mime_decode_f = FALSE; @@ -6318,7 +6246,6 @@ void reinit(void) kanji_intro = DEFAULT_J; ascii_intro = DEFAULT_R; fold_margin = FOLD_MARGIN; - output_conv = DEFAULT_CONV; oconv = DEFAULT_CONV; o_zconv = no_connection; o_fconv = no_connection; @@ -6353,6 +6280,7 @@ void reinit(void) iconv_for_check = 0; #endif input_codename = NULL; + input_encoding = NULL; output_encoding = nkf_enc_from_index(DEFAULT_ENCODING); #ifdef WIN32DLL reinitdll(); @@ -6491,7 +6419,7 @@ void show_configuration(void) "OFF" #endif "\n"); -fprintf(HELP_OUTPUT, " --help, --version output: " + fprintf(HELP_OUTPUT, " --help, --version output: " #if HELP_OUTPUT_HELP_OUTPUT "HELP_OUTPUT" #else |