From 8b60d65bcb516df0dc0a5f0de695fc5de8231f06 Mon Sep 17 00:00:00 2001 From: naruse Date: Fri, 5 Nov 2004 10:07:16 +0000 Subject: follow CVS Head of original nkf. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7213 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/nkf/nkf-utf8/nkf.c | 90 ++++++++++++++++++++++++++++++++++++---------- ext/nkf/nkf-utf8/utf8tbl.c | 60 +++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 19 deletions(-) (limited to 'ext/nkf/nkf-utf8') diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index aa7c459b83..458fbbc0e8 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -46,7 +46,7 @@ static char *CopyRight = static char *Version = "2.0"; static char *Patchlevel = - "4/0401/Shinji Kono"; + "4/0410/Shinji Kono"; /* ** @@ -198,7 +198,7 @@ static char *Patchlevel = #define UTF8 12 #define UTF8_INPUT 13 -#define UTF16_INPUT 14 +#define UTF16LE_INPUT 14 #define UTF16BE_INPUT 15 #define WISH_TRUE 15 @@ -376,8 +376,9 @@ static int x0201_f = NO_X0201; /* Assume NO JISX0201 */ #endif static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */ #ifdef UTF8_OUTPUT_ENABLE -static int w_oconv16_begin_f= 0; /* utf-16 header */ +static int unicode_bom_f= 0; /* Output Unicode BOM */ static int w_oconv16_LE = 0; /* utf-16 little endian */ +static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */ #endif @@ -443,7 +444,7 @@ STATIC void s_status PROTO((struct input_code *, int)); #ifdef UTF8_INPUT_ENABLE STATIC void w_status PROTO((struct input_code *, int)); STATIC void w16_status PROTO((struct input_code *, int)); -static int utf16_mode = UTF16_INPUT; +static int utf16_mode = UTF16LE_INPUT; #endif struct input_code input_code_list[] = { @@ -892,6 +893,7 @@ struct { #ifdef UTF8_OUTPUT_ENABLE {"utf8", "w"}, {"utf16", "w16"}, + {"ms-ucs-map", ""}, #endif #ifdef UTF8_INPUT_ENABLE {"utf8-input", "W"}, @@ -1007,6 +1009,12 @@ options(cp) exec_f = -1; return; } +#endif +#ifdef UTF8_OUTPUT_ENABLE + if (strcmp(long_option[i].name, "ms-ucs-map") == 0){ + ms_ucs_map_f = TRUE; + continue; + } #endif if (strcmp(long_option[i].name, "prefix=") == 0){ if (*p == '=' && ' ' < p[1] && p[1] < 128){ @@ -1082,17 +1090,23 @@ options(cp) if ('1'== cp[0] && '6'==cp[1]) { output_conv = w_oconv16; cp+=2; if (cp[0]=='L') { - w_oconv16_begin_f=2; cp++; + unicode_bom_f=2; cp++; w_oconv16_LE = 1; if (cp[0] == '0'){ - w_oconv16_begin_f=1; cp++; + unicode_bom_f=1; cp++; } } else if (cp[0] == 'B') { - w_oconv16_begin_f=2; cp++; + unicode_bom_f=2; cp++; if (cp[0] == '0'){ - w_oconv16_begin_f=1; cp++; + unicode_bom_f=1; cp++; } - } + } + } else if (cp[0] == '8') { + output_conv = w_oconv; cp++; + unicode_bom_f=2; + if (cp[0] == '0'){ + unicode_bom_f=1; cp++; + } } else output_conv = w_oconv; continue; @@ -1100,7 +1114,16 @@ options(cp) #ifdef UTF8_INPUT_ENABLE case 'W': /* UTF-8 input */ if ('1'== cp[0] && '6'==cp[1]) { - input_f = UTF16_INPUT; + input_f = UTF16LE_INPUT; + if (cp[0]=='L') { + cp++; + } else if (cp[0] == 'B') { + cp++; + input_f = UTF16BE_INPUT; + } + } else if (cp[0] == '8') { + cp++; + input_f = UTF8_INPUT; } else input_f = UTF8_INPUT; continue; @@ -1760,7 +1783,7 @@ module_connection() #ifdef UTF8_INPUT_ENABLE } else if (input_f == UTF8_INPUT) { set_iconv(-TRUE, w_iconv); - } else if (input_f == UTF16_INPUT) { + } else if (input_f == UTF16LE_INPUT) { set_iconv(-TRUE, w_iconv16); #endif } else { @@ -2364,7 +2387,7 @@ w_iconv16(c2, c1, c0) int ret; if (c2==0376 && c1==0377){ - utf16_mode = UTF16_INPUT; + utf16_mode = UTF16LE_INPUT; return 0; } else if (c2==0377 && c1==0376){ utf16_mode = UTF16BE_INPUT; @@ -2424,6 +2447,7 @@ e2w_conv(c2, c1) { extern unsigned short euc_to_utf8_1byte[]; extern unsigned short * euc_to_utf8_2bytes[]; + extern unsigned short * euc_to_utf8_2bytes_ms[]; unsigned short *p; if (c2 == X0201) { @@ -2432,7 +2456,7 @@ e2w_conv(c2, c1) c2 &= 0x7f; c2 = (c2&0x7f) - 0x21; if (0<=c2 && c2