From 8e70a60da0d3a34cf6c748d81a6fab42a5734e26 Mon Sep 17 00:00:00 2001 From: naruse Date: Mon, 27 Jun 2005 16:53:48 +0000 Subject: * ext/nkf/lib/kconv.rb: add Kconv::VERSION * ext/nkf/lib/kconv.rb (conv): can process arrayed options * ext/nkf/nkf-utf8/nkf.c: imported Revision 1.69 * ext/nkf/nkf-utf8/utf8tbl.c: imported Revision 1.9 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8667 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ ext/nkf/lib/kconv.rb | 50 +++++++++++----------------------------------- ext/nkf/nkf-utf8/nkf.c | 25 ++++++++++------------- ext/nkf/nkf-utf8/utf8tbl.c | 18 ++++++++++++++++- 4 files changed, 47 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 060d78d56a..23f1a673a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Tue Jun 28 01:52:00 2005 NARUSE, Yui + + * ext/nkf/lib/kconv.rb: add Kconv::VERSION + * ext/nkf/lib/kconv.rb (conv): can process arrayed options + * ext/nkf/nkf-utf8/nkf.c: imported Revision 1.69 + * ext/nkf/nkf-utf8/utf8tbl.c: imported Revision 1.9 + Sat Jun 25 17:12:20 2005 GOTOU Yuuzou * lib/webrick/httputils.rb (WEBrick::HTTPUtils.parse_query): should diff --git a/ext/nkf/lib/kconv.rb b/ext/nkf/lib/kconv.rb index fecddee978..8d57c2a5d2 100644 --- a/ext/nkf/lib/kconv.rb +++ b/ext/nkf/lib/kconv.rb @@ -11,6 +11,8 @@ module Kconv # Public Constants # + VERSION = '1.8' + #Constant of Encoding AUTO = ::NKF::AUTO JIS = ::NKF::JIS @@ -28,6 +30,8 @@ module Kconv # Private Constants # + REVISON = %q$Revison$ + #Regexp of Encoding RegexpShiftjis = /\A(?: [\x00-\x7f\xa1-\xdf] | @@ -100,30 +104,6 @@ module Kconv UNKNOWN => :unknown } - SYMBOL_TO_CONSTANT = { - :auto => AUTO, - :unknown => UNKNOWN, - :binary => BINARY, - :ascii => ASCII, - :ascii => ASCII, - :shiftjis => SJIS, - :sjis => SJIS, - :cp932 => SJIS, - :eucjp => EUC, - :euc => EUC, - :eucjpms => EUC, - :iso2022jp => JIS, - :jis => JIS, - :utf8 => UTF8, - :utf8n => UTF8, - :utf16 => UTF16, - :utf16be => UTF16, - :utf16ben => UTF16, - :utf16le => UTF16, - :utf16len => UTF16, - :noconv => NOCONV - } - # # Public Methods # @@ -186,14 +166,9 @@ module Kconv to = symbol_to_option(option[0]) from = symbol_to_option(option[1]).to_s.sub(/(-[jesw])/o){$1.upcase} - opt = Array.new - if option[2].is_a? Array - opt << option[2].map{|x|symbol_to_option(x)}.compact.join('') - elsif option[2].is_a? String - opt << option[2] - end + opt = option[2..-1].to_a.map{|x|symbol_to_option(x)}.compact.join('') - nkf_opt = ('-x -m0 %s %s %s' % [to, from, opt.join(' ')]) + nkf_opt = ('-x -m0 %s %s %s' % [to, from, opt]) result = ::NKF::nkf( nkf_opt, str) end module_function :conv @@ -300,12 +275,12 @@ module Kconv # # Private Methods # - + private def symbol_to_option(symbol) - if symbol.to_s[0] == ?- - return symbol.to_s - elsif symbol.is_a? Integer + if symbol.is_a? Integer symbol = CONSTANT_TO_SYMBOL[symbol] + elsif symbol.to_s[0] == ?- + return symbol.to_s end begin SYMBOL_TO_OPTION[ symbol.to_s.downcase.delete('-_').to_sym ] @@ -313,13 +288,12 @@ module Kconv return nil end end -private :symbol_to_option module_function :symbol_to_option end class String - def kconv(out_code, in_code=Kconv::AUTO) - Kconv::kconv(self, out_code, in_code) + def kconv(*args) + Kconv::kconv(self, *args) end def conv(*args) diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 2973799fb2..9fd1436239 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -41,7 +41,7 @@ ***********************************************************************/ /* $Id$ */ #define NKF_VERSION "2.0.5" -#define NKF_RELEASE_DATE "2005-04-10" +#define NKF_RELEASE_DATE "2005-06-28" #include "config.h" static char *CopyRight = @@ -970,13 +970,9 @@ options(cp) if (option_mode==1) return; - if (*cp++ != '-') - return; + while(*cp && *cp++!='-'); while (*cp) { - if (p && !*cp) { - cp = p; - p = 0; - } + p = 0; switch (*cp++) { case '-': /* literal options */ if (!*cp) { /* ignore the rest of arguments */ @@ -986,8 +982,8 @@ options(cp) for (i=0;i kata - bit:2 kata -> hira + bit:1 katakana->hiragana + bit:2 hiragana->katakana */ if ('9'>= *cp && *cp>='0') hira_f |= (*cp++ -'0'); @@ -1320,8 +1317,7 @@ options(cp) continue; case ' ': /* module muliple options in a string are allowed for Perl moudle */ - while(*cp && *cp!='-') cp++; - if(*cp=='-') cp++; + while(*cp && *cp++!='-'); continue; default: /* bogus option but ignored */ @@ -4732,7 +4728,7 @@ usage() fprintf(stderr,"t no conversion\n"); fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n"); fprintf(stderr,"r {de/en}crypt ROT13/47\n"); - fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n"); + fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"); fprintf(stderr,"v Show this usage. V: show version\n"); fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"); fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); @@ -4755,6 +4751,7 @@ usage() fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n"); fprintf(stderr," --x0212 Convert JISX0212\n"); fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n"); + fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); #ifdef INPUT_OPTION fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n"); #endif diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c index 12e2403cf8..5b6c09ad23 100644 --- a/ext/nkf/nkf-utf8/utf8tbl.c +++ b/ext/nkf/nkf-utf8/utf8tbl.c @@ -1275,6 +1275,22 @@ unsigned short euc_to_utf8_FC[] = { 0xFA29, 0x969D, 0x96AF, 0x9733, 0x973B, 0x9743, 0x974D, 0x974F, 0x9751, 0x9755, 0x9857, 0x9865, 0xFA2A, 0xFA2B, 0x9927, 0xFA2C, 0x999E, 0x9A4E, 0x9AD9, 0x9ADC, 0x9B75, 0x9B72, 0x9B8F, 0x9BB1, + 0x9BBB, 0x9C00, 0x9D70, 0x9D6B, 0xFA2D, 0x9E19, 0x9ED1, 0, + 0, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, + 0x2177, 0x2178, 0x2179, 0xFFE2, 0x00A6, 0xFF07, 0xFF02, +}; + +/* Microsoft UCS Mapping Compatible */ +unsigned short euc_to_utf8_FC_ms[] = { + 0x91D7, 0x91DE, 0x91ED, 0x91EE, 0x91E4, 0x91E5, 0x9206, + 0x9210, 0x920A, 0x923A, 0x9240, 0x923C, 0x924E, 0x9259, 0x9251, + 0x9239, 0x9267, 0x92A7, 0x9277, 0x9278, 0x92E7, 0x92D7, 0x92D9, + 0x92D0, 0xFA27, 0x92D5, 0x92E0, 0x92D3, 0x9325, 0x9321, 0x92FB, + 0xFA28, 0x931E, 0x92FF, 0x931D, 0x9302, 0x9370, 0x9357, 0x93A4, + 0x93C6, 0x93DE, 0x93F8, 0x9431, 0x9445, 0x9448, 0x9592, 0xF9DC, + 0xFA29, 0x969D, 0x96AF, 0x9733, 0x973B, 0x9743, 0x974D, 0x974F, + 0x9751, 0x9755, 0x9857, 0x9865, 0xFA2A, 0xFA2B, 0x9927, 0xFA2C, + 0x999E, 0x9A4E, 0x9AD9, 0x9ADC, 0x9B75, 0x9B72, 0x9B8F, 0x9BB1, 0x9BBB, 0x9C00, 0x9D70, 0x9D6B, 0xFA2D, 0x9E19, 0x9ED1, 0, 0, 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0xFFE2, 0xFFE4, 0xFF07, 0xFF02, @@ -2300,7 +2316,7 @@ unsigned short * euc_to_utf8_2bytes_ms[] = { euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3, euc_to_utf8_F4, euc_to_utf8_F5, 0, 0, 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB, - euc_to_utf8_FC, 0, 0, + euc_to_utf8_FC_ms, 0, 0, }; #ifdef X0212_ENABLE -- cgit v1.2.3