diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2006-09-15 11:26:07 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2006-09-15 11:26:07 +0000 |
commit | 5300eecfb324f3a29d891b3e229baef631dc6aeb (patch) | |
tree | d5082f25c6b239bcd018156e6fa54dc5f2c798ab /ext/nkf/nkf.c | |
parent | 976b4e5f8bedcd8285578c6da5117b6883ef1c02 (diff) | |
download | ruby-5300eecfb324f3a29d891b3e229baef631dc6aeb.tar.gz |
* ext/nkf/nkf-8/nkf.c: imported nkf 2.0.8 rev.110.
* Fix: check_bom cuts \xfe\xff\xXX\xXX of UTF-32.
* Add support --ic=UTF-32.
* Fix: can't guess UTF-16 and UTF-32.
* Fix: can't decode beyond BMP of UTF-16LE.
* ext/nkf/nkf.c (guess): Support UTF-32.
* ext/nkf/lib/kconv.rb (kconv): Support UTF-32.
* ext/nkf/lib/kconv.rb (to_utf32): new method.
* ext/nkf/lib/kconv.rb (to_utf32): new method.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10938 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf/nkf.c')
-rw-r--r-- | ext/nkf/nkf.c | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c index 8a4bcfce64..e12df16960 100644 --- a/ext/nkf/nkf.c +++ b/ext/nkf/nkf.c @@ -306,6 +306,8 @@ rb_nkf_guess1(VALUE obj, VALUE src) * "UTF-8" * when NKF::UTF16 * "UTF-16" + * when NKF::UTF32 + * "UTF-32" * when NKF::UNKNOWN * "UNKNOWN" * when NKF::BINARY @@ -345,6 +347,8 @@ rb_nkf_guess2(VALUE obj, VALUE src) code = _UTF8; } else if (strcmp(input_codename, "UTF-16") == 0) { code = _UTF16; + } else if (strcmp(input_codename, "UTF-32") == 0) { + code = _UTF32; } else if (strlen(input_codename) > 0) { code = _UNKNOWN; } @@ -382,16 +386,16 @@ rb_nkf_guess2(VALUE obj, VALUE src) * * Output is buffered (DEFAULT), Output is unbuffered. * - * === -j -s -e -w -w16 + * === -j -s -e -w -w16 -w32 * * Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP, - * UTF-8N, UTF-16BE. + * UTF-8N, UTF-16BE, UTF-32BE. * Without this option and compile option, ISO-2022-JP is assumed. * - * === -J -S -E -W -W16 + * === -J -S -E -W -W16 -W32 * * Input assumption is JIS 7 bit, Shift_JIS, EUC-JP, - * UTF-8, UTF-16LE. + * UTF-8, UTF-16, UTF-32. * * ==== -J * @@ -574,6 +578,16 @@ rb_nkf_guess2(VALUE obj, VALUE src) * * [UTF-16LE-BOM] UTF-16 Little Endian with BOM * + * [UTF-32] same as UTF-32BE + * + * [UTF-32BE] UTF-32 Big Endian without BOM + * + * [UTF-32BE-BOM] UTF-32 Big Endian with BOM + * + * [UTF-32LE] UTF-32 Little Endian without BOM + * + * [UTF-32LE-BOM] UTF-32 Little Endian with BOM + * * [UTF8-MAC] NKDed UTF-8, a.k.a. UTF8-NFD (input only) * * === --fb-{skip, html, xml, perl, java, subchar} @@ -587,10 +601,20 @@ rb_nkf_guess2(VALUE obj, VALUE src) * nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters. * 1st byte of argument is the escape character and following bytes are target characters. * - * === --disable-cp932ext + * === --no-cp932ext * * Handle the characters extended in CP932 as unassigned characters. 
* + * === --no-best-fit-chars + * + * When converting from Unicode to an encoded byte sequence, + * don't convert characters which are not round-trip safe. + * When converting from Unicode to Unicode, + * with this and the -x option, nkf can be used as a UTF converter. + * (In other words, without this and the -x option, nkf doesn't preserve some characters.) + * + * When nkf converts strings related to paths, you should use this option. + * * === --cap-input * * Decode hex encoded characters. |