aboutsummaryrefslogtreecommitdiffstats
path: root/ext/nkf/nkf.c
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2006-09-15 11:26:07 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2006-09-15 11:26:07 +0000
commit5300eecfb324f3a29d891b3e229baef631dc6aeb (patch)
treed5082f25c6b239bcd018156e6fa54dc5f2c798ab /ext/nkf/nkf.c
parent976b4e5f8bedcd8285578c6da5117b6883ef1c02 (diff)
downloadruby-5300eecfb324f3a29d891b3e229baef631dc6aeb.tar.gz
* ext/nkf/nkf-8/nkf.c: imported nkf 2.0.8 rev.110.
* Fix: check_bom cuts \xfe\xff\xXX\xXX of UTF-32. * Add support --ic=UTF-32. * Fix: can't guess UTF-16 and UTF-32. * Fix: can't decode beyond BMP of UTF-16LE. * ext/nkf/nkf.c (guess): Support UTF-32. * ext/nkf/lib/kconv.rb (kconv): Support UTF-32. * ext/nkf/lib/kconv.rb (to_utf32): new method. * ext/nkf/lib/kconv.rb (to_utf32): new method. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10938 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf/nkf.c')
-rw-r--r--ext/nkf/nkf.c34
1 files changed, 29 insertions, 5 deletions
diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c
index 8a4bcfce64..e12df16960 100644
--- a/ext/nkf/nkf.c
+++ b/ext/nkf/nkf.c
@@ -306,6 +306,8 @@ rb_nkf_guess1(VALUE obj, VALUE src)
* "UTF-8"
* when NKF::UTF16
* "UTF-16"
+ * when NKF::UTF32
+ * "UTF-32"
* when NKF::UNKNOWN
* "UNKNOWN"
* when NKF::BINARY
@@ -345,6 +347,8 @@ rb_nkf_guess2(VALUE obj, VALUE src)
code = _UTF8;
} else if (strcmp(input_codename, "UTF-16") == 0) {
code = _UTF16;
+ } else if (strcmp(input_codename, "UTF-32") == 0) {
+ code = _UTF32;
} else if (strlen(input_codename) > 0) {
code = _UNKNOWN;
}
@@ -382,16 +386,16 @@ rb_nkf_guess2(VALUE obj, VALUE src)
*
* Output is buffered (DEFAULT), Output is unbuffered.
*
- * === -j -s -e -w -w16
+ * === -j -s -e -w -w16 -w32
*
* Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP,
- * UTF-8N, UTF-16BE.
+ * UTF-8N, UTF-16BE, UTF-32BE.
* Without this option and compile option, ISO-2022-JP is assumed.
*
- * === -J -S -E -W -W16
+ * === -J -S -E -W -W16 -W32
*
* Input assumption is JIS 7 bit, Shift_JIS, EUC-JP,
- * UTF-8, UTF-16LE.
+ * UTF-8, UTF-16, UTF-32.
*
* ==== -J
*
@@ -574,6 +578,16 @@ rb_nkf_guess2(VALUE obj, VALUE src)
*
* [UTF-16LE-BOM] UTF-16 Little Endian with BOM
*
+ * [UTF-32] same as UTF-32BE
+ *
+ * [UTF-32BE] UTF-32 Big Endian without BOM
+ *
+ * [UTF-32BE-BOM] UTF-32 Big Endian with BOM
+ *
+ * [UTF-32LE] UTF-32 Little Endian without BOM
+ *
+ * [UTF-32LE-BOM] UTF-32 Little Endian with BOM
+ *
* [UTF8-MAC] NKDed UTF-8, a.k.a. UTF8-NFD (input only)
*
* === --fb-{skip, html, xml, perl, java, subchar}
@@ -587,10 +601,20 @@ rb_nkf_guess2(VALUE obj, VALUE src)
* nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
* 1st byte of argument is the escape character and following bytes are target characters.
*
- * === --disable-cp932ext
+ * === --no-cp932ext
*
* Handle the characters extended in CP932 as unassigned characters.
*
+ * === --no-best-fit-chars
+ *
+ * When converting from Unicode to a byte encoding,
+ * don't convert characters which are not round-trip safe.
+ * When converting from Unicode to Unicode,
+ * with this and the -x option, nkf can be used as a UTF converter.
+ * (In other words, without this and the -x option, nkf doesn't preserve some characters.)
+ *
+ * When nkf converts strings related to paths, you should use this option.
+ *
* === --cap-input
*
* Decode hex encoded characters.