about | summary | refs | log | tree | commit | diff | stats
path: root/enc/trans
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-11-24 00:08:04 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-11-24 00:08:04 +0000
commit: 38b482be8c3b53809fa44e35cb0eded148b132c8 (patch)
tree: b0aab19655364ea093efdceb915cfd61ba245416 /enc/trans
parent: bcf80a5140c1b08899b20d9bbcd5fd9e13db4017 (diff)
download: ruby-38b482be8c3b53809fa44e35cb0eded148b132c8.tar.gz
* enc/trans/utf_16_32.trans: add the UTF-32 converter.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29895 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/trans')
-rw-r--r-- enc/trans/utf_16_32.trans 87
1 files changed, 87 insertions, 0 deletions
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 01caffe02c..c841df035f 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -26,6 +26,10 @@
transcode_generate_node(ActionMap.parse(map), "from_UTF_16")
map = {}
+ map["{00-ff}{00-ff}{00-ff}{00-ff}"] = :func_si # any four-byte input unit is routed to the :func_si action
+ transcode_generate_node(ActionMap.parse(map), "from_UTF_32") # presumably generates the from_UTF_32 table that rb_from_UTF_32 references -- confirm
+
+ map = {}
map["{00-7f}"] = :func_so
map["{c2-df}{80-bf}"] = :func_so
map["e0{a0-bf}{80-bf}"] = :func_so
@@ -321,6 +325,48 @@ fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char
return 0;
}
+/*
+ * Classify one 4-byte UTF-32 input unit, using the one-byte decoder state
+ * at statep to remember the byte order.
+ *
+ * State 0 (no byte order chosen yet): the unit must be a byte order mark.
+ *   00 00 FE FF selects BE, FF FE 00 00 selects LE; either returns ZERObt
+ *   (presumably "consume the unit, emit nothing" -- confirm against the
+ *   transcoder headers).  Anything else is INVALID.
+ * State BE / LE: returns FUNso when the unit encodes a value in
+ *   U+0000..U+10FFFF outside the D800..DFFF surrogate range, otherwise
+ *   INVALID.
+ *
+ * The length argument l is not consulted; s[0]..s[3] are read directly.
+ */
+static VALUE
+fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
+{
+    unsigned char *endian = statep;
+    const unsigned char current = *endian;
+
+    if (current == 0) {
+        const int bom_be = s[0] == 0x00 && s[1] == 0x00 && s[2] == 0xFE && s[3] == 0xFF;
+        const int bom_le = s[0] == 0xFF && s[1] == 0xFE && s[2] == 0x00 && s[3] == 0x00;
+
+        if (bom_be || bom_le) {
+            *endian = bom_be ? BE : LE;
+            return ZERObt;
+        }
+    }
+    else if (current == BE || current == LE) {
+        /* Pick the bytes by significance so one check serves both orders. */
+        const int big = (current == BE);
+        const unsigned char top = big ? s[0] : s[3];   /* bits 24..31, must be zero */
+        const unsigned char plane = big ? s[1] : s[2]; /* bits 16..23, at most 0x10 */
+        const unsigned char high = big ? s[2] : s[1];  /* bits 8..15 */
+        const int is_surrogate = (0xD8 <= high && high <= 0xDF);
+
+        if (top == 0 &&
+            ((plane >= 1 && plane <= 0x10) || (plane == 0 && !is_surrogate))) {
+            return (VALUE)FUNso;
+        }
+    }
+    return (VALUE)INVALID;
+}
+
+/*
+ * Convert one UTF-32 unit by delegating to the fixed-endian converter that
+ * matches the byte order recorded in the one-byte state at statep.
+ * Returns whatever the delegate returns, or 0 when the state is neither
+ * BE nor LE.
+ */
+static ssize_t
+fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+    const unsigned char *endian = statep;
+
+    if (*endian == BE) {
+        return fun_so_from_utf_32be(statep, s, l, o, osize);
+    }
+    if (*endian == LE) {
+        return fun_so_from_utf_32le(statep, s, l, o, osize);
+    }
+    return 0;
+}
+
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
@@ -334,6 +380,21 @@ fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *
return fun_so_to_utf_16be(statep, s, l, o, osize);
}
+/*
+ * Encode one character as UTF-32 via fun_so_to_utf_32be.
+ * The one-byte state at statep records whether the byte order mark has
+ * been emitted: on the first call (state 0) the bytes 00 00 FE FF are
+ * written ahead of the character and the state is set to 1.
+ * Returns the number of bytes written, including the 4 BOM bytes when
+ * they were emitted.  osize is forwarded to the BE converter unchanged.
+ */
+static ssize_t
+fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
+{
+    unsigned char *bom_done = statep;
+    ssize_t prefix = 0;
+
+    if (*bom_done == 0) {
+        o[0] = 0x00;
+        o[1] = 0x00;
+        o[2] = 0xFE;
+        o[3] = 0xFF;
+        o += 4;
+        prefix = 4;
+        *bom_done = 1;
+    }
+    return prefix + fun_so_to_utf_32be(statep, s, l, o, osize);
+}
+
static const rb_transcoder
rb_from_UTF_16BE = {
"UTF-16BE", "UTF-8", from_UTF_16BE,
@@ -443,6 +504,18 @@ rb_from_UTF_16 = {
};
static const rb_transcoder
+rb_from_UTF_32 = { /* transcoder entry passed to rb_register_transcoder() in Init_utf_16_32 */
+ "UTF-32", "UTF-8", from_UTF_32, /* presumably source encoding, destination encoding, conversion table -- confirm field order */
+ TRANSCODE_TABLE_INFO,
+ 4, /* input_unit_length */
+ 4, /* max_input */
+ 4, /* max_output */
+ asciicompat_decoder, /* asciicompat_type */
+ 1, state_init, NULL, /* state_size, state_init, state_fini; the callbacks below read the state as one unsigned char */
+ NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32 /* callback slots; slot names are not visible here (presumably func_si and func_so) -- confirm */
+};
+
+static const rb_transcoder
rb_to_UTF_16 = {
"UTF-8", "UTF-16", from_UTF_8,
TRANSCODE_TABLE_INFO,
@@ -454,6 +527,18 @@ rb_to_UTF_16 = {
NULL, NULL, NULL, fun_so_to_utf_16
};
+static const rb_transcoder
+rb_to_UTF_32 = { /* transcoder entry passed to rb_register_transcoder() in Init_utf_16_32 */
+ "UTF-8", "UTF-32", from_UTF_8, /* presumably source encoding, destination encoding, conversion table -- confirm field order */
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 4, /* max_input */
+ 4, /* max_output; NOTE(review): fun_so_to_utf_32 writes 4 BOM bytes plus the character on its first call -- confirm 4 is a sufficient bound */
+ asciicompat_encoder, /* asciicompat_type */
+ 1, state_init, NULL, /* state_size, state_init, state_fini; fun_so_to_utf_32 uses the one state byte as a "BOM written" flag */
+ NULL, NULL, NULL, fun_so_to_utf_32 /* only the last callback slot is set (presumably func_so) -- confirm */
+};
+
void
Init_utf_16_32(void)
{
@@ -467,4 +552,6 @@ Init_utf_16_32(void)
rb_register_transcoder(&rb_to_UTF_32LE);
rb_register_transcoder(&rb_from_UTF_16);
rb_register_transcoder(&rb_to_UTF_16);
+ rb_register_transcoder(&rb_from_UTF_32);
+ rb_register_transcoder(&rb_to_UTF_32);
}