diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-22 06:52:54 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-22 06:52:54 +0000 |
commit | 81fc1cf2017840d0786cf997b4d2a80f80060001 (patch) | |
tree | ed95702464f90d637072a7912e6ad681262b3d2d | |
parent | a532dcafe6f523b79024cb1ee7cff31cc1127ae3 (diff) | |
download | ruby-81fc1cf2017840d0786cf997b4d2a80f80060001.tar.gz |
* encoding.c (rb_enc_mbclen): return minlen instead of 1 when
a character is not found properly.
* string.c (rb_enc_strlen): round up string length with fixed
multibyte encoding such as UTF-32.
(rb_enc_strlen_cr): ditto.
(rb_str_substr): fix substring with fixed multibyte encoding.
(rb_str_justify): check number of characters.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15573 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | encoding.c | 6 | ||||
-rw-r--r-- | string.c | 13 | ||||
-rw-r--r-- | test/ruby/test_utf32.rb | 27 |
4 files changed, 48 insertions, 9 deletions
```diff
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Feb 22 15:47:36 2008  Tanaka Akira  <akr@fsij.org>
+
+	* encoding.c (rb_enc_mbclen): return minlen instead of 1 when
+	  a character is not found properly.
+
+	* string.c (rb_enc_strlen): round up string length with fixed
+	  multibyte encoding such as UTF-32.
+	  (rb_enc_strlen_cr): ditto.
+	  (rb_str_substr): fix substring with fixed multibyte encoding.
+	  (rb_str_justify): check number of characters.
+
 Fri Feb 22 12:11:12 2008  NARUSE, Yui  <naruse@ruby-lang.org>
 
 	* string.c (rb_str_inspect): string of ascii incompatible encoding
diff --git a/encoding.c b/encoding.c
index 002c0c04fb..813b4b5062 100644
--- a/encoding.c
+++ b/encoding.c
@@ -738,8 +738,10 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
     int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
     if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
         return MBCLEN_CHARFOUND_LEN(n);
-    else
-        return 1;
+    else {
+        int min = rb_enc_mbminlen(enc);
+        return min <= e-p ? min : e-p;
+    }
 }
 
 int
diff --git a/string.c b/string.c
--- a/string.c
+++ b/string.c
@@ -618,7 +618,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
     const char *q;
 
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        return (e - p) / rb_enc_mbminlen(enc);
+        return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
@@ -651,7 +651,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
 
     *cr = 0;
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        return (e - p) / rb_enc_mbminlen(enc);
+        return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
     }
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
@@ -1223,10 +1223,9 @@ rb_str_substr(VALUE str, long beg, long len)
         len = 0;
     }
     else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
-        long rest = (e - p) / rb_enc_mbmaxlen(enc);
-        if (len > rest)
-            len = rest;
-        else
+        if (len * rb_enc_mbmaxlen(enc) > e - p)
+            len = e - p;
+        else
             len *= rb_enc_mbmaxlen(enc);
     }
     else {
@@ -5777,7 +5776,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
         flen = RSTRING_LEN(pad);
         fclen = str_strlen(pad, enc);
         singlebyte = single_byte_optimizable(pad);
-        if (flen == 0) {
+        if (flen == 0 || fclen == 0) {
             rb_raise(rb_eArgError, "zero width padding");
         }
     }
diff --git a/test/ruby/test_utf32.rb b/test/ruby/test_utf32.rb
new file mode 100644
index 0000000000..f81524f29e
--- /dev/null
+++ b/test/ruby/test_utf32.rb
@@ -0,0 +1,27 @@
+require 'test/unit'
+
+class TestUTF32 < Test::Unit::TestCase
+  def encdump(str)
+    d = str.dump
+    if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+      d
+    else
+      "#{d}.force_encoding(#{str.encoding.name.dump})"
+    end
+  end
+
+  def assert_str_equal(expected, actual, message=nil)
+    full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+    assert_block(full_message) { expected == actual }
+  end
+
+  def test_substr
+    assert_str_equal(
+      "abcdefgh".force_encoding("utf-32be"),
+      "abcdefgh".force_encoding("utf-32be")[0,3])
+  end
+end
+
```