diff options
-rw-r--r-- | ChangeLog | 11
-rw-r--r-- | encoding.c | 6
-rw-r--r-- | string.c | 13
-rw-r--r-- | test/ruby/test_utf32.rb | 27
4 files changed, 48 insertions, 9 deletions
@@ -1,3 +1,14 @@ +Fri Feb 22 15:47:36 2008 Tanaka Akira <akr@fsij.org> + + * encoding.c (rb_enc_mbclen): return minlen instead of 1 when + a character is not found properly. + + * string.c (rb_enc_strlen): round up string length with fixed + multibyte encoding such as UTF-32. + (rb_enc_strlen_cr): ditto. + (rb_str_substr): fix substring with fixed multibyte encoding. + (rb_str_justify): check number of characters. + Fri Feb 22 12:11:12 2008 NARUSE, Yui <naruse@ruby-lang.org> * string.c (rb_str_inspect): string of ascii incompatible encoding diff --git a/encoding.c b/encoding.c index 002c0c04fb..813b4b5062 100644 --- a/encoding.c +++ b/encoding.c @@ -738,8 +738,10 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) return MBCLEN_CHARFOUND_LEN(n); - else - return 1; + else { + int min = rb_enc_mbminlen(enc); + return min <= e-p ? min : e-p; + } } int @@ -618,7 +618,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) const char *q; if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - return (e - p) / rb_enc_mbminlen(enc); + return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc); } else if (rb_enc_asciicompat(enc)) { c = 0; @@ -651,7 +651,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr) *cr = 0; if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - return (e - p) / rb_enc_mbminlen(enc); + return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc); } else if (rb_enc_asciicompat(enc)) { c = 0; @@ -1223,10 +1223,9 @@ rb_str_substr(VALUE str, long beg, long len) len = 0; } else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - long rest = (e - p) / rb_enc_mbmaxlen(enc); - if (len > rest) - len = rest; - else + if (len * rb_enc_mbmaxlen(enc) > e - p) + len = e - p; + else len *= rb_enc_mbmaxlen(enc); } else { @@ -5777,7 +5776,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE 
str, char jflag) flen = RSTRING_LEN(pad); fclen = str_strlen(pad, enc); singlebyte = single_byte_optimizable(pad); - if (flen == 0) { + if (flen == 0 || fclen == 0) { rb_raise(rb_eArgError, "zero width padding"); } } diff --git a/test/ruby/test_utf32.rb b/test/ruby/test_utf32.rb new file mode 100644 index 0000000000..f81524f29e --- /dev/null +++ b/test/ruby/test_utf32.rb @@ -0,0 +1,27 @@ +require 'test/unit' + +class TestUTF32 < Test::Unit::TestCase + def encdump(str) + d = str.dump + if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d + d + else + "#{d}.force_encoding(#{str.encoding.name.dump})" + end + end + + def assert_str_equal(expected, actual, message=nil) + full_message = build_message(message, <<EOT) +#{encdump expected} expected but not equal to +#{encdump actual}. +EOT + assert_block(full_message) { expected == actual } + end + + def test_substr + assert_str_equal( + "abcdefgh".force_encoding("utf-32be"), + "abcdefgh".force_encoding("utf-32be")[0,3]) + end +end + |