aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-22 06:52:54 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-22 06:52:54 +0000
commit: 81fc1cf2017840d0786cf997b4d2a80f80060001 (patch)
tree: ed95702464f90d637072a7912e6ad681262b3d2d
parent: a532dcafe6f523b79024cb1ee7cff31cc1127ae3 (diff)
download: ruby-81fc1cf2017840d0786cf997b4d2a80f80060001.tar.gz
* encoding.c (rb_enc_mbclen): return minlen instead of 1 when
  a character is not found properly.

* string.c (rb_enc_strlen): round up string length with fixed
  multibyte encoding such as UTF-32.
  (rb_enc_strlen_cr): ditto.
  (rb_str_substr): fix substring with fixed multibyte encoding.
  (rb_str_justify): check number of characters.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15573 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 11
-rw-r--r-- encoding.c 6
-rw-r--r-- string.c 13
-rw-r--r-- test/ruby/test_utf32.rb 27
4 files changed, 48 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index 2d19f72903..82b6fd960a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Feb 22 15:47:36 2008 Tanaka Akira <akr@fsij.org>
+
+ * encoding.c (rb_enc_mbclen): return minlen instead of 1 when
+ a character is not found properly.
+
+ * string.c (rb_enc_strlen): round up string length with fixed
+ multibyte encoding such as UTF-32.
+ (rb_enc_strlen_cr): ditto.
+ (rb_str_substr): fix substring with fixed multibyte encoding.
+ (rb_str_justify): check number of characters.
+
Fri Feb 22 12:11:12 2008 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (rb_str_inspect): string of ascii incompatible encoding
diff --git a/encoding.c b/encoding.c
index 002c0c04fb..813b4b5062 100644
--- a/encoding.c
+++ b/encoding.c
@@ -738,8 +738,10 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
return MBCLEN_CHARFOUND_LEN(n);
- else
- return 1;
+ else {
+ int min = rb_enc_mbminlen(enc);
+ return min <= e-p ? min : e-p;
+ }
}
int
diff --git a/string.c b/string.c
index 19e8072462..90ed51b108 100644
--- a/string.c
+++ b/string.c
@@ -618,7 +618,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
const char *q;
if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
- return (e - p) / rb_enc_mbminlen(enc);
+ return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
}
else if (rb_enc_asciicompat(enc)) {
c = 0;
@@ -651,7 +651,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
*cr = 0;
if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
- return (e - p) / rb_enc_mbminlen(enc);
+ return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
}
else if (rb_enc_asciicompat(enc)) {
c = 0;
@@ -1223,10 +1223,9 @@ rb_str_substr(VALUE str, long beg, long len)
len = 0;
}
else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
- long rest = (e - p) / rb_enc_mbmaxlen(enc);
- if (len > rest)
- len = rest;
- else
+ if (len * rb_enc_mbmaxlen(enc) > e - p)
+ len = e - p;
+ else
len *= rb_enc_mbmaxlen(enc);
}
else {
@@ -5777,7 +5776,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
flen = RSTRING_LEN(pad);
fclen = str_strlen(pad, enc);
singlebyte = single_byte_optimizable(pad);
- if (flen == 0) {
+ if (flen == 0 || fclen == 0) {
rb_raise(rb_eArgError, "zero width padding");
}
}
diff --git a/test/ruby/test_utf32.rb b/test/ruby/test_utf32.rb
new file mode 100644
index 0000000000..f81524f29e
--- /dev/null
+++ b/test/ruby/test_utf32.rb
@@ -0,0 +1,27 @@
+require 'test/unit'
+
+class TestUTF32 < Test::Unit::TestCase
+ def encdump(str)
+ d = str.dump
+ if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
+ d
+ else
+ "#{d}.force_encoding(#{str.encoding.name.dump})"
+ end
+ end
+
+ def assert_str_equal(expected, actual, message=nil)
+ full_message = build_message(message, <<EOT)
+#{encdump expected} expected but not equal to
+#{encdump actual}.
+EOT
+ assert_block(full_message) { expected == actual }
+ end
+
+ def test_substr
+ assert_str_equal(
+ "abcdefgh".force_encoding("utf-32be"),
+ "abcdefgh".force_encoding("utf-32be")[0,3])
+ end
+end
+