about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- io.c 17
-rw-r--r-- test/ruby/test_io_m17n.rb 20
3 files changed, 42 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 1096377d77..cd7a16509f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sat Aug 15 10:15:20 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * io.c (rb_io_each_codepoint): read more data when read partially.
+ [ruby-core:70379] [Bug #11444]
+
Sat Aug 15 04:33:39 2015 Eric Wong <e@80x24.org>
* hash.c (any_hash): skip rb_objid_hash for static syms
diff --git a/io.c b/io.c
index 104f521378..fc973bc5c8 100644
--- a/io.c
+++ b/io.c
@@ -3763,8 +3763,25 @@ rb_io_each_codepoint(VALUE io)
rb_yield(UINT2NUM(c));
}
else if (MBCLEN_INVALID_P(r)) {
+ invalid:
rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
}
+ else if (MBCLEN_NEEDMORE_P(r)) {
+ char cbuf[8], *p = cbuf;
+ int more = MBCLEN_NEEDMORE_LEN(r);
+ if (more > numberof(cbuf)) goto invalid;
+ more += n = fptr->rbuf.len;
+ if (more > numberof(cbuf)) goto invalid;
+ while ((n = (int)read_buffered_data(p, more, fptr)) > 0 &&
+ (p += n, (more -= n) > 0)) {
+ if (io_fillbuf(fptr) < 0) goto invalid;
+ if ((n = fptr->rbuf.len) > more) n = more;
+ }
+ r = rb_enc_precise_mbclen(cbuf, p, enc);
+ if (!MBCLEN_CHARFOUND_P(r)) goto invalid;
+ c = rb_enc_codepoint(cbuf, p, enc);
+ rb_yield(UINT2NUM(c));
+ }
else {
continue;
}
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 4382824844..55e23a2768 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2562,4 +2562,24 @@ EOT
a.close rescue nil
b.close rescue nil
end
+
+ def test_each_codepoint_need_more
+ code = <<-'end;'
+ c = nil
+ begin
+ STDIN.set_encoding(Encoding::UTF_8).each_codepoint{|i| c = i}
+ rescue ArgumentError => e
+ STDERR.puts e.message
+ else
+ printf "%x", c
+ end
+ end;
+ args = ['-e', code]
+ bug11444 = '[ruby-core:70379] [Bug #11444]'
+ assert_in_out_err(args, "\u{1f376}".b[0,3], [],
+ ["invalid byte sequence in UTF-8"],
+ bug11444, timeout: 1)
+ assert_in_out_err(args, "x"*8190+"\u{1f376}", ["1f376"], [],
+ bug11444, timeout: 1)
+ end
end