diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | ext/stringio/stringio.c | 33 | ||||
-rw-r--r-- | io.c | 102 | ||||
-rw-r--r-- | test/stringio/test_stringio.rb | 5 |
4 files changed, 146 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Mon Jun 22 17:15:38 2009  Yukihiro Matsumoto  <matz@ruby-lang.org>
+
+        * io.c (rb_io_each_codepoint): new method.
+          [ruby-core:23949]
+
+        * ext/stringio/stringio.c (strio_each_codepoint): ditto.
+
 Mon Jun 22 16:26:11 2009  Nobuyoshi Nakada  <nobu@ruby-lang.org>
 
         * ruby.c (ruby_init_loadpath_safe): removed "." from load_path.
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c
index 9bb5ed8a2a..7514264f6a 100644
--- a/ext/stringio/stringio.c
+++ b/ext/stringio/stringio.c
@@ -824,6 +824,37 @@ strio_each_char(VALUE self)
     return self;
 }
 
+/*
+ * call-seq:
+ *   strio.each_codepoint {|c| block }  -> strio
+ *
+ * Yields the Integer codepoint of each remaining character, moving the
+ * read position past the character before the block is called.
+ * Returns an enumerator when no block is given.  See IO#each_codepoint.
+ */
+static VALUE
+strio_each_codepoint(VALUE self)
+{
+    struct StringIO *ptr;
+    rb_encoding *enc;
+    unsigned int c;
+    int n;
+
+    RETURN_ENUMERATOR(self, 0, 0);
+
+    ptr = readable(StringIO(self));
+    enc = rb_enc_get(ptr->string);
+    while (ptr->pos < RSTRING_LEN(ptr->string)) {
+        c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos,
+                                 RSTRING_END(ptr->string), &n, enc);
+        /* Advance before yielding: if the block exits early (break or
+         * exception) the codepoint just read must not be read again. */
+        ptr->pos += n;
+        rb_yield(UINT2NUM(c));
+    }
+    return self;
+}
+
 /* Boyer-Moore search: copied from regex.c */
 static void
 bm_init_skip(long *skip, const char *pat, long m)
@@ -1359,6 +1390,8 @@ Init_stringio()
     rb_define_method(StringIO, "bytes", strio_each_byte, 0);
     rb_define_method(StringIO, "each_char", strio_each_char, 0);
     rb_define_method(StringIO, "chars", strio_each_char, 0);
+    rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0);
+    rb_define_method(StringIO, "codepoints", strio_each_codepoint, 0);
     rb_define_method(StringIO, "getc", strio_getc, 0);
     rb_define_method(StringIO, "ungetc", strio_ungetc, 1);
     rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1);
diff --git a/io.c b/io.c
--- a/io.c
+++ b/io.c
@@ -2641,7 +2641,7 @@ rb_io_each_byte(VALUE io)
             fptr->rbuf_len--;
             rb_yield(INT2FIX(*p & 0xff));
             p++;
-            errno = 0;
+            errno = 0;
         }
         rb_io_check_readable(fptr);
         READ_CHECK(fptr);
@@ -2776,6 +2776,89 @@ 
rb_io_each_char(VALUE io)
 
 /*
  * call-seq:
+ *     ios.each_codepoint {|c| block }  => ios
+ *
+ *  Calls <i>block</i> once for each character in <i>ios</i>, passing its
+ *  <code>Integer</code> codepoint.  Without a block an enumerator is
+ *  returned.  The stream must be opened for reading or an
+ *  <code>IOError</code> is raised; invalid bytes raise <code>ArgumentError</code>.
+ */
+
+static VALUE
+rb_io_each_codepoint(VALUE io)
+{
+    rb_io_t *fptr;
+    rb_encoding *enc;
+    unsigned int c;
+    int r, n;
+
+    RETURN_ENUMERATOR(io, 0, 0);
+    GetOpenFile(io, fptr);
+    rb_io_check_readable(fptr);
+
+    READ_CHECK(fptr);
+    if (NEED_READCONV(fptr)) {
+        for (;;) {
+            make_readconv(fptr, 0);
+            for (;;) {
+                if (fptr->cbuf_len) {
+                    if (fptr->encs.enc)
+                        r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off,
+                                                  fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
+                                                  fptr->encs.enc);
+                    else
+                        r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
+                    if (!MBCLEN_NEEDMORE_P(r))
+                        break;
+                    if (fptr->cbuf_len == fptr->cbuf_capa) {
+                        rb_raise(rb_eIOError, "too long character");
+                    }
+                }
+                if (more_char(fptr) == -1) {
+                    /* ignore an incomplete character before EOF */
+                    return io;
+                }
+            }
+            if (MBCLEN_INVALID_P(r)) { /* only possible when encs.enc was consulted */
+                rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(fptr->encs.enc));
+            }
+            n = MBCLEN_CHARFOUND_LEN(r);
+            if (fptr->encs.enc)
+                c = rb_enc_codepoint(fptr->cbuf+fptr->cbuf_off,
+                                     fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
+                                     fptr->encs.enc);
+            else /* no encoding: the single byte is the codepoint */
+                c = (unsigned char)fptr->cbuf[fptr->cbuf_off];
+            fptr->cbuf_off += n; /* consume from the conversion buffer, not rbuf */
+            fptr->cbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+    }
+    enc = io_input_encoding(fptr);
+    for (;;) {
+        if (io_fillbuf(fptr) < 0) {
+            return io;
+        }
+        r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off,
+                                  fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+        if (MBCLEN_CHARFOUND_P(r) &&
+            (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
+            c = rb_enc_codepoint(fptr->rbuf+fptr->rbuf_off,
+                                 fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+            fptr->rbuf_off += n;
+            fptr->rbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+        else if (MBCLEN_INVALID_P(r)) {
+            rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
+        }
+        /* incomplete character: retry (NOTE(review): confirm io_fillbuf() reads more while rbuf is non-empty) */
+    }
+    return io;
+}
+
+/*
+ * call-seq:
  *     ios.lines(sep=$/) => anEnumerator
  *     ios.lines(limit) => anEnumerator
  *     ios.lines(sep, limit) => anEnumerator
@@ -2838,6 +2921,21 @@ rb_io_chars(VALUE io)
 
 /*
  * call-seq:
+ *     ios.codepoints => anEnumerator
+ *
+ *  Returns an enumerator that gives each codepoint in <em>ios</em>.
+ *  The stream must be opened for reading or an <code>IOError</code>
+ *  will be raised.
+ */
+
+static VALUE
+rb_io_codepoints(VALUE io)
+{
+    return rb_enumeratorize(io, ID2SYM(rb_intern("each_codepoint")), 0, 0);
+}
+
+/*
+ * call-seq:
  *     ios.getc => string or nil
  *
  *  Reads a one-character string from <em>ios</em>. Returns
@@ -8797,9 +8895,11 @@ Init_IO(void)
     rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1);
     rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0);
     rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0);
+    rb_define_method(rb_cIO, "each_codepoint", rb_io_each_codepoint, 0);
     rb_define_method(rb_cIO, "lines", rb_io_lines, -1);
     rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0);
     rb_define_method(rb_cIO, "chars", rb_io_chars, 0);
+    rb_define_method(rb_cIO, "codepoints", rb_io_codepoints, 0);
 
     rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1);
     rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1);
diff --git a/test/stringio/test_stringio.rb b/test/stringio/test_stringio.rb
index 8c72803b45..570f180fc7 100644
--- a/test/stringio/test_stringio.rb
+++ b/test/stringio/test_stringio.rb
@@ -340,6 +340,11 @@ class TestStringIO < Test::Unit::TestCase
     assert_equal(%w(1 2 3 4), f.each_char.to_a)
   end
 
+  def test_each_codepoint
+    f = StringIO.new("1234")
+    assert_equal([49, 50, 51, 52], f.each_codepoint.to_a)
+  end
+
   def test_gets2
     f = StringIO.new("foo\nbar\nbaz\n")
     assert_equal("fo", f.gets(2))