From 3f9b0936aa846bdf6984019ca40bc629fe05d929 Mon Sep 17 00:00:00 2001 From: knu Date: Sat, 24 Nov 2012 18:46:15 +0000 Subject: String#{lines,chars,codepoints,bytes} now return an array. * string.c (rb_str_each_line, rb_str_lines): String#lines now returns an array instead of an enumerator. Passing a block is deprecated but still supported for backwards compatibility. Based on the patch by yhara. [Feature #6670] * string.c (rb_str_each_char, rb_str_chars): Ditto for String#chars. * string.c (rb_str_each_codepoint, rb_str_codepoints): Ditto for String#codepoints. * string.c (rb_str_each_byte, rb_str_bytes): Ditto for String#bytes. * NEWS: Add notes for the above changes. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37838 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 397 +++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 298 insertions(+), 99 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index c63f59ad90..68a4e46bb6 100644 --- a/string.c +++ b/string.c @@ -6098,45 +6098,8 @@ rb_str_split(VALUE str, const char *sep0) } -/* - * call-seq: - * str.each_line(separator=$/) {|substr| block } -> str - * str.each_line(separator=$/) -> an_enumerator - * - * str.lines(separator=$/) {|substr| block } -> str - * str.lines(separator=$/) -> an_enumerator - * - * Splits str using the supplied parameter as the record separator - * ($/ by default), passing each substring in turn to the supplied - * block. If a zero-length record separator is supplied, the string is split - * into paragraphs delimited by multiple successive newlines. - * - * If no block is given, an enumerator is returned instead. 
- * - * print "Example one\n" - * "hello\nworld".each_line {|s| p s} - * print "Example two\n" - * "hello\nworld".each_line('l') {|s| p s} - * print "Example three\n" - * "hello\n\n\nworld".each_line('') {|s| p s} - * - * produces: - * - * Example one - * "hello\n" - * "world" - * Example two - * "hel" - * "l" - * "o\nworl" - * "d" - * Example three - * "hello\n\n\n" - * "world" - */ - static VALUE -rb_str_each_line(int argc, VALUE *argv, VALUE str) +rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray) { rb_encoding *enc; VALUE rs; @@ -6146,6 +6109,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) VALUE line; int n; VALUE orig = str; + VALUE ary; if (argc == 0) { rs = rb_rs; @@ -6153,10 +6117,34 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) else { rb_scan_args(argc, argv, "01", &rs); } - RETURN_ENUMERATOR(str, argc, argv); + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#lines is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_ENUMERATOR(str, argc, argv); + } + if (NIL_P(rs)) { - rb_yield(str); - return orig; + if (wantarray) { + rb_ary_push(ary, str); + return ary; + } + else { + rb_yield(str); + return orig; + } } str = rb_str_new4(str); ptr = p = s = RSTRING_PTR(str); @@ -6179,7 +6167,10 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, p - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); str_mod_check(str, ptr, len); s = p; } @@ -6215,7 +6206,10 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, p - s + (rslen ? 
rslen : n)); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); str_mod_check(str, ptr, len); s = p + (rslen ? rslen : n); } @@ -6227,11 +6221,76 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str) line = rb_str_new5(str, s, pend - s); OBJ_INFECT(line, str); rb_enc_cr_str_copy_for_substr(line, str); - rb_yield(line); + if (wantarray) + rb_ary_push(ary, line); + else + rb_yield(line); RB_GC_GUARD(str); } - return orig; + if (wantarray) + return ary; + else + return orig; +} + +/* + * call-seq: + * str.each_line(separator=$/) {|substr| block } -> str + * str.each_line(separator=$/) -> an_enumerator + * + * Splits str using the supplied parameter as the record + * separator ($/ by default), passing each substring in + * turn to the supplied block. If a zero-length record separator is + * supplied, the string is split into paragraphs delimited by + * multiple successive newlines. + * + * If no block is given, an enumerator is returned instead. + * + * print "Example one\n" + * "hello\nworld".each_line {|s| p s} + * print "Example two\n" + * "hello\nworld".each_line('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each_line('') {|s| p s} + * + * produces: + * + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" + */ + +static VALUE +rb_str_each_line(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, 0); +} + +/* + * call-seq: + * str.lines(separator=$/) -> an_array + * + * Returns an array of lines in str split using the supplied + * record separator ($/ by default). This is a + * shorthand for str.each_line(separator).to_a. + * + * If a block is given, which is a deprecated form, works the same as + * each_line. 
+ */ + +static VALUE +rb_str_lines(int argc, VALUE *argv, VALUE str) +{ + return rb_str_enumerate_lines(argc, argv, str, 1); } static VALUE @@ -6240,16 +6299,49 @@ rb_str_each_byte_size(VALUE str, VALUE args) return LONG2FIX(RSTRING_LEN(str)); } +static VALUE +rb_str_enumerate_bytes(VALUE str, int wantarray) +{ + long i; + VALUE ary; + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#bytes is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new2(RSTRING_LEN(str)); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size); + } + + for (i=0; i<RSTRING_LEN(str); i++) { + if (wantarray) + rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff)); + else + rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff)); + } + if (wantarray) + return ary; + else + return str; +} + /* * call-seq: - * str.bytes {|fixnum| block } -> str - * str.bytes -> an_enumerator - * * str.each_byte {|fixnum| block } -> str * str.each_byte -> an_enumerator * - * Passes each byte in str to the given block, or returns - * an enumerator if no block is given. + * Passes each byte in str to the given block, or returns an + * enumerator if no block is given. * * "hello".each_byte {|c| print c, ' ' } * @@ -6261,13 +6353,24 @@ rb_str_each_byte_size(VALUE str, VALUE args) static VALUE rb_str_each_byte(VALUE str) { - long i; + return rb_str_enumerate_bytes(str, 0); +} - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size); - for (i=0; i<RSTRING_LEN(str); i++) { - rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff)); - } - return str; +/* + * call-seq: + * str.bytes -> an_array + * + * Returns an array of bytes in str. This is a shorthand for + * str.each_byte.to_a. + * + * If a block is given, which is a deprecated form, works the same as + * each_byte. + */ + +static VALUE +rb_str_bytes(VALUE str) +{ + return rb_str_enumerate_bytes(str, 1); } static VALUE @@ -6285,33 +6388,33 @@ rb_str_each_char_size(VALUE str) return LONG2FIX(len); } -/* - * call-seq: - * str.chars {|cstr| block } -> str - * str.chars -> an_enumerator - * - * str.each_char {|cstr| block } -> str - * str.each_char -> an_enumerator - * - * Passes each character in str to the given block, or returns - * an enumerator if no block is given. 
- * - * "hello".each_char {|c| print c, ' ' } - * - * produces: - * - * h e l l o - */ - static VALUE -rb_str_each_char(VALUE str) +rb_str_enumerate_chars(VALUE str, int wantarray) { VALUE orig = str; long i, len, n; const char *ptr; rb_encoding *enc; + VALUE ary; + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#chars is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + } - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); str = rb_str_new4(str); ptr = RSTRING_PTR(str); len = RSTRING_LEN(str); @@ -6321,63 +6424,159 @@ rb_str_each_char(VALUE str) case ENC_CODERANGE_7BIT: for (i = 0; i < len; i += n) { n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (wantarray) + rb_ary_push(ary, rb_str_subseq(str, i, n)); + else + rb_yield(rb_str_subseq(str, i, n)); } break; default: for (i = 0; i < len; i += n) { n = rb_enc_mbclen(ptr + i, ptr + len, enc); - rb_yield(rb_str_subseq(str, i, n)); + if (wantarray) + rb_ary_push(ary, rb_str_subseq(str, i, n)); + else + rb_yield(rb_str_subseq(str, i, n)); } } - return orig; + if (wantarray) + return ary; + else + return orig; } /* * call-seq: - * str.codepoints {|integer| block } -> str - * str.codepoints -> an_enumerator + * str.each_char {|cstr| block } -> str + * str.each_char -> an_enumerator * - * str.each_codepoint {|integer| block } -> str - * str.each_codepoint -> an_enumerator + * Passes each character in str to the given block, or returns + * an enumerator if no block is given. * - * Passes the Integer ordinal of each character in str, - * also known as a codepoint when applied to Unicode strings to the - * given block. + * "hello".each_char {|c| print c, ' ' } * - * If no block is given, an enumerator is returned instead. 
+ * produces: * - * "hello\u0639".each_codepoint {|c| print c, ' ' } + * h e l l o + */ + +static VALUE +rb_str_each_char(VALUE str) +{ + return rb_str_enumerate_chars(str, 0); +} + +/* + * call-seq: + * str.chars -> an_array * - * produces: + * Returns an array of characters in str. This is a shorthand + * for str.each_char.to_a. * - * 104 101 108 108 111 1593 + * If a block is given, which is a deprecated form, works the same as + * each_char. */ static VALUE -rb_str_each_codepoint(VALUE str) +rb_str_chars(VALUE str) +{ + return rb_str_enumerate_chars(str, 1); +} + + +static VALUE +rb_str_enumerate_codepoints(VALUE str, int wantarray) { VALUE orig = str; int n; unsigned int c; const char *ptr, *end; rb_encoding *enc; + VALUE ary; + + if (single_byte_optimizable(str)) + return rb_str_enumerate_bytes(str, wantarray); + + if (rb_block_given_p()) { + if (wantarray) { +#if 0 /* next major */ + rb_warn("given block not used"); + ary = rb_ary_new(); +#else + rb_warning("passing a block to String#codepoints is deprecated"); + wantarray = 0; +#endif + } + } + else { + if (wantarray) + ary = rb_ary_new(); + else + RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); + } - if (single_byte_optimizable(str)) return rb_str_each_byte(str); - RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); str = rb_str_new4(str); ptr = RSTRING_PTR(str); end = RSTRING_END(str); enc = STR_ENC_GET(str); while (ptr < end) { c = rb_enc_codepoint_len(ptr, end, &n, enc); - rb_yield(UINT2NUM(c)); + if (wantarray) + rb_ary_push(ary, UINT2NUM(c)); + else + rb_yield(UINT2NUM(c)); ptr += n; } RB_GC_GUARD(str); - return orig; + if (wantarray) + return ary; + else + return orig; +} + +/* + * call-seq: + * str.each_codepoint {|integer| block } -> str + * str.each_codepoint -> an_enumerator + * + * Passes the Integer ordinal of each character in str, + * also known as a codepoint when applied to Unicode strings to the + * given block. 
+ * + * If no block is given, an enumerator is returned instead. + * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * produces: + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rb_str_each_codepoint(VALUE str) +{ + return rb_str_enumerate_codepoints(str, 0); +} + +/* + * call-seq: + * str.codepoints -> an_array + * + * Returns an array of the Integer ordinals of the + * characters in str. This is a shorthand for + * str.each_codepoint.to_a. + * + * If a block is given, which is a deprecated form, works the same as + * each_codepoint. + */ + +static VALUE +rb_str_codepoints(VALUE str) +{ + return rb_str_enumerate_codepoints(str, 1); } + static long chopped_length(VALUE str) { @@ -7994,10 +8193,10 @@ Init_String(void) rb_define_method(rb_cString, "hex", rb_str_hex, 0); rb_define_method(rb_cString, "oct", rb_str_oct, 0); rb_define_method(rb_cString, "split", rb_str_split_m, -1); - rb_define_method(rb_cString, "lines", rb_str_each_line, -1); - rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0); - rb_define_method(rb_cString, "chars", rb_str_each_char, 0); - rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0); + rb_define_method(rb_cString, "lines", rb_str_lines, -1); + rb_define_method(rb_cString, "bytes", rb_str_bytes, 0); + rb_define_method(rb_cString, "chars", rb_str_chars, 0); + rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0); rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); rb_define_method(rb_cString, "concat", rb_str_concat, 1); -- cgit v1.2.3