From 0df867f93355807e1f90c4311c015da88bfeea32 Mon Sep 17 00:00:00 2001 From: matz Date: Tue, 16 Sep 2008 03:14:41 +0000 Subject: * string.c (rb_str_each_codepoint): add new methods, #codepoints and #each_codepoint. a patch from Michael Selig in [ruby-core:18532]. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19379 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ string.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5d1c929ebc..701ae510b5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Tue Sep 16 11:55:16 2008 Yukihiro Matsumoto + + * string.c (rb_str_each_codepoint): add new methods, #codepoints + and #each_codepoint. a patch from Michael Selig + in [ruby-core:18532]. + Tue Sep 16 11:24:44 2008 Yukihiro Matsumoto * ext/socket/mkconstants.rb: add new constants. a patch from diff --git a/string.c b/string.c index 1d6410641b..6308ff541a 100644 --- a/string.c +++ b/string.c @@ -5506,6 +5506,59 @@ rb_str_each_char(VALUE str) return str; } +/* + * Document-method: codepoints + * call-seq: + * str.codepoints => anEnumerator + * str.codepoints {|fixnum| block } => str + * + * Returns an enumerator that gives the Integer ordinal + * of each character in the string, also known as a codepoint + * when applied to Unicode strings. If a block is given, it iterates + * over each character in the string. + * + * "foo\u0635".codepoints.to_a #=> [102, 111, 111, 1589] + */ + +/* + * Document-method: each_codepoint + * call-seq: + * str.each_codepoint {|fixnum| block } => str + * + * Passes the Integer ordinal of each character in str, + * also known as a codepoint when applied to Unicode strings, to the + * given block.
+ * + * "hello\u0639".each_codepoint {|c| print c, ' ' } + * + * produces: + * + * 104 101 108 108 111 1593 + */ + +static VALUE +rb_str_each_codepoint(VALUE str) +{ + int n; /* distance in bytes from ptr to the next character */ + unsigned int c; + const char *ptr, *end; + rb_encoding *enc; + + if (single_byte_optimizable(str)) return rb_str_each_byte(str); /* presumably byte values equal codepoints in this case -- confirm against single_byte_optimizable */ + RETURN_ENUMERATOR(str, 0, 0); + ptr = RSTRING_PTR(str); + end = RSTRING_END(str); + enc = STR_ENC_GET(str); + while (ptr < end) { + c = rb_enc_codepoint(ptr, end, enc); + n = rb_enc_codelen(c, enc); + /* c is unsigned and may not fit in a Fixnum, so convert with UINT2NUM rather than INT2FIX */ + rb_yield(UINT2NUM(c)); + ptr += n; + } + return str; +} + static long chopped_length(VALUE str) { @@ -6883,6 +6936,7 @@ Init_String(void) rb_define_method(rb_cString, "lines", rb_str_each_line, -1); rb_define_method(rb_cString, "bytes", rb_str_each_byte, 0); rb_define_method(rb_cString, "chars", rb_str_each_char, 0); + rb_define_method(rb_cString, "codepoints", rb_str_each_codepoint, 0); rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); rb_define_method(rb_cString, "concat", rb_str_concat, 1); @@ -6932,6 +6986,7 @@ Init_String(void) rb_define_method(rb_cString, "each_line", rb_str_each_line, -1); rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0); rb_define_method(rb_cString, "each_char", rb_str_each_char, 0); + rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0); rb_define_method(rb_cString, "sum", rb_str_sum, -1); -- cgit v1.2.3