aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2011-03-01 01:35:17 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2011-03-01 01:35:17 +0000
commitb8b48f9d58619e55c33106f8dfa21fc70c63e7ad (patch)
tree49b2899415ce9c76c3fe118ac527a95e486d6fdf
parentaf4d75f9586b002e63bb36220bf98dabc0456c06 (diff)
downloadruby-b8b48f9d58619e55c33106f8dfa21fc70c63e7ad.tar.gz
* string.c (rb_str_byteslice): Add String#byteslice. [ruby-core:35376]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30991 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog4
-rw-r--r--NEWS1
-rw-r--r--string.c103
-rw-r--r--test/ruby/test_string.rb29
4 files changed, 137 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 12c6962407..aa9f44e3d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Tue Mar 1 10:34:39 2011 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * string.c (rb_str_byteslice): Add String#byteslice. [ruby-core:35376]
+
Tue Mar 1 00:12:49 2011 Tajima Akio <artonx@yahoo.co.jp>
* include/ruby/win32.h: define WIN32 if neither _WIN64 nor WIN32
diff --git a/NEWS b/NEWS
index 8839403304..fb0fe1f4dc 100644
--- a/NEWS
+++ b/NEWS
@@ -72,6 +72,7 @@ with all sufficient information, see the ChangeLog file.
* String#unpack supports endian modifiers
* new method:
* String#prepend
+ * String#byteslice
* Time
* extended method:
diff --git a/string.c b/string.c
index db8f761a0b..9cc93722ab 100644
--- a/string.c
+++ b/string.c
@@ -3987,6 +3987,108 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
return value;
}
+/*
+ * Returns a new string made of at most +len+ bytes of +str+, starting at
+ * byte offset +beg+.  A negative +beg+ is counted from the end of +str+.
+ * Returns nil when +beg+ falls outside the string or when +len+ is
+ * negative.  +len+ is clamped so that the result never extends past the
+ * end of +str+.
+ */
+static VALUE
+str_byte_substr(VALUE str, long beg, long len)
+{
+    char *p, *s = RSTRING_PTR(str);
+    long n = RSTRING_LEN(str);
+    VALUE str2;
+
+    /* A negative length is rejected with nil, as the String#byteslice
+     * documentation states, instead of being treated as length zero. */
+    if (beg > n || len < 0) return Qnil;
+    if (beg < 0) {
+        beg += n;
+        if (beg < 0) return Qnil;
+    }
+    /* Here 0 <= beg <= n.  Compare against n - beg rather than computing
+     * beg + len, which overflows a long when len is very large. */
+    if (len > n - beg)
+        len = n - beg;
+    if (len == 0) {
+        p = 0;
+    }
+    else {
+        p = s + beg;
+    }
+
+    if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
+        /* The slice is a long tail of str: build the result from str via
+         * rb_str_new4/str_new3, then move its heap pointer forward so it
+         * covers only the last len bytes. */
+        str2 = rb_str_new4(str);
+        str2 = str_new3(rb_obj_class(str2), str2);
+        RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
+        RSTRING(str2)->as.heap.len = len;
+    }
+    else {
+        str2 = rb_str_new5(str, p, len);
+        OBJ_INFECT(str2, str);
+    }
+
+    return str2;
+}
+
+/*
+ * Single-argument form of byteslice: +indx+ is either an integer byte
+ * index or a Range of byte offsets.  For an index, returns the one-byte
+ * substring at that position, or nil when no byte is there.  For a Range,
+ * returns the selected bytes, or nil when rb_range_beg_len reports nil.
+ */
+static VALUE
+str_byte_aref(VALUE str, VALUE indx)
+{
+    long pos;
+    VALUE byte;
+
+    if (TYPE(indx) == T_FIXNUM) {
+        pos = FIX2LONG(indx);
+    }
+    else {
+        /* check if indx is Range */
+        long beg, len = RSTRING_LEN(str);
+        VALUE status = rb_range_beg_len(indx, &beg, &len, len, 0);
+
+        if (NIL_P(status)) return Qnil;
+        if (status != Qfalse) return str_byte_substr(str, beg, len);
+
+        /* Qfalse: fall back to converting indx to an integer index. */
+        pos = NUM2LONG(indx);
+    }
+
+    byte = str_byte_substr(str, pos, 1);
+    /* An empty result means the index sits exactly at the end of the
+     * string; report that as nil as well. */
+    if (!NIL_P(byte) && RSTRING_LEN(byte) == 0) return Qnil;
+    return byte;
+}
+
+/*
+ * call-seq:
+ * str.byteslice(fixnum) -> new_str or nil
+ * str.byteslice(fixnum, fixnum) -> new_str or nil
+ * str.byteslice(range) -> new_str or nil
+ *
+ * Byte Reference---If passed a single <code>Fixnum</code>, returns a
+ * substring of one byte at that position. If passed two <code>Fixnum</code>
+ * objects, returns a substring starting at the offset given by the first, and
+ * a length given by the second. If given a range, a substring containing
+ * bytes at offsets given by the range is returned. In all three cases, if
+ * an offset is negative, it is counted from the end of <i>str</i>. Returns
+ * <code>nil</code> if the initial offset falls outside the string, the length
+ * is negative, or the beginning of the range is greater than the end.
+ *
+ * "hello".byteslice(1) #=> "e"
+ * "hello".byteslice(-1) #=> "o"
+ * "hello".byteslice(1, 2) #=> "el"
+ * "\u3042".byteslice(1, 2) #=> "\x81\x82"
+ * "\u3042".byteslice(1..3) #=> "\x81\x82"
+ */
+
+static VALUE
+rb_str_byteslice(int argc, VALUE *argv, VALUE str)
+{
+    /* Dispatch on arity: one argument goes to str_byte_aref, two are
+     * converted to a (start, length) pair of longs; any other count
+     * raises ArgumentError. */
+    switch (argc) {
+      case 1:
+        return str_byte_aref(str, argv[0]);
+      case 2:
+        return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
+      default:
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
+    }
+    return Qnil; /* not reached */
+}
+
/*
* call-seq:
* str.reverse -> new_str
@@ -7649,6 +7751,7 @@ Init_String(void)
rb_define_method(rb_cString, "chr", rb_str_chr, 0);
rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
+ rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index c5d3a53850..f18c8148e4 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1944,4 +1944,33 @@ class TestString < Test::Unit::TestCase
assert_equal(S("hello world"), a)
assert_equal(S("hello "), b)
end
+
+ def b(str)
+ str.force_encoding(Encoding::ASCII_8BIT)
+ end
+
+ def test_byteslice
+ assert_equal(b("h"), "hello".byteslice(0))
+ assert_equal(nil, "hello".byteslice(5))
+ assert_equal(b("o"), "hello".byteslice(-1))
+ assert_equal(nil, "hello".byteslice(-6))
+
+ assert_equal(b(""), "hello".byteslice(0, 0))
+ assert_equal(b("hello"), "hello".byteslice(0, 6))
+ assert_equal(b("hello"), "hello".byteslice(0, 6))
+ assert_equal(b(""), "hello".byteslice(5, 1))
+ assert_equal(b("o"), "hello".byteslice(-1, 6))
+ assert_equal(nil, "hello".byteslice(-6, 1))
+
+ assert_equal(b("h"), "hello".byteslice(0..0))
+ assert_equal(b(""), "hello".byteslice(5..0))
+ assert_equal(b("o"), "hello".byteslice(4..5))
+ assert_equal(nil, "hello".byteslice(6..0))
+ assert_equal(b(""), "hello".byteslice(-1..0))
+ assert_equal(b("llo"), "hello".byteslice(-3..5))
+
+ assert_equal(b("\x81"), "\u3042".byteslice(1))
+ assert_equal(b("\x81\x82"), "\u3042".byteslice(1, 2))
+ assert_equal(b("\x81\x82"), "\u3042".byteslice(1..2))
+ end
end