From f989f7c7ea89bb0b4a120666f5e85c0a81c9b44c Mon Sep 17 00:00:00 2001 From: nobu Date: Sun, 30 Jan 2011 04:01:58 +0000 Subject: * string.c (rb_str_ellipsize): new function to ellipsize a string. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30729 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 ++- ext/-test-/string/ellipsize.c | 13 ++++++++++ include/ruby/intern.h | 1 + string.c | 51 +++++++++++++++++++++++++++++++++++++ test/-ext-/string/test_ellipsize.rb | 46 +++++++++++++++++++++++++++++++++ 5 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 ext/-test-/string/ellipsize.c create mode 100644 test/-ext-/string/test_ellipsize.rb diff --git a/ChangeLog b/ChangeLog index b756b13648..0d0d23405e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,6 @@ -Sun Jan 30 12:56:13 2011 Nobuyoshi Nakada +Sun Jan 30 13:01:54 2011 Nobuyoshi Nakada + + * string.c (rb_str_ellipsize): new function to ellipsize a string. * include/ruby/encoding.h (rb_enc_step_back): new function to step back n characters. 
diff --git a/ext/-test-/string/ellipsize.c b/ext/-test-/string/ellipsize.c new file mode 100644 index 0000000000..0451519492 --- /dev/null +++ b/ext/-test-/string/ellipsize.c @@ -0,0 +1,13 @@ +#include "ruby.h" + +static VALUE +bug_str_ellipsize(VALUE str, VALUE len) +{ + return rb_str_ellipsize(str, NUM2LONG(len)); +} + +void +Init_ellipsize(VALUE klass) +{ + rb_define_method(klass, "ellipsize", bug_str_ellipsize, 1); +} diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 1e3a01b549..9b32298bde 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -712,6 +712,7 @@ long rb_str_strlen(VALUE); VALUE rb_str_length(VALUE); long rb_str_offset(VALUE, long); size_t rb_str_capacity(VALUE); +VALUE rb_str_ellipsize(VALUE, long); #if defined __GNUC__ #define rb_str_new_cstr(str) __extension__ ( \ { \ diff --git a/string.c b/string.c index d4d1c9f7da..6628eb0aef 100644 --- a/string.c +++ b/string.c @@ -7137,6 +7137,57 @@ rb_str_is_ascii_only_p(VALUE str) return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse; } +/** + * Shortens _str_ and adds three dots, an ellipsis, if it is longer + * than _len_ characters. + * + * \param str the string to ellipsize. + * \param len the maximum string length. + * \return the ellipsized string. + * \pre _len_ must not be negative. + * \post the length of the returned string in characters is less than or equal to _len_. + * \post If the length of _str_ is less than or equal to _len_, returns _str_ itself. + * \post the encoding of the returned string is equal to the encoding of _str_. + * \post the class of the returned string is equal to the class of _str_. + * \note the length is counted in characters. 
+ */ +VALUE +rb_str_ellipsize(VALUE str, long len) +{ + static const char ellipsis[] = "..."; + const long ellipsislen = sizeof(ellipsis) - 1; + rb_encoding *const enc = rb_enc_get(str); + const long blen = RSTRING_LEN(str); + const char *const p = RSTRING_PTR(str), *e = p + blen; + VALUE estr, ret = 0; + + if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len); + if (len * rb_enc_mbminlen(enc) >= blen || + (e = rb_enc_nth(p, e, len, enc)) - p == blen) { + ret = str; + } + else if (len <= ellipsislen || + !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) { + if (rb_enc_asciicompat(enc)) { + ret = rb_str_new_with_class(str, ellipsis, len); + rb_enc_associate(ret, enc); + } + else { + estr = rb_usascii_str_new(ellipsis, len); + ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil); + } + } + else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) { + rb_str_cat(ret, ellipsis, ellipsislen); + } + else { + estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen), + rb_enc_from_encoding(enc), 0, Qnil); + rb_str_append(ret, estr); + } + return ret; +} + /********************************************************************** * Document-class: Symbol * diff --git a/test/-ext-/string/test_ellipsize.rb b/test/-ext-/string/test_ellipsize.rb new file mode 100644 index 0000000000..6ef7d0a1a3 --- /dev/null +++ b/test/-ext-/string/test_ellipsize.rb @@ -0,0 +1,46 @@ +require 'test/unit' +require "-test-/string/string" + +class Test_StringEllipsize < Test::Unit::TestCase + def setup + @foobar = Bug::String.new("foobar") + end + + def assert_equal_with_class(expected, result, *rest) + assert_equal(expected.encoding, result.encoding, *rest) + assert_equal(expected, result, result.encoding.name) + assert_instance_of(Bug::String, result, *rest) + end + + def test_longer + assert_equal_with_class("", @foobar.ellipsize(0)) + assert_equal_with_class(".", @foobar.ellipsize(1)) + assert_equal_with_class("..", @foobar.ellipsize(2)) + 
assert_equal_with_class("...", @foobar.ellipsize(3)) + assert_equal_with_class("f...", @foobar.ellipsize(4)) + assert_equal_with_class("fo...", @foobar.ellipsize(5)) + end + + def test_shorter + assert_same(@foobar, @foobar.ellipsize(6)) + assert_same(@foobar, @foobar.ellipsize(7)) + end + + def test_negative_length + assert_raise(IndexError) {@foobar.ellipsize(-1)} + end + + def test_nonascii + a = "\u3042" + encs = Encoding.list.each do |enc| + next if enc.dummy? + begin + s = a.encode(enc) + e = "...".encode(enc) + rescue + else + assert_equal_with_class(s*12+e, Bug::String.new(s*20).ellipsize(15)) + end + end + end +end -- cgit v1.2.3