diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-04-20 15:04:18 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-04-20 15:04:18 +0000 |
commit | 254d12215c3223b5820c1356dc12cd38eaa4cc37 (patch) | |
tree | a4688a020834a1c8a315c90891505c57565dd125 /string.c | |
parent | c7853b4344d821667a158bea0288e1861f70047f (diff) | |
download | ruby-254d12215c3223b5820c1356dc12cd38eaa4cc37.tar.gz |
* string.c (rb_str_split_m): faster processing on 7bit strings.
* string.c (ascii_isspace): faster isspace() for 7bit strings.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23234 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 62 |
1 files changed, 53 insertions, 9 deletions
```diff
@@ -5382,6 +5382,26 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
     return INT2NUM(i);
 }
 
+static const char isspacetable[256] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
 
 /*
  *  call-seq:
@@ -5495,21 +5515,45 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
         unsigned int c;
 
         end = beg;
-        while (ptr < eptr) {
-            c = rb_enc_codepoint(ptr, eptr, enc);
-            ptr += rb_enc_mbclen(ptr, eptr, enc);
-            if (skip) {
-                if (rb_enc_isspace(c, enc)) {
+        if (is_ascii_string(str)) {
+            while (ptr < eptr) {
+                c = (unsigned char)*ptr++;
+                if (skip) {
+                    if (ascii_isspace(c)) {
+                        beg = ptr - bptr;
+                    }
+                    else {
+                        end = ptr - bptr;
+                        skip = 0;
+                        if (!NIL_P(limit) && lim <= i) break;
+                    }
+                }
+                else if (ascii_isspace(c)) {
+                    rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
+                    skip = 1;
                     beg = ptr - bptr;
+                    if (!NIL_P(limit)) ++i;
                 }
                 else {
                     end = ptr - bptr;
-                    skip = 0;
-                    if (!NIL_P(limit) && lim <= i) break;
                 }
             }
-            else {
-                if (rb_enc_isspace(c, enc)) {
+        }
+        else {
+            while (ptr < eptr) {
+                c = rb_enc_codepoint(ptr, eptr, enc);
+                ptr += rb_enc_mbclen(ptr, eptr, enc);
+                if (skip) {
+                    if (rb_enc_isspace(c, enc)) {
+                        beg = ptr - bptr;
+                    }
+                    else {
+                        end = ptr - bptr;
+                        skip = 0;
+                        if (!NIL_P(limit) && lim <= i) break;
+                    }
+                }
+                else if (rb_enc_isspace(c, enc)) {
                     rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
                     skip = 1;
                     beg = ptr - bptr;
```