diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-05-02 12:04:04 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-05-02 12:04:04 +0000 |
commit | 6853ccd90740de73484052877d396e7e10501f7e (patch) | |
tree | 2917a8626bbff3fc8a3162334b534df625546fc0 | |
parent | 76a28bcd9a381ed9d9866df5ce296755860b69e5 (diff) | |
download | ruby-6853ccd90740de73484052877d396e7e10501f7e.tar.gz |
* re.c (str_coderange): avoid a function call when the string already
has coderange information.
* re.c (rb_reg_prepare_enc): add a shortcut path when the regexp has
the same encoding as the given string.
* re.c (rb_reg_prepare_re): avoid duplicated allocation of
onig_errmsg_buffer.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54886 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | re.c | 52 |
2 files changed, 45 insertions, 19 deletions
@@ -1,3 +1,15 @@ +Mon May 2 20:59:21 2016 NARUSE, Yui <naruse@ruby-lang.org> + + * re.c (str_coderange): to avoid function call when the string already + has coderange information. + + * re.c (rb_reg_prepare_enc): add shortcut path when the regexp has + the same encoding of given string. + + * re.c (rb_reg_prepare_re): avoid duplicated allocation of + onig_errmsg_buffer. + + Mon May 2 12:34:52 2016 Tanaka Akira <akr@fsij.org> * test/ruby/test_refinement.rb (test_inspect): Use Integer instead of @@ -1370,11 +1370,21 @@ reg_enc_error(VALUE re, VALUE str) rb_enc_name(rb_enc_get(str))); } +static inline int +str_coderange(VALUE str) +{ + int cr = ENC_CODERANGE(str); + if (cr == ENC_CODERANGE_UNKNOWN) { + cr = rb_enc_str_coderange(str); + } + return cr; +} + static rb_encoding* rb_reg_prepare_enc(VALUE re, VALUE str, int warn) { rb_encoding *enc = 0; - int cr = rb_enc_str_coderange(str); + int cr = str_coderange(str); if (cr == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, @@ -1384,25 +1394,23 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn) rb_reg_check(re); enc = rb_enc_get(str); - if (!rb_enc_asciicompat(enc)) { - if (RREGEXP_PTR(re)->enc != enc) { - reg_enc_error(re, str); - } + if (RREGEXP_PTR(re)->enc == enc) { + } + else if (cr == ENC_CODERANGE_7BIT && + RREGEXP_PTR(re)->enc == rb_usascii_encoding()) { + enc = RREGEXP_PTR(re)->enc; + } + else if (!rb_enc_asciicompat(enc)) { + reg_enc_error(re, str); } else if (rb_reg_fixed_encoding_p(re)) { - if (RREGEXP_PTR(re)->enc != enc && - (!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) || + if ((!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) || cr != ENC_CODERANGE_7BIT)) { reg_enc_error(re, str); } enc = RREGEXP_PTR(re)->enc; } - else if (cr == ENC_CODERANGE_7BIT && - (RREGEXP_PTR(re)->enc == rb_usascii_encoding() - )) { - enc = RREGEXP_PTR(re)->enc; - } - if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && + else if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && enc != rb_ascii8bit_encoding() && cr != 
ENC_CODERANGE_7BIT) { rb_warn("regexp match /.../n against to %s string", @@ -1412,10 +1420,9 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn) } regex_t * -rb_reg_prepare_re(VALUE re, VALUE str) +rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) { regex_t *reg = RREGEXP_PTR(re); - onig_errmsg_buffer err = ""; int r; OnigErrorInfo einfo; const char *pattern; @@ -1450,6 +1457,13 @@ rb_reg_prepare_re(VALUE re, VALUE str) return reg; } +regex_t * +rb_reg_prepare_re(VALUE re, VALUE str) +{ + onig_errmsg_buffer err = ""; + return rb_reg_prepare_re0(re, str, err); +} + long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) { @@ -1491,13 +1505,14 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) char *range = RSTRING_PTR(str); regex_t *reg; int tmpreg; + onig_errmsg_buffer err = ""; if (pos > RSTRING_LEN(str) || pos < 0) { rb_backref_set(Qnil); return -1; } - reg = rb_reg_prepare_re(re, str); + reg = rb_reg_prepare_re0(re, str, err); tmpreg = reg != RREGEXP_PTR(re); if (!tmpreg) RREGEXP(re)->usecnt++; @@ -1540,7 +1555,6 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) return result; } else { - onig_errmsg_buffer err = ""; onig_error_code_to_str((UChar*)err, (int)result); rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); } @@ -2558,7 +2572,7 @@ rb_reg_preprocess_dregexp(VALUE ary, int options) src_enc = rb_enc_get(str); if (options & ARG_ENCODING_NONE && src_enc != ascii8bit) { - if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) + if (str_coderange(str) != ENC_CODERANGE_7BIT) rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); else src_enc = ascii8bit; @@ -2669,7 +2683,7 @@ rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err, if (options & ARG_ENCODING_NONE) { rb_encoding *ascii8bit = rb_ascii8bit_encoding(); if (enc != ascii8bit) { - if (rb_enc_str_coderange(str) != 
ENC_CODERANGE_7BIT) { + if (str_coderange(str) != ENC_CODERANGE_7BIT) { errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); return -1; } |