From 7ec0677b8cf7e55603da146360bb74853aabb3ec Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 2 Feb 2016 17:38:00 +0000 Subject: * re.c (rb_reg_prepare_enc): use the already compiled US-ASCII regexp if the given string is ASCII only. Elapsed time went from 121.2s to 113.9s on my x86_64-freebsd10.2 Intel Core i5 661 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53720 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- re.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 're.c') diff --git a/re.c b/re.c index 3f7d227594..4036df2910 100644 --- a/re.c +++ b/re.c @@ -1374,8 +1374,9 @@ static rb_encoding* rb_reg_prepare_enc(VALUE re, VALUE str, int warn) { rb_encoding *enc = 0; + int cr = rb_enc_str_coderange(str); - if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { + if (cr == ENC_CODERANGE_BROKEN) { rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(rb_enc_get(str))); @@ -1391,14 +1392,19 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn) else if (rb_reg_fixed_encoding_p(re)) { if (RREGEXP_PTR(re)->enc != enc && (!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) || - rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT)) { + cr != ENC_CODERANGE_7BIT)) { reg_enc_error(re, str); } enc = RREGEXP_PTR(re)->enc; } + else if (cr == ENC_CODERANGE_7BIT && + (RREGEXP_PTR(re)->enc == rb_usascii_encoding() + )) { + enc = RREGEXP_PTR(re)->enc; + } if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) && enc != rb_ascii8bit_encoding() && - rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + cr != ENC_CODERANGE_7BIT) { rb_warn("regexp match /.../n against to %s string", rb_enc_name(enc)); } -- cgit v1.2.3