From f1dc132b41fc3f10a8ba8990cfd070ac49155762 Mon Sep 17 00:00:00 2001 From: nobu Date: Sun, 22 Jan 2012 06:23:38 +0000 Subject: * parse.y (rb_intern3, rb_intern_str): check the coderange first. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@34352 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 ++++ parse.y | 35 +++++++++++++---------------------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index e2fc2b045c..e06e92132c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Sun Jan 22 15:23:35 2012 Nobuyoshi Nakada + + * parse.y (rb_intern3, rb_intern_str): check the coderange first. + Sat Jan 21 22:21:07 2012 Nobuyoshi Nakada * include/ruby/ruby.h (FIXNUM_P): simple flag should be int. diff --git a/parse.y b/parse.y index 8ee7cdb610..0b58166f3c 100644 --- a/parse.y +++ b/parse.y @@ -10397,6 +10397,16 @@ register_symid(ID id, const char *name, long len, rb_encoding *enc) return id; } +static int +sym_check_asciionly(VALUE str) +{ + int cr = rb_enc_str_coderange(str); + if (cr == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "invalid encoding symbol"); + } + return cr == ENC_CODERANGE_7BIT; +} + ID rb_intern3(const char *name, long len, rb_encoding *enc) { @@ -10418,9 +10428,7 @@ rb_intern3(const char *name, long len, rb_encoding *enc) rb_enc_associate(str, enc); OBJ_FREEZE(str); - if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { - rb_raise(rb_eEncodingError, "invalid encoding symbol"); - } + if (sym_check_asciionly(str)) enc = rb_usascii_encoding(); if (st_lookup(global_symbols.sym_id, str, &data)) return (ID)data; @@ -10486,32 +10494,17 @@ rb_intern3(const char *name, long len, rb_encoding *enc) } break; } - mb = 0; if (!rb_enc_isdigit(*m, enc)) { while (m <= name + last && is_identchar(m, e, enc)) { if (ISASCII(*m)) { m++; } else { - mb = 1; m += rb_enc_mbclen(m, e, enc); } } } if (m - name < len) id = ID_JUNK; - if (enc != rb_usascii_encoding()) { - /* - * this clause makes sense only when called from other than - * rb_intern_str() taking care of code-range. - */ - if (!mb) { - for (; m <= name + len; ++m) { - if (!ISASCII(*m)) goto mbstr; - } - enc = rb_usascii_encoding(); - } - mbstr:; - } new_id: if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) { if (len > 20) { @@ -10547,7 +10540,7 @@ rb_intern_str(VALUE str) rb_encoding *enc; ID id; - if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { + if (sym_check_asciionly(str)) { enc = rb_usascii_encoding(); } else { @@ -10734,9 +10727,7 @@ rb_check_id(volatile VALUE *namep) *namep = name; } - if (rb_enc_str_coderange(name) == ENC_CODERANGE_BROKEN) { - rb_raise(rb_eEncodingError, "invalid encoding symbol"); - } + sym_check_asciionly(name); if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) return (ID)id; -- cgit v1.2.3