about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-03-15 06:04:44 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2012-03-15 06:04:44 +0000
commit: 375aedbfb4d1a93ab2d9573fcb34cf4d6c474de2 (patch)
tree: f507d2b5d172f75639d2f6cc71782c8c8d6372e6
parent: 36f6ce40832a8c19301898ff20c370d5d4638923 (diff)
download: ruby-375aedbfb4d1a93ab2d9573fcb34cf4d6c474de2.tar.gz
* parse.y (sym_check_asciionly): check ascii compatibility before
  scanning for code range.

* parse.y (intern_str): set to us-ascii if ascii only.
  [ruby-dev:45363][Bug #6146]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@35034 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 8
-rw-r--r-- parse.y 35
-rw-r--r-- test/ruby/test_symbol.rb 7
3 files changed, 31 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index ad940f3313..3d0f19dc04 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,10 @@
-Thu Mar 15 15:02:58 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
+Thu Mar 15 15:04:41 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * parse.y (sym_check_asciionly): check ascii compatibility before
+ scanning for code range.
+
+ * parse.y (intern_str): set to us-ascii if ascii only.
+ [ruby-dev:45363][Bug #6146]
* file.c (ruby_enc_find_basename): allow NULL as alllen.
[ruby-dev:45363][Bug #6146]
diff --git a/parse.y b/parse.y
index 3ba0e96de6..08de0046eb 100644
--- a/parse.y
+++ b/parse.y
@@ -9955,13 +9955,21 @@ register_symid_str(ID id, VALUE str)
static int
sym_check_asciionly(VALUE str)
{
- int cr = rb_enc_str_coderange(str);
- if (cr == ENC_CODERANGE_BROKEN) {
+ if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
+ switch (rb_enc_str_coderange(str)) {
+ case ENC_CODERANGE_BROKEN:
rb_raise(rb_eEncodingError, "invalid encoding symbol");
+ case ENC_CODERANGE_7BIT:
+ return TRUE;
}
- return cr == ENC_CODERANGE_7BIT;
+ return FALSE;
}
+/*
+ * _str_ itself will be registered at the global symbol table. _str_
+ * can be modified before the registration, since the encoding will be
+ * set to US-ASCII if it is a special global name or is ASCII only.
+ */
static ID intern_str(VALUE str);
ID
@@ -9979,8 +9987,6 @@ rb_intern3(const char *name, long len, rb_encoding *enc)
rb_enc_associate(str, enc);
OBJ_FREEZE(str);
- if (sym_check_asciionly(str)) enc = rb_usascii_encoding();
-
if (st_lookup(global_symbols.sym_id, str, &data))
return (ID)data;
@@ -9993,7 +9999,7 @@ intern_str(VALUE str)
{
const char *name, *m, *e;
long len, last;
- rb_encoding *enc;
+ rb_encoding *enc, *symenc;
unsigned char c;
ID id;
int mb;
@@ -10002,6 +10008,7 @@ intern_str(VALUE str)
m = name;
e = m + len;
enc = rb_enc_get(str);
+ symenc = enc;
if (rb_cString && !rb_enc_asciicompat(enc)) {
id = ID_JUNK;
@@ -10013,7 +10020,7 @@ intern_str(VALUE str)
case '$':
id |= ID_GLOBAL;
if ((mb = is_special_global_name(++m, e, enc)) != 0) {
- if (!--mb) enc = rb_ascii8bit_encoding();
+ if (!--mb) symenc = rb_usascii_encoding();
goto new_id;
}
break;
@@ -10075,7 +10082,9 @@ intern_str(VALUE str)
}
}
if (m - name < len) id = ID_JUNK;
+ if (sym_check_asciionly(str)) symenc = rb_usascii_encoding();
new_id:
+ if (symenc != enc) rb_enc_associate(str, symenc);
if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) {
if (len > 20) {
rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.20s...)",
@@ -10107,21 +10116,11 @@ rb_intern(const char *name)
ID
rb_intern_str(VALUE str)
{
- rb_encoding *enc;
st_data_t id;
- int ascii = sym_check_asciionly(str);
if (st_lookup(global_symbols.sym_id, str, &id))
return (ID)id;
- if (ascii && (enc = rb_usascii_encoding()) != rb_enc_get(str)) {
- str = rb_str_dup(str);
- rb_enc_associate(str, enc);
- OBJ_FREEZE(str);
- }
- else {
- str = rb_str_dup_frozen(str);
- }
- return intern_str(str);
+ return intern_str(rb_str_dup(str));
}
VALUE
diff --git a/test/ruby/test_symbol.rb b/test/ruby/test_symbol.rb
index 576fa96729..f39883bafa 100644
--- a/test/ruby/test_symbol.rb
+++ b/test/ruby/test_symbol.rb
@@ -161,4 +161,11 @@ class TestSymbol < Test::Unit::TestCase
assert_equal(':"\\u3042\\u3044\\u3046"', "\u3042\u3044\u3046".encode(e).to_sym.inspect)
end
end
+
+ def test_symbol_encoding
+ assert_equal(Encoding::US_ASCII, "$-A".force_encoding("iso-8859-15").intern.encoding)
+ assert_equal(Encoding::US_ASCII, "foobar~!".force_encoding("iso-8859-15").intern.encoding)
+ assert_equal(Encoding::UTF_8, "\u{2192}".intern.encoding)
+ assert_raise(EncodingError) {"\xb0a".force_encoding("utf-8").intern}
+ end
end