aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- intern.h 1
-rw-r--r-- object.c 2
-rw-r--r-- parse.y 122
-rw-r--r-- test_symbol.rb 50
5 files changed, 174 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 9aac6984e2..3f1a804b24 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Sat Oct 22 13:08:21 2005 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * object.c (sym_inspect), parse.y (parser_yylex, rb_symname_p): check
+ if valid as a symbol name more strictly. [ruby-dev:27478]
+
+ * test/ruby/test_symbol.rb: tests for [ruby-core:03573].
+
Sat Oct 22 10:08:28 2005 Hirokazu Yamamoto <ocean@m2.ccsnet.ne.jp>
* missing.h, missing/*.c: SUSv3 compatible strcasecmp and strncasecmp,
diff --git a/intern.h b/intern.h
index 4df7106a7a..ec08a3f5fa 100644
--- a/intern.h
+++ b/intern.h
@@ -398,6 +398,7 @@ int rb_is_instance_id(ID);
int rb_is_class_id(ID);
int rb_is_local_id(ID);
int rb_is_junk_id(ID);
+int rb_symname_p(const char*);
VALUE rb_backref_get(void);
void rb_backref_set(VALUE);
VALUE rb_lastline_get(void);
diff --git a/object.c b/object.c
index 9f39ec3117..825c4e627b 100644
--- a/object.c
+++ b/object.c
@@ -1053,7 +1053,7 @@ sym_inspect(VALUE sym)
str = rb_str_new(0, strlen(name)+1);
RSTRING(str)->ptr[0] = ':';
strcpy(RSTRING(str)->ptr+1, name);
- if (rb_is_junk_id(id)) {
+ if (!rb_symname_p(name)) {
str = rb_str_dump(str);
strncpy(RSTRING(str)->ptr, ":\"", 2);
}
diff --git a/parse.y b/parse.y
index 4760335765..55168adfd1 100644
--- a/parse.y
+++ b/parse.y
@@ -5587,6 +5587,7 @@ parser_yylex(struct parser_params *parser)
int space_seen = 0;
int cmd_state;
unsigned char uc;
+ enum lex_state_e last_state;
#ifdef RIPPER
int fallthru = Qfalse;
#endif
@@ -6556,6 +6557,7 @@ parser_yylex(struct parser_params *parser)
return '%';
case '$':
+ last_state = lex_state;
lex_state = EXPR_END;
newtok();
c = nextc();
@@ -6599,7 +6601,14 @@ parser_yylex(struct parser_params *parser)
tokadd('$');
tokadd(c);
c = nextc();
- tokadd(c);
+ uc = (unsigned char)c;
+ if (is_identchar(uc)) {
+ tokadd(c);
+ }
+ else {
+ pushback(c);
+ }
+ gvar:
tokfix();
set_yylval_id(rb_intern(tok()));
if (!is_global_id(yylval_id())) {
@@ -6612,6 +6621,11 @@ parser_yylex(struct parser_params *parser)
case '`': /* $`: string before last match */
case '\'': /* $': string after last match */
case '+': /* $+: string matches last paren. */
+ if (last_state == EXPR_FNAME) {
+ tokadd('$');
+ tokadd(c);
+ goto gvar;
+ }
set_yylval_node(NEW_BACK_REF(c));
return tBACK_REF;
@@ -6624,6 +6638,7 @@ parser_yylex(struct parser_params *parser)
c = nextc();
} while (ISDIGIT(c));
pushback(c);
+ if (last_state == EXPR_FNAME) goto gvar;
tokfix();
set_yylval_node(NEW_NTH_REF(atoi(tok()+1)));
return tNTH_REF;
@@ -6712,8 +6727,8 @@ parser_yylex(struct parser_params *parser)
{
int result = 0;
- enum lex_state_e last_state = lex_state;
+ last_state = lex_state;
switch (tok()[0]) {
case '$':
lex_state = EXPR_END;
@@ -8285,6 +8300,97 @@ internal_id(void)
return ID_INTERNAL | (++global_symbols.last_id << ID_SCOPE_SHIFT);
}
+/* Nonzero iff M, the text after a '$', is in full a special global name. */
+static int
+is_special_global_name(const char *m)
+{
+    const char *p = m;
+    if (*p == '-') {                /* "-" plus at most one identifier char */
+        ++p;
+        if (is_identchar(*p)) p += mbclen(*p);
+        return !*p;
+    }
+    switch (*p) {
+      case '~': case '*': case '$': case '?': case '!': case '@':
+      case '/': case '\\': case ';': case ',': case '.': case '=':
+      case ':': case '<': case '>': case '\"':
+      case '&': case '`': case '\'': case '+': case '0':
+        return !p[1];               /* exactly one of these characters */
+    }
+    if (!ISDIGIT(*p)) return 0;
+    do ++p; while (ISDIGIT(*p));    /* a digit other than '0', then any digits */
+    return !*p;
+}
+
+/*
+ * Returns Qtrue when NAME is valid as a symbol name, else Qfalse (also for
+ * NULL and ""). Accepted: a special global name after '$', an identifier
+ * optionally prefixed by '$', '@' or '@@', and the operator method names
+ * listed in the cases below. An unprefixed identifier that does not start
+ * with an uppercase letter may end in a single '!', '?' or '='.
+ */
+int
+rb_symname_p(const char *name)
+{
+    const char *m = name;
+    int localid = Qfalse;
+
+    if (!m) return Qfalse;
+    switch (*m) {
+      case '\0':
+        return Qfalse;
+      case '$':
+        if (is_special_global_name(++m)) return Qtrue;
+        goto id;
+      case '@':
+        if (*++m == '@') ++m;
+        goto id;
+      case '<':                     /* <  <<  <=  <=> */
+        switch (*++m) {
+          case '<': ++m; break;
+          case '=': if (*++m == '>') ++m; break;
+          default: break;
+        }
+        break;
+      case '>':                     /* >  >>  >= */
+        if (*++m == '>' || *m == '=') ++m;
+        break;
+      case '=':                     /* ==  ===  =~ ; a lone "=" is rejected */
+        switch (*++m) {
+          case '~': ++m; break;
+          case '=': if (*++m == '=') ++m; break;
+          default: return Qfalse;
+        }
+        break;
+      case '*':                     /* *  ** */
+        if (*++m == '*') ++m;
+        break;
+      case '+': case '-':           /* +  -  +@  -@ */
+        if (*++m == '@') ++m;
+        break;
+      case '|': case '^': case '&': case '/': case '%': case '~': case '`':
+        ++m;                        /* consume it so the final NUL test can pass */
+        break;
+      case '[':                     /* []  []= ; a lone "[" is rejected */
+        if (*++m != ']') return Qfalse;
+        if (*++m == '=') ++m;
+        break;
+      default:
+        localid = !ISUPPER(*m);
+        /* '$' and '@' jump to id with m past the sigil and localid still 0 */
+      id:
+        if (*m != '_' && !ISALPHA(*m) && !ismbchar(*m)) return Qfalse;
+        while (is_identchar(*m)) m += mbclen(*m);
+        if (localid) {
+            switch (*m) {
+              case '!': case '?': case '=': ++m;
+            }
+        }
+        break;
+    }
+    return *m ? Qfalse : Qtrue;     /* valid only if the whole name was consumed */
+}
+
ID
rb_intern(const char *name)
{
@@ -8300,8 +8406,7 @@ rb_intern(const char *name)
switch (*name) {
case '$':
id |= ID_GLOBAL;
- m++;
- if (!is_identchar(*m)) m++;
+ if (is_special_global_name(++m)) goto new_id;
break;
case '@':
if (name[1] == '@') {
@@ -8314,7 +8419,7 @@ rb_intern(const char *name)
m++;
break;
default:
- if (name[0] != '_' && !ISALPHA(name[0]) && !ismbchar(name[0])) {
+ if (name[0] != '_' && ISASCII(name[0]) && !ISALNUM(name[0])) {
/* operators */
int i;
@@ -8348,10 +8453,13 @@ rb_intern(const char *name)
}
break;
}
- while (m <= name + last && is_identchar(*m)) {
- m += mbclen(*m);
+ if (!ISDIGIT(*m)) {
+ while (m <= name + last && is_identchar(*m)) {
+ m += mbclen(*m);
+ }
}
if (*m) id = ID_JUNK;
+ new_id:
id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
id_regist:
name = strdup(name);
diff --git a/test_symbol.rb b/test_symbol.rb
new file mode 100644
index 0000000000..b6dd203596
--- /dev/null
+++ b/test_symbol.rb
@@ -0,0 +1,50 @@
+require 'test/unit'
+
+class TestSymbol < Test::Unit::TestCase
+ # [ruby-core:3573]
+
+ def assert_eval_inspected(sym)
+ n = sym.inspect
+ assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(n))}
+ end
+
+ def test_inspect_invalid
+ # 2) Symbol#inspect sometimes returns invalid symbol representations:
+ assert_eval_inspected(:"!")
+ assert_eval_inspected(:"=")
+ assert_eval_inspected(:"0")
+ assert_eval_inspected(:"$1")
+ assert_eval_inspected(:"@1")
+ assert_eval_inspected(:"@@1")
+ assert_eval_inspected(:"@")
+ assert_eval_inspected(:"@@")
+ end
+
+ def assert_inspect_evaled(n)
+ assert_nothing_raised(SyntaxError) {assert_equal(n, eval(n).inspect)}
+ end
+
+ def test_inspect_suboptimal
+ # 3) Symbol#inspect sometimes returns suboptimal symbol representations:
+ assert_inspect_evaled(':foo')
+ assert_inspect_evaled(':foo!')
+ assert_inspect_evaled(':bar?')
+ end
+
+ def test_inspect_dollar
+ # 4) :$- always treats next character literally:
+ sym = "$-".intern
+ assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(':$-'))}
+ assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$-\n"))}
+ assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$- "))}
+ assert_nothing_raised(SyntaxError) {assert_equal(sym, eval(":$-#"))}
+ assert_raise(SyntaxError) {eval ':$-('}
+ end
+
+ def test_inspect_number
+ # 5) Inconsistency between :$0 and :$1? The first one is valid, but the
+ # latter isn't.
+ assert_inspect_evaled(':$0')
+ assert_inspect_evaled(':$1')
+ end
+end