aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-04-10 00:41:47 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-04-10 00:41:47 +0000
commit55e9e2864cfe8d25205b228b7fad7294ddee8237 (patch)
tree64f6b3a62e8525c3d44fc6b69900400fcb7c3fa2
parent1b90e86dda8f67bcea2862ec249cc7600bc4b27b (diff)
downloadruby-55e9e2864cfe8d25205b228b7fad7294ddee8237.tar.gz
symbol.c: non-ASCII constant names
* symbol.c (rb_sym_constant_char_p): support for non-ASCII constant names. [Feature #13770] * object.c (rb_mod_const_get, rb_mod_const_defined): support for non-ASCII constant names. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63130 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--NEWS2
-rw-r--r--object.c4
-rw-r--r--symbol.c38
-rw-r--r--test/ruby/test_parse.rb39
4 files changed, 80 insertions, 3 deletions
diff --git a/NEWS b/NEWS
index a8e1874939..5be9025862 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,8 @@ with all sufficient information, see the ChangeLog file or Redmine
* `else` without `rescue` now causes a syntax error. [EXPERIMENTAL]
+* constant names may start with a non-ASCII capital letter. [Feature #13770]
+
=== Core classes updates (outstanding ones only)
* Array
diff --git a/object.c b/object.c
index 62110f7ccf..4a5b5d359a 100644
--- a/object.c
+++ b/object.c
@@ -2480,7 +2480,7 @@ rb_mod_const_get(int argc, VALUE *argv, VALUE mod)
if (!id) {
part = rb_str_subseq(name, beglen, len);
OBJ_FREEZE(part);
- if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
+ if (!rb_is_const_name(part)) {
name = part;
goto wrong_name;
}
@@ -2633,7 +2633,7 @@ rb_mod_const_defined(int argc, VALUE *argv, VALUE mod)
if (!id) {
part = rb_str_subseq(name, beglen, len);
OBJ_FREEZE(part);
- if (!ISUPPER(*pbeg) || !rb_is_const_name(part)) {
+ if (!rb_is_const_name(part)) {
name = part;
goto wrong_name;
}
diff --git a/symbol.c b/symbol.c
index 723764c34e..c54a743cdc 100644
--- a/symbol.c
+++ b/symbol.c
@@ -199,6 +199,42 @@ rb_enc_symname_p(const char *name, rb_encoding *enc)
return rb_enc_symname2_p(name, strlen(name), enc);
}
+static int /* TRUE when the first character of name is accepted as the start of a constant name, FALSE otherwise */
+rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc) /* name: bytes to inspect; nlen: number of bytes available; enc: encoding used to decode name */
+{
+ int c, len;
+ const char *end = name + nlen;
+
+ if (nlen < 1) return FALSE; /* empty input has no first character */
+ if (ISASCII(*name)) return ISUPPER(*name); /* ASCII lead byte: the answer is ISUPPER alone */
+ c = rb_enc_precise_mbclen(name, end, enc);
+ if (!MBCLEN_CHARFOUND_P(c)) return FALSE; /* the length probe did not report a found character */
+ len = MBCLEN_CHARFOUND_LEN(c); /* byte length of the first character; compared with the fold length below */
+ c = rb_enc_mbc_to_codepoint(name, end, enc); /* c is reused: from here on it holds the code point */
+ if (ONIGENC_IS_UNICODE(enc)) {
+ static int ctype_titlecase = 0; /* static cache: looked up only while it is still zero */
+ if (rb_enc_isupper(c, enc)) return TRUE;
+ if (rb_enc_islower(c, enc)) return FALSE;
+ if (!ctype_titlecase) {
+ static const UChar cname[] = "titlecaseletter";
+ static const UChar *const end = cname + sizeof(cname) - 1; /* shadows the outer end; the -1 drops the NUL terminator */
+ ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
+ }
+ if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE; /* neither upper nor lower, but matches the titlecase ctype */
+ }
+ else {
+ /* fallback to case-folding */
+ OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ const OnigUChar *beg = (const OnigUChar *)name;
+ int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
+ &beg, (const OnigUChar *)end,
+ fold, enc);
+ if (r > 0 && (r != len || memcmp(fold, name, r))) /* fold output differs from the original bytes in length or content */
+ return TRUE;
+ }
+ return FALSE;
+}
+
#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
@@ -279,7 +315,7 @@ rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int a
break;
default:
- type = ISUPPER(*m) ? ID_CONST : ID_LOCAL;
+ type = rb_sym_constant_char_p(m, e-m, enc) ? ID_CONST : ID_LOCAL;
id:
if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
if (len > 1 && *(e-1) == '=') {
diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index 7229d0acb5..b05dbe2a0f 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -1157,6 +1157,45 @@ x = __ENCODING__
end;
end
+ NONASCII_CONSTANTS = [ # candidate constant names iterated by assert_nonascii_const
+ *%W"\u{00de} \u{00C0}".flat_map {|c| [c, c.encode("iso-8859-15")]}, # each character as written plus its ISO-8859-15 re-encoding
+ "\u{1c4}", "\u{1f2}", "\u{1f88}", "\u{370}",
+ *%W"\u{391} \u{ff21}".flat_map {|c| [c, c.encode("cp932"), c.encode("euc-jp")]}, # each character as written plus CP932 and EUC-JP re-encodings
+ ]
+
+ def assert_nonascii_const # shared driver: yields (module, name); the block must define the constant and return its value
+ assert_all_assertions_foreach("NONASCII_CONSTANTS", *NONASCII_CONSTANTS) do |n|
+ m = Module.new # fresh anonymous module for every name
+ assert_not_operator(m, :const_defined?, n) # before the block runs, the name must be undefined
+ assert_raise_with_message(NameError, /uninitialized/) do
+ m.const_get(n)
+ end
+ assert_nil(eval("defined?(m::#{n})"))
+
+ v = yield m, n # the caller defines the constant; v is the value it returns
+
+ assert_operator(m, :const_defined?, n) # after the block, all three lookups must see the constant
+ assert_equal("constant", eval("defined?(m::#{n})"))
+ assert_same(v, m.const_get(n))
+
+ m.__send__(:remove_const, n) # remove_const is private on Module, hence __send__
+ assert_not_operator(m, :const_defined?, n) # removal must make the name undefined again
+ assert_nil(eval("defined?(m::#{n})"))
+ end
+ end
+
+ def test_nonascii_const_set # defines each name through Module#const_set
+ assert_nonascii_const do |m, n|
+ m.const_set(n, 42) # the block's result, 42, is what assert_nonascii_const compares with const_get
+ end
+ end
+
+ def test_nonascii_constant # defines each name through a class statement evaluated as source text
+ assert_nonascii_const do |m, n|
+ m.module_eval("class #{n}; self; end") # the class body ends in self, so the block returns the new class
+ end
+ end
+
=begin
def test_past_scope_variable
assert_warning(/past scope/) {catch {|tag| eval("BEGIN{throw tag}; tap {a = 1}; a")}}