diff options
-rw-r--r-- | internal/string.h | 1 | ||||
-rw-r--r-- | parse.y | 51 | ||||
-rw-r--r-- | ruby_parser.c | 17 | ||||
-rw-r--r-- | rubyparser.h | 5 | ||||
-rw-r--r-- | string.c | 2 | ||||
-rw-r--r-- | test/ruby/test_syntax.rb | 6 | ||||
-rw-r--r-- | universal_parser.c | 5 |
7 files changed, 28 insertions, 59 deletions
diff --git a/internal/string.h b/internal/string.h index cde81a1a25..009b6ec3be 100644 --- a/internal/string.h +++ b/internal/string.h @@ -46,6 +46,7 @@ int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc); int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code); VALUE rb_str_include(VALUE str, VALUE arg); VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len); +VALUE rb_str_valid_encoding_p(VALUE str); static inline bool STR_EMBED_P(VALUE str); static inline bool STR_SHARED_P(VALUE str); @@ -13728,6 +13728,17 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc) } static NODE* +str_to_sym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc) +{ + VALUE lit = rb_node_str_string_val(node); + if (!rb_str_valid_encoding_p(lit)) { + yyerror1(loc, "invalid symbol"); + lit = STR_NEW0(); + } + return NEW_SYM(lit, loc); +} + +static NODE* symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) { enum node_type type = nd_type(symbol); @@ -13736,7 +13747,7 @@ symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) nd_set_type(symbol, NODE_DSYM); break; case NODE_STR: - symbol = NEW_SYM(rb_node_str_string_val(symbol), &RNODE(symbol)->nd_loc); + symbol = str_to_sym_node(p, symbol, &RNODE(symbol)->nd_loc); break; default: compile_error(p, "unexpected node as symbol: %s", parser_node_name(type)); @@ -15465,8 +15476,6 @@ new_hash_pattern_tail(struct parser_params *p, NODE *kw_args, ID kw_rest_arg, co static NODE* dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc) { - VALUE lit; - if (!node) { return NEW_SYM(STR_NEW0(), loc); } @@ -15477,8 +15486,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc) nd_set_loc(node, loc); break; case NODE_STR: - lit = rb_node_str_string_val(node); - node = NEW_SYM(lit, loc); + node = str_to_sym_node(p, node, loc); break; default: node = NEW_DSYM(0, 1, NEW_LIST(node, loc), loc); @@ -15560,23 +15568,6 @@ nd_value(struct parser_params *p, NODE *node) } static void -warn_duplicate_keys_check_key(struct parser_params *p, st_data_t key, st_table *literal_keys) -{ - if (OBJ_BUILTIN_TYPE(key) == T_NODE && nd_type(key) == NODE_SYM) { - rb_parser_string_t *parser_str = RNODE_SYM(key)->string; - struct RString fake_str; - VALUE str = rb_setup_fake_str(&fake_str, parser_str->ptr, parser_str->len, parser_str->enc); - if (rb_enc_asciicompat(parser_str->enc) && rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { - st_free_table(literal_keys); - /* Since we have a ASCII compatible encoding and the coderange is - * broken, sym_check_asciionly should raise an EncodingError. */ - rb_check_id_cstr(parser_str->ptr, parser_str->len, parser_str->enc); - rb_bug("unreachable"); - } - } -} - -static void warn_duplicate_keys(struct parser_params *p, NODE *hash) { struct st_hash_type literal_type = { @@ -15594,18 +15585,12 @@ warn_duplicate_keys(struct parser_params *p, NODE *hash) if (!head) { key = (st_data_t)value; } - else if (nd_type_st_key_enable_p(head)) { - warn_duplicate_keys_check_key(p, (st_data_t)head, literal_keys); - - key = (st_data_t)nd_st_key(p, head); - if (st_delete(literal_keys, &key, &data)) { - rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data), - "key %+"PRIsVALUE" is duplicated and overwritten on line %d", - nd_value(p, head), nd_line(head)); - } + else if (nd_type_st_key_enable_p(head) && + st_delete(literal_keys, (key = (st_data_t)nd_st_key(p, head), &key), &data)) { + rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data), + "key %+"PRIsVALUE" is duplicated and overwritten on line %d", + nd_value(p, head), nd_line(head)); } - - warn_duplicate_keys_check_key(p, key, literal_keys); st_insert(literal_keys, (st_data_t)key, (st_data_t)hash); hash = next; } diff --git a/ruby_parser.c b/ruby_parser.c index 9560c66ef1..f74511941b 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -292,18 +292,6 @@ enc_symname_type(const char *name, long len, void *enc, unsigned int allowed_att return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset); } -static ID -check_id_cstr(const char *ptr, long len, void *enc) -{ - return rb_check_id_cstr(ptr, len, (rb_encoding *)enc); -} - -static VALUE -setup_fake_str(struct RString *fake_str, const char *name, long len, void *enc) -{ - return rb_setup_fake_str(fake_str, name, len, (rb_encoding *)enc); -} - typedef struct { struct parser_params *parser; rb_encoding *enc; @@ -563,7 +551,7 @@ static const rb_parser_config_t rb_global_parser_config = { .id2str = rb_id2str, .id2sym = rb_id2sym, .sym2id = rb_sym2id, - .check_id_cstr = check_id_cstr, + .str_valid_encoding_p = rb_str_valid_encoding_p, .str_catf = rb_str_catf, .str_cat_cstr = rb_str_cat_cstr, @@ -578,12 +566,10 @@ static const rb_parser_config_t rb_global_parser_config = { .str_resize = rb_str_resize, .str_new = rb_str_new, .str_new_cstr = rb_str_new_cstr, - .setup_fake_str = setup_fake_str, .fstring = rb_fstring, .is_ascii_string = is_ascii_string2, .enc_str_new = enc_str_new, .enc_str_buf_cat = enc_str_buf_cat, - .enc_str_coderange = rb_enc_str_coderange, .str_buf_append = rb_str_buf_append, .str_vcatf = rb_str_vcatf, .string_value_cstr = rb_string_value_cstr, @@ -643,7 +629,6 @@ static const rb_parser_config_t rb_global_parser_config = { .encoding_set = encoding_set, .encoding_is_ascii8bit = encoding_is_ascii8bit, .usascii_encoding = usascii_encoding, - .enc_coderange_broken = ENC_CODERANGE_BROKEN, .ractor_make_shareable = rb_ractor_make_shareable, diff --git a/rubyparser.h b/rubyparser.h index 59f594bad0..4b2e19a2a7 100644 --- a/rubyparser.h +++ b/rubyparser.h @@ -1277,7 +1277,7 @@ typedef struct rb_parser_config_struct { VALUE (*id2str)(ID id); VALUE (*id2sym)(ID x); ID (*sym2id)(VALUE sym); - ID (*check_id_cstr)(const char *ptr, long len, rb_encoding *enc); + VALUE (*str_valid_encoding_p)(VALUE str); /* String */ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) @@ -1294,12 +1294,10 @@ typedef struct rb_parser_config_struct { VALUE (*str_resize)(VALUE str, long len); VALUE (*str_new)(const char *ptr, long len); VALUE (*str_new_cstr)(const char *ptr); - VALUE (*setup_fake_str)(struct RString *fake_str, const char *name, long len, rb_encoding *enc); VALUE (*fstring)(VALUE); int (*is_ascii_string)(VALUE str); VALUE (*enc_str_new)(const char *ptr, long len, rb_encoding *enc); VALUE (*enc_str_buf_cat)(VALUE str, const char *ptr, long len, rb_encoding *enc); - int (*enc_str_coderange)(VALUE str); VALUE (*str_buf_append)(VALUE str, VALUE str2); RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0) VALUE (*str_vcatf)(VALUE str, const char *fmt, va_list ap); @@ -1364,7 +1362,6 @@ typedef struct rb_parser_config_struct { void (*encoding_set)(VALUE obj, int encindex); int (*encoding_is_ascii8bit)(VALUE obj); rb_encoding *(*usascii_encoding)(void); - int enc_coderange_broken; /* Ractor */ VALUE (*ractor_make_shareable)(VALUE obj); @@ -11030,7 +11030,7 @@ rb_str_b(VALUE str) * "\x80".force_encoding("UTF-8").valid_encoding? # => false */ -static VALUE +VALUE rb_str_valid_encoding_p(VALUE str) { int cr = rb_enc_str_coderange(str); diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index 1ea0d1763f..355f524e1a 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -1365,13 +1365,17 @@ eom assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073 end + def test_invalid_encoding_symbol + assert_syntax_error('{"\xC3": 1}', "invalid symbol") + end + def test_invalid_symbol_in_hash_memory_leak assert_no_memory_leak([], "#{<<-'begin;'}", "#{<<-'end;'}", rss: true) str = '{"\xC3": 1}'.force_encoding("UTF-8") code = proc do eval(str) raise "unreachable" - rescue EncodingError + rescue SyntaxError end 1_000.times(&code) diff --git a/universal_parser.c b/universal_parser.c index 0e7059638b..9bf3c65936 100644 --- a/universal_parser.c +++ b/universal_parser.c @@ -173,7 +173,7 @@ struct rb_imemo_tmpbuf_struct { #define ID2SYM p->config->id2sym #undef SYM2ID #define SYM2ID p->config->sym2id -#define rb_check_id_cstr p->config->check_id_cstr +#define rb_str_valid_encoding_p p->config->str_valid_encoding_p #define rb_str_catf p->config->str_catf #undef rb_str_cat_cstr @@ -192,12 +192,10 @@ struct rb_imemo_tmpbuf_struct { #define rb_str_new p->config->str_new #undef rb_str_new_cstr #define rb_str_new_cstr p->config->str_new_cstr -#define rb_setup_fake_str p->config->setup_fake_str #define rb_fstring p->config->fstring #define is_ascii_string p->config->is_ascii_string #define rb_enc_str_new p->config->enc_str_new #define rb_enc_str_buf_cat p->config->enc_str_buf_cat -#define rb_enc_str_coderange p->config->enc_str_coderange #define rb_str_buf_append p->config->str_buf_append #define rb_str_vcatf p->config->str_vcatf #undef StringValueCStr @@ -262,7 +260,6 @@ struct rb_imemo_tmpbuf_struct { #define ENCODING_SET p->config->encoding_set #define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit #define rb_usascii_encoding p->config->usascii_encoding -#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken #define rb_ractor_make_shareable p->config->ractor_make_shareable |