diff options
author | Kevin Newton <kddnewton@gmail.com> | 2023-09-12 12:53:24 -0400 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2023-09-14 18:28:44 +0000 |
commit | 500f38e06f6da1f309c9ac2d7835c07b615e3d48 (patch) | |
tree | 3a45ca3df8d22de37088fc8f93187c1f417c6f89 /yarp | |
parent | 122f3d64bedecf7fd49c984e4dc5fdbe4dae5a4e (diff) | |
download | ruby-500f38e06f6da1f309c9ac2d7835c07b615e3d48.tar.gz |
[ruby/yarp] Parse frozen string literal comment
https://github.com/ruby/yarp/commit/373898e7cb
Diffstat (limited to 'yarp')
-rw-r--r-- | yarp/parser.h | 8 | ||||
-rw-r--r-- | yarp/yarp.c | 60 |
2 files changed, 60 insertions, 8 deletions
diff --git a/yarp/parser.h b/yarp/parser.h
index 93cb63573c..024ec5b8fa 100644
--- a/yarp/parser.h
+++ b/yarp/parser.h
@@ -394,6 +394,14 @@ struct yp_parser {
 
     // This flag indicates that we are currently parsing a keyword argument.
     unsigned int in_keyword_arg : 1;
+
+    // Whether or not the parser has seen a token that has semantic meaning
+    // (i.e., a token that is not a comment or whitespace).
+    unsigned int semantic_token_seen : 1;
+
+    // Whether or not we have found a frozen_string_literal magic comment with
+    // a true value.
+    unsigned int frozen_string_literal : 1;
 };
 
 #endif // YARP_PARSER_H
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 24a5d98752..2ff228c540 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -4951,8 +4951,7 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, pt
 static void
 parser_lex_encoding_comment(yp_parser_t *parser) {
     const uint8_t *start = parser->current.start + 1;
-    const uint8_t *end = next_newline(start, parser->end - start);
-    if (end == NULL) end = parser->end;
+    const uint8_t *end = parser->current.end;
 
     // These are the patterns we're going to match to find the encoding comment.
     // This is definitely not complete or even really correct.
@@ -5052,6 +5051,38 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
     yp_diagnostic_list_append(&parser->error_list, encoding_start, encoding_end, YP_ERR_INVALID_ENCODING_MAGIC_COMMENT);
 }
 
+// Check if this is a magic comment that includes the frozen_string_literal
+// pragma. If it does, set that field on the parser.
+static void
+parser_lex_frozen_string_literal_comment(yp_parser_t *parser) {
+    const uint8_t *cursor = parser->current.start + 1;
+    const uint8_t *end = parser->current.end;
+
+    size_t key_length = strlen("frozen_string_literal");
+    if (key_length > (size_t) (end - cursor)) return;
+
+    const uint8_t *cursor_limit = cursor + (end - cursor) - key_length + 1;
+
+    while ((cursor = yp_memchr(cursor, 'f', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
+        if (memcmp(cursor, "frozen_string_literal", key_length) == 0) {
+            cursor += yp_strspn_inline_whitespace(cursor + key_length, end - (cursor + key_length));
+
+            if (*cursor == ':' || *cursor == '=') {
+                cursor++;
+                cursor += yp_strspn_inline_whitespace(cursor, end - cursor);
+
+                if (cursor + 4 <= end && yp_strncasecmp(cursor, (const uint8_t *) "true", 4) == 0) {
+                    parser->frozen_string_literal = true;
+                }
+
+                return;
+            }
+        }
+
+        cursor++;
+    }
+}
+
 /******************************************************************************/
 /* Context manipulations */
 /******************************************************************************/
@@ -5982,6 +6013,11 @@ parser_lex(yp_parser_t *parser) {
     // already seen a comment.
     bool lexed_comment = false;
 
+    // Here we cache the current value of the semantic token seen flag. This is
+    // used to reset it in case we find a token that shouldn't flip this flag.
+    unsigned int semantic_token_seen = parser->semantic_token_seen;
+    parser->semantic_token_seen = true;
+
     switch (parser->lex_modes.current->mode) {
         case YP_LEX_DEFAULT:
         case YP_LEX_EMBEXPR:
@@ -6083,12 +6119,18 @@ parser_lex(yp_parser_t *parser) {
                             parser_lex_encoding_comment(parser);
                         }
 
+                        if (!semantic_token_seen) {
+                            parser_lex_frozen_string_literal_comment(parser);
+                        }
+
                         lexed_comment = true;
                     }
                     /* fallthrough */
                     case '\r':
                     case '\n': {
+                        parser->semantic_token_seen = semantic_token_seen;
                         size_t eol_length = match_eol_at(parser, parser->current.end - 1);
+
                         if (eol_length) {
                             // The only way you can have carriage returns in this
                             // particular loop is if you have a carriage return
@@ -13950,7 +13992,6 @@ yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const ch
 
     *parser = (yp_parser_t) {
         .lex_state = YP_LEX_STATE_BEG,
-        .command_start = true,
         .enclosure_nesting = 0,
         .lambda_enclosure_nesting = -1,
         .brace_nesting = 0,
@@ -13972,19 +14013,22 @@ yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const ch
         .error_list = YP_LIST_EMPTY,
         .current_scope = NULL,
         .current_context = NULL,
-        .recovering = false,
         .encoding = yp_encoding_utf_8,
-        .encoding_changed = false,
         .encoding_changed_callback = NULL,
         .encoding_decode_callback = NULL,
         .encoding_comment_start = source,
         .lex_callback = NULL,
-        .pattern_matching_newlines = false,
-        .in_keyword_arg = false,
         .filepath_string = filepath_string,
         .constant_pool = YP_CONSTANT_POOL_EMPTY,
         .newline_list = YP_NEWLINE_LIST_EMPTY,
-        .integer_base = 0
+        .integer_base = 0,
+        .command_start = true,
+        .recovering = false,
+        .encoding_changed = false,
+        .pattern_matching_newlines = false,
+        .in_keyword_arg = false,
+        .semantic_token_seen = false,
+        .frozen_string_literal = false
     };
 
     yp_accepts_block_stack_push(parser, true);