about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-11-27 14:17:02 -0500
committer: git <svn-admin@ruby-lang.org> 2023-11-28 13:25:48 +0000
commit: c798943a4a272f213d21295a837da06ed5fa9a51 (patch)
tree: 618a074700e2d501beec4db66320e2e1f5a6f085
parent: 43dc8e9012dd7c390f1299d1b653656c81ae2aa7 (diff)
download: ruby-c798943a4a272f213d21295a837da06ed5fa9a51.tar.gz
[ruby/prism] Move DATA parsing into its own parse result field
https://github.com/ruby/prism/commit/42b60b6e95
-rw-r--r-- lib/prism/ffi.rb | 4
-rw-r--r-- lib/prism/lex_compat.rb | 2
-rw-r--r-- lib/prism/parse_result.rb | 27
-rw-r--r-- prism/extension.c | 44
-rw-r--r-- prism/parser.h | 6
-rw-r--r-- prism/prism.c | 4
-rw-r--r-- prism/templates/lib/prism/serialize.rb.erb | 15
-rw-r--r-- prism/templates/src/serialize.c.erb | 14
-rw-r--r-- test/prism/comments_test.rb | 34
9 files changed, 76 insertions, 74 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 847990ed9a..36f1c398de 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -254,10 +254,10 @@ module Prism
loader = Serialize::Loader.new(source, buffer.read)
tokens = loader.load_tokens
- node, comments, magic_comments, errors, warnings = loader.load_nodes
+ node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
- ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source)
+ ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
end
end
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index b6d12053a0..66be275bcd 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -831,7 +831,7 @@ module Prism
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)
- ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, [])
+ ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, [])
end
end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 50c23bce65..753d72f10b 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -238,11 +238,6 @@ module Prism
def deconstruct_keys(keys)
{ location: location }
end
-
- # This can only be true for inline comments.
- def trailing?
- false
- end
end
# InlineComment objects are the most common. They correspond to comments in
@@ -263,18 +258,14 @@ module Prism
# EmbDocComment objects correspond to comments that are surrounded by =begin
# and =end.
class EmbDocComment < Comment
- # Returns a string representation of this comment.
- def inspect
- "#<Prism::EmbDocComment @location=#{location.inspect}>"
+ # This can only be true for inline comments.
+ def trailing?
+ false
end
- end
- # DATAComment objects correspond to comments that are after the __END__
- # keyword in a source file.
- class DATAComment < Comment
# Returns a string representation of this comment.
def inspect
- "#<Prism::DATAComment @location=#{location.inspect}>"
+ "#<Prism::EmbDocComment @location=#{location.inspect}>"
end
end
@@ -378,6 +369,11 @@ module Prism
# The list of magic comments that were encountered during parsing.
attr_reader :magic_comments
+ # An optional location that represents the location of the content after the
+ # __END__ marker. This content is loaded into the DATA constant when the
+ # file being parsed is the main file being executed.
+ attr_reader :data_loc
+
# The list of errors that were generated during parsing.
attr_reader :errors
@@ -388,10 +384,11 @@ module Prism
attr_reader :source
# Create a new parse result object with the given values.
- def initialize(value, comments, magic_comments, errors, warnings, source)
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
@value = value
@comments = comments
@magic_comments = magic_comments
+ @data_loc = data_loc
@errors = errors
@warnings = warnings
@source = source
@@ -399,7 +396,7 @@ module Prism
# Implement the hash pattern matching interface for ParseResult.
def deconstruct_keys(keys)
- { value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
+ { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns true if there were no errors during parsing and false if there
diff --git a/prism/extension.c b/prism/extension.c
index 9ecd1e30da..3637cc1617 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -12,7 +12,6 @@ VALUE rb_cPrismLocation;
VALUE rb_cPrismComment;
VALUE rb_cPrismInlineComment;
VALUE rb_cPrismEmbDocComment;
-VALUE rb_cPrismDATAComment;
VALUE rb_cPrismMagicComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
@@ -320,22 +319,7 @@ parser_comments(pm_parser_t *parser, VALUE source) {
LONG2FIX(comment->end - comment->start)
};
- VALUE type;
- switch (comment->type) {
- case PM_COMMENT_INLINE:
- type = rb_cPrismInlineComment;
- break;
- case PM_COMMENT_EMBDOC:
- type = rb_cPrismEmbDocComment;
- break;
- case PM_COMMENT___END__:
- type = rb_cPrismDATAComment;
- break;
- default:
- type = rb_cPrismInlineComment;
- break;
- }
-
+ VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) };
rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type));
}
@@ -375,6 +359,25 @@ parser_magic_comments(pm_parser_t *parser, VALUE source) {
}
/**
+ * Extract out the data location from the parser into a Location instance if one
+ * exists.
+ */
+static VALUE
+parser_data_loc(const pm_parser_t *parser, VALUE source) {
+ if (parser->data_loc.end == NULL) {
+ return Qnil;
+ } else {
+ VALUE argv[] = {
+ source,
+ LONG2FIX(parser->data_loc.start - parser->start),
+ LONG2FIX(parser->data_loc.end - parser->data_loc.start)
+ };
+
+ return rb_class_new_instance(3, argv, rb_cPrismLocation);
+ }
+}
+
+/**
* Extract the errors out of the parser into an array.
*/
static VALUE
@@ -531,6 +534,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
value,
parser_comments(&parser, source),
parser_magic_comments(&parser, source),
+ parser_data_loc(&parser, source),
parser_errors(&parser, parse_lex_data.encoding, source),
parser_warnings(&parser, parse_lex_data.encoding, source),
source
@@ -538,7 +542,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
- return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
+ return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
}
/**
@@ -601,12 +605,13 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
pm_ast_new(&parser, node, encoding),
parser_comments(&parser, source),
parser_magic_comments(&parser, source),
+ parser_data_loc(&parser, source),
parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding, source),
source
};
- VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
+ VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@@ -938,7 +943,6 @@ Init_prism(void) {
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
- rb_cPrismDATAComment = rb_define_class_under(rb_cPrism, "DATAComment", rb_cPrismComment);
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
diff --git a/prism/parser.h b/prism/parser.h
index e3c93b4246..86442d2a22 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -361,8 +361,7 @@ typedef struct pm_context_node {
/** This is the type of a comment that we've found while parsing. */
typedef enum {
PM_COMMENT_INLINE,
- PM_COMMENT_EMBDOC,
- PM_COMMENT___END__
+ PM_COMMENT_EMBDOC
} pm_comment_type_t;
/**
@@ -571,6 +570,9 @@ struct pm_parser {
/** The list of magic comments that have been found while parsing. */
pm_list_t magic_comment_list;
+ /** The optional location of the __END__ keyword and its contents. */
+ pm_location_t data_loc;
+
/** The list of warnings that have been found while parsing. */
pm_list_t warning_list;
diff --git a/prism/prism.c b/prism/prism.c
index 1751857e1e..f1c0e07760 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9248,8 +9248,8 @@ parser_lex(pm_parser_t *parser) {
parser->current.type = PM_TOKEN___END__;
parser_lex_callback(parser);
- pm_comment_t *comment = parser_comment(parser, PM_COMMENT___END__);
- pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
+ parser->data_loc.start = parser->current.start;
+ parser->data_loc.end = parser->current.end;
LEX(PM_TOKEN_EOF);
}
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 058142682e..681b6117b4 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -95,9 +95,10 @@ module Prism
def load_metadata
comments = load_comments
magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
+ data_loc = load_optional_location
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
- [comments, magic_comments, errors, warnings]
+ [comments, magic_comments, data_loc, errors, warnings]
end
def load_tokens
@@ -117,11 +118,11 @@ module Prism
tokens = load_tokens
encoding = load_encoding
load_start_line
- comments, magic_comments, errors, warnings = load_metadata
+ comments, magic_comments, data_loc, errors, warnings = load_metadata
tokens.each { |token,| token.value.force_encoding(encoding) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
- Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
+ Prism::ParseResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
end
def load_nodes
@@ -129,17 +130,17 @@ module Prism
load_encoding
load_start_line
- comments, magic_comments, errors, warnings = load_metadata
+ comments, magic_comments, data_loc, errors, warnings = load_metadata
@constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil)
- [load_node, comments, magic_comments, errors, warnings]
+ [load_node, comments, magic_comments, data_loc, errors, warnings]
end
def load_result
- node, comments, magic_comments, errors, warnings = load_nodes
- Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source)
+ node, comments, magic_comments, data_loc, errors, warnings = load_nodes
+ Prism::ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
end
private
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index db4c91e0cd..0ea70a3976 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -15,7 +15,7 @@ pm_sizet_to_u32(size_t value) {
}
static void
-pm_serialize_location(pm_parser_t *parser, pm_location_t *location, pm_buffer_t *buffer) {
+pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) {
assert(location->start);
assert(location->end);
assert(location->start <= location->end);
@@ -171,6 +171,16 @@ pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_
}
static void
+pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
+ if (parser->data_loc.end == NULL) {
+ pm_buffer_append_byte(buffer, 0);
+ } else {
+ pm_buffer_append_byte(buffer, 1);
+ pm_serialize_location(parser, &parser->data_loc, buffer);
+ }
+}
+
+static void
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
// serialize message
size_t message_length = strlen(diagnostic->message);
@@ -214,6 +224,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
<%- end -%>
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
+ pm_serialize_data_loc(parser, buffer);
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
@@ -310,6 +321,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
pm_buffer_append_varint(buffer, parser.start_line);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
+ pm_serialize_data_loc(&parser, buffer);
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
diff --git a/test/prism/comments_test.rb b/test/prism/comments_test.rb
index d14409458c..b99c00268c 100644
--- a/test/prism/comments_test.rb
+++ b/test/prism/comments_test.rb
@@ -39,37 +39,23 @@ module Prism
)
end
- def test_comment___END__
- source = <<~RUBY
+ def test___END__
+ result = Prism.parse(<<~RUBY)
__END__
comment
RUBY
- assert_comment(
- source,
- DATAComment,
- start_offset: 0,
- end_offset: 16,
- start_line: 1,
- end_line: 3,
- start_column: 0,
- end_column: 0
- )
+ data_loc = result.data_loc
+ assert_equal 0, data_loc.start_offset
+ assert_equal 16, data_loc.end_offset
end
- def test_comment___END__crlf
- source = "__END__\r\ncomment\r\n"
+ def test___END__crlf
+ result = Prism.parse("__END__\r\ncomment\r\n")
- assert_comment(
- source,
- DATAComment,
- start_offset: 0,
- end_offset: 18,
- start_line: 1,
- end_line: 3,
- start_column: 0,
- end_column: 0
- )
+ data_loc = result.data_loc
+ assert_equal 0, data_loc.start_offset
+ assert_equal 18, data_loc.end_offset
end
def test_comment_embedded_document