test/prism/parse_test.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278

# frozen_string_literal: true

require_relative "test_helper"

module Prism
  class ParseTest < TestCase
    # A subclass of Ripper that extracts out magic comments.
    class MagicCommentRipper < Ripper
      # The [key, value] pairs collected so far, in the order they were
      # reported to on_magic_comment.
      attr_reader :magic_comments

      # Hands every argument to Ripper unchanged, then starts with an empty
      # list of collected pairs.
      def initialize(*)
        super
        @magic_comments = []
      end

      # Stores the reported pair and then defers to the inherited handler, whose
      # return value is passed through.
      def on_magic_comment(key, value)
        magic_comments.push([key, value])
        super
      end
    end

    # When we pretty-print the trees to compare against the snapshots, we want to
    # be certain that we print with the same external encoding. This is because
    # methods like Symbol#inspect take into account external encoding and it could
    # change how the snapshot is generated. On machines with certain settings
    # (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
    # going to force it to be UTF-8 to keep the snapshots consistent.
    def setup
      # Remember the current external encoding so that teardown can restore it.
      @previous_default_external = Encoding.default_external

      # Switch to UTF-8; ignore_warnings silences $VERBOSE around the assignment.
      ignore_warnings do
        Encoding.default_external = Encoding::UTF_8
      end
    end

    def teardown
      # Put back the external encoding that setup recorded, again with $VERBOSE
      # silenced around the assignment.
      ignore_warnings do
        Encoding.default_external = @previous_default_external
      end
    end

    def test_empty_string
      # Parsing an empty source should produce a statement list with no entries.
      statements = Prism.parse("").value.statements

      assert_equal([], statements.body)
    end

    def test_parse_takes_file_path
      # The SourceFileNode found in the tree should report the path that was
      # handed to parse through the filepath: option.
      expected_path = "filepath.rb"
      program = Prism.parse("def foo; __FILE__; end", filepath: expected_path).value
      source_file = find_source_file_node(program)

      assert_equal(expected_path, source_file.filepath)
    end

    def test_parse_takes_line
      # The program should start on the line passed via line:, and the __FILE__
      # on the second source line should be reported one line later.
      first_line = 4
      program = Prism.parse("def foo\n __FILE__\nend", line: first_line).value
      source_file = find_source_file_node(program)

      assert_equal(first_line, program.location.start_line)
      assert_equal(first_line + 1, source_file.location.start_line)
    end

    def test_parse_takes_negative_lines
      # Same check as for a positive starting line, but with a negative line:
      # value; the __FILE__ on the second source line is one line later.
      first_line = -2
      program = Prism.parse("def foo\n __FILE__\nend", line: first_line).value
      source_file = find_source_file_node(program)

      assert_equal(first_line, program.location.start_line)
      assert_equal(first_line + 1, source_file.location.start_line)
    end

    def test_parse_lex
      # The value of parse_lex unpacks into the syntax tree followed by the
      # token list.
      program, tokens = Prism.parse_lex("def foo; end").value

      assert_kind_of(ProgramNode, program)
      assert_equal(5, tokens.length)
    end

    def test_parse_lex_file
      # Uses this test file itself as the input, so only the tree's type and the
      # presence of tokens are checked.
      program, tokens = Prism.parse_lex_file(__FILE__).value

      assert_kind_of(ProgramNode, program)
      refute_empty(tokens)
    end

    # To accurately compare against Ripper, we need to make sure that we're
    # running on CRuby 3.2+.
    #
    # NOTE(review): RUBY_VERSION is a String, so this comparison (and the one
    # against "3.3.0" further down) is lexicographic; it would misorder a
    # two-digit component such as "3.10.0".
    ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0"

    # The FOCUS environment variable allows you to specify one particular fixture
    # to test, instead of all of them. Its value is used as a path relative to
    # the fixtures directory; without it, every .txt file found under that
    # directory (at any depth) is used.
    base = File.join(__dir__, "fixtures")
    relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]

    # Define one test_filepath_<relative> method per fixture. Everything outside
    # of the define_method block runs once, while the class body is evaluated;
    # the block itself runs when the generated test is executed.
    relatives.each do |relative|
      # These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试"
      next if RUBY_ENGINE == "truffleruby" and %w[seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative)

      # The snapshot lives under snapshots/ at the same relative path as the
      # fixture; its directory is created up front so it can be written later.
      filepath = File.join(base, relative)
      snapshot = File.expand_path(File.join("snapshots", relative), __dir__)

      directory = File.dirname(snapshot)
      FileUtils.mkdir_p(directory) unless File.directory?(directory)

      # Two independent switches, both defaulting to ripper_enabled:
      # ripper_should_parse guards the "Ripper accepts this source" check, and
      # ripper_should_match additionally guards the token and magic comment
      # comparisons. Both are captured by the define_method block below, which
      # may itself set ripper_should_match to false while the test runs.
      ripper_should_parse = ripper_should_match = ripper_enabled

      # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
      # we're on an earlier version.
      ripper_should_match = false if relative == "seattlerb/pct_w_heredoc_interp_nested.txt" && RUBY_VERSION < "3.3.0"

      # It seems like there are some oddities with nested heredocs and ripper.
      # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
      ripper_should_match = false if relative == "seattlerb/heredoc_nested.txt"

      # Ripper seems to have a bug that the regex portions before and after the heredoc are combined
      # into a single token. See https://bugs.ruby-lang.org/issues/19838.
      #
      # Additionally, Ripper cannot parse the %w[] fixture in this file, so set ripper_should_parse to false.
      ripper_should_parse = false if relative == "spanning_heredoc.txt"

      define_method "test_filepath_#{relative}" do
        # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows,
        # and explicitly set the external encoding to UTF-8 to override the binmode default.
        source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)

        if ripper_should_parse
          # src is the text handed to Ripper for the validity check only; it may
          # be wrapped below, while source itself is left untouched for Prism.
          src = source

          # Fixtures whose relative path contains one of these words get wrapped
          # in a lambda body with a rescue clause before being given to Ripper.
          # Whenever the source is wrapped, the Ripper comparisons further down
          # are switched off for this fixture.
          case relative
          when /break|next|redo|if|unless|rescue|control|keywords|retry/
            # Uncaught syntax errors: Invalid break, Invalid next
            src = "->do\nrescue\n#{src}\nend"
            ripper_should_match = false
          end
          # Likewise, a source with a line starting with yield (after optional
          # spaces) is wrapped in a method definition.
          case src
          when /^ *yield/
            # Uncaught syntax errors: Invalid yield
            src = "def __invalid_yield__\n#{src}\nend"
            ripper_should_match = false
          end

          # Make sure that it can be correctly parsed by Ripper. If it can't, then we have a fixture
          # that is invalid Ruby.
          refute_nil(Ripper.sexp_raw(src), "Ripper failed to parse")
        end

        # Next, assert that there were no errors during parsing.
        result = Prism.parse(source, filepath: relative)
        assert_empty result.errors

        # Next, pretty print the source.
        printed = PP.pp(result.value, +"", 79)

        if File.exist?(snapshot)
          saved = File.read(snapshot)

          # If the snapshot file exists, but the printed value does not match the
          # snapshot, then update the snapshot file.
          if printed != saved
            File.write(snapshot, printed)
            warn("Updated snapshot at #{snapshot}.")
          end

          # If the snapshot file exists, then assert that the printed value
          # matches the snapshot. saved still holds the contents read before any
          # update above, so a run that rewrote the snapshot fails here.
          assert_equal(saved, printed)
        else
          # If the snapshot file does not yet exist, then write it out now.
          File.write(snapshot, printed)
          warn("Created snapshot at #{snapshot}.")
        end

        # Next, assert that the value can be serialized and deserialized without
        # changing the shape of the tree.
        assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)

        # Next, check that the location ranges of each node in the tree are a
        # superset of their respective child nodes.
        assert_non_overlapping_locations(result.value)

        # Next, assert that the newlines are in the expected places. The expected
        # list starts with 0 and then holds the offset just past every "\n";
        # scanning a binary copy (source.b) makes these byte offsets.
        expected_newlines = [0]
        source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
        assert_equal expected_newlines, Debug.newlines(source)

        if ripper_should_parse && ripper_should_match
          # Finally, assert that we can lex the source and get the same tokens as
          # Ripper.
          lex_result = Prism.lex_compat(source)
          assert_equal [], lex_result.errors
          tokens = lex_result.value

          # Tokens are compared pairwise, driven by the Ripper list. A
          # SyntaxError raised while doing so is reported as an invalid fixture.
          begin
            Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
              assert_equal ripper, prism
            end
          rescue SyntaxError
            raise ArgumentError, "Test file has invalid syntax #{filepath}"
          end

          # Next, check that we get the correct number of magic comments when
          # lexing with ripper, and that each key and value agrees in order.
          expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
          actual = result.magic_comments

          assert_equal expected.length, actual.length
          expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
            assert_equal expected_key, magic_comment.key
            assert_equal expected_value, magic_comment.value
          end
        end
      end
    end

    Dir["*.txt", base: base].reject { |name| name == "newline_terminated.txt" }.each do |relative|
      # Each snippet (snippets are separated by \n\n) is parsed on its own, to
      # ensure the parser does not try to read bytes past the end of a snippet.
      define_method("test_individual_snippets_#{relative}") do
        fixture_path = File.join(base, relative)

        # Read in binmode so CRLF is not converted on Windows, and set the
        # external encoding to UTF-8 explicitly to override the binmode default.
        contents = File.read(fixture_path, binmode: true, external_encoding: Encoding::UTF_8)
        snippets = contents.split(/(?<=\S)\n\n(?=\S)/).map(&:rstrip)

        snippets.each do |snippet|
          parsed = Prism.parse(snippet, filepath: relative)
          assert_empty(parsed.errors)

          # A dump/load round trip must produce an equal tree.
          serialized = Prism.dump(snippet, filepath: relative)
          assert_equal_nodes(parsed.value, Prism.load(snippet, serialized).value)
        end
      end
    end

    private

    # Check that the location ranges of each node in the tree are a superset of
    # their respective child nodes.
    def assert_non_overlapping_locations(node)
      pending = [node]

      while (parent = pending.shift)
        # Heredocs are exempt from the containment check because their location
        # only covers the heredoc declaration. Only the string-like node types
        # can be heredocs, which is recognised by an opening that starts with
        # "<<"; for every other node this stays nil.
        heredoc =
          case parent
          when StringNode, XStringNode, InterpolatedStringNode, InterpolatedXStringNode
            parent.opening&.start_with?("<<")
          end

        parent.child_nodes.each do |child|
          # child_nodes may contain nil entries; there is nothing to visit then.
          next unless child

          # Every real child is visited later, whether or not it is checked now.
          pending.push(child)
          next if heredoc

          outer = parent.location
          inner = child.location
          assert_operator outer.start_offset, :<=, inner.start_offset
          assert_operator outer.end_offset, :>=, inner.end_offset
        end
      end
    end

    def find_source_file_node(program)
      # Breadth-first walk starting at program: the first SourceFileNode taken
      # off the front of the list is returned. When the list runs dry without a
      # match, the loop ends and the method returns nil.
      pending = [program]

      while (candidate = pending.shift)
        case candidate
        when SourceFileNode
          return candidate
        else
          pending.push(*candidate.compact_child_nodes)
        end
      end
    end

    def ignore_warnings
      # Runs the given block with $VERBOSE set to nil and returns the block's
      # value. The previous $VERBOSE is restored afterwards, even when the block
      # raises.
      saved_verbosity, $VERBOSE = $VERBOSE, nil

      begin
        yield
      ensure
        $VERBOSE = saved_verbosity
      end
    end
  end
end