aboutsummaryrefslogtreecommitdiffstats
path: root/test/prism/encoding_test.rb
blob: 8427bddcbefa582c29f987ee7351bf094ebcaff7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# frozen_string_literal: true

require_relative "test_helper"

module YARP
  # Checks that magic encoding comments at the top of a source string are
  # reflected in the encoding of the strings exposed by the parse/lex results.
  class EncodingTest < TestCase
    # Define one test per encoding name. Each test parses a source whose magic
    # comment names the encoding and verifies that the identifier's name is
    # tagged with whatever Encoding.find resolves that name to.
    %w[
      ascii
      ascii-8bit
      big5
      binary
      euc-jp
      gbk
      iso-8859-1
      iso-8859-2
      iso-8859-3
      iso-8859-4
      iso-8859-5
      iso-8859-6
      iso-8859-7
      iso-8859-8
      iso-8859-9
      iso-8859-10
      iso-8859-11
      iso-8859-13
      iso-8859-14
      iso-8859-15
      iso-8859-16
      koi8-r
      shift_jis
      sjis
      us-ascii
      utf-8
      utf8-mac
      windows-31j
      windows-1251
      windows-1252
      CP1251
      CP1252
    ].each do |encoding|
      define_method "test_encoding_#{encoding}" do
        assert_name_encoding encoding, "# encoding: #{encoding}\nident"
      end
    end

    # The `coding:` spelling of the magic comment is accepted as well.
    def test_coding
      assert_name_encoding "utf-8", "# coding: utf-8\nident"
    end

    # Spaces, tabs, carriage returns and vertical tabs may surround the colon.
    def test_coding_with_whitespace
      assert_name_encoding "ascii-8bit", "# coding \t \r  \v   :     \t \v    \r   ascii-8bit \nident"
    end

    # The Emacs-style `-*- coding: ... -*-` form is accepted.
    def test_emacs_style
      assert_name_encoding "utf-8", "# -*- coding: utf-8 -*-\nident"
    end

    # The heredoc below processes the \x escapes, so the parser receives the
    # raw bytes 0x81 0x5C inside %w[]. 0x5C on its own is an ASCII backslash,
    # but here it follows 0x81 in a Shift_JIS-tagged file. The test expects no
    # errors and the two bytes to come back unchanged, i.e. the scanner
    # (strpbrk) has to take the file's encoding into account.
    def test_strpbrk_multibyte
      result = YARP.parse(<<~RUBY)
        # encoding: Shift_JIS
        %w[\x81\x5c]
      RUBY

      # assert_empty prints the offending errors when the check fails.
      assert_empty result.errors
      assert_equal(
        # Unary plus yields an unfrozen copy; force_encoding mutates its
        # receiver and this file enables frozen string literals.
        (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
        result.value.statements.body.first.elements.first.unescaped
      )
    end

    # Suffixed spellings of utf-8 in the magic comment all resolve to UTF-8.
    def test_utf_8_variations
      %w[
        utf-8-unix
        utf-8-dos
        utf-8-mac
        utf-8-*
      ].each do |encoding|
        # The message identifies which variation failed, since all of them
        # share this single test method.
        assert_name_encoding "utf-8", "# coding: #{encoding}\nident", "magic comment: #{encoding}"
      end
    end

    # The value of the very first lexed token (the magic comment itself) is
    # already tagged with the encoding that the comment declares.
    def test_first_lexed_token
      encoding = YARP.lex("# encoding: ascii-8bit").value[0][0].value.encoding
      assert_equal Encoding.find("ascii-8bit"), encoding
    end

    # Node#slice returns the source bytes tagged with the declared encoding.
    def test_slice_encoding
      slice = YARP.parse("# encoding: Shift_JIS\nア").value.slice
      # Explicit call parentheses keep the first argument from being read as a
      # grouped expression following `assert_equal `.
      assert_equal((+"ア").force_encoding(Encoding::Shift_JIS), slice)
      assert_equal Encoding::Shift_JIS, slice.encoding
    end

    private

    # Parses +source+ and asserts that the name of its first statement is
    # tagged with the encoding that Encoding.find returns for +expected_name+.
    #
    # expected_name - encoding name handed to Encoding.find
    # source        - Ruby source string (magic comment plus an identifier)
    # message       - optional failure message forwarded to assert_equal
    def assert_name_encoding(expected_name, source, message = nil)
      result = YARP.parse(source)
      actual = result.value.statements.body.first.name.encoding
      assert_equal Encoding.find(expected_name), actual, message
    end
  end
end