diff options
author | Mike Dalton <michaelcdalton@gmail.com> | 2023-11-18 01:07:04 -0600 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2023-11-18 18:47:15 +0000 |
commit | bbf14bbba62621b51fafed26b6fe98fb8ae016a3 (patch) | |
tree | 88ec293bb8e2f6ab20fdbc792fec2560a13f4608 /prism | |
parent | fdcb97833cae7580fc85ea5786c72bb575598043 (diff) | |
download | ruby-bbf14bbba62621b51fafed26b6fe98fb8ae016a3.tar.gz |
[ruby/prism] Add IBM864 encoding
Fixes https://github.com/ruby/prism/pull/1868
Related #1843
https://github.com/ruby/prism/commit/abc136dfc9
Diffstat (limited to 'prism')
-rw-r--r-- | prism/enc/pm_encoding.h | 1 |
-rw-r--r-- | prism/enc/pm_tables.c | 35 |
-rw-r--r-- | prism/prism.c | 2 |
3 files changed, 38 insertions, 0 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h index 50d4d38dc4..957fa794f6 100644 --- a/prism/enc/pm_encoding.h +++ b/prism/enc/pm_encoding.h @@ -174,6 +174,7 @@ extern pm_encoding_t pm_encoding_ibm857; extern pm_encoding_t pm_encoding_ibm860; extern pm_encoding_t pm_encoding_ibm861; extern pm_encoding_t pm_encoding_ibm862; +extern pm_encoding_t pm_encoding_ibm864; extern pm_encoding_t pm_encoding_iso_8859_1; extern pm_encoding_t pm_encoding_iso_8859_2; extern pm_encoding_t pm_encoding_iso_8859_3; diff --git a/prism/enc/pm_tables.c b/prism/enc/pm_tables.c index b7ce80f7ff..6f9e513664 100644 --- a/prism/enc/pm_tables.c +++ b/prism/enc/pm_tables.c @@ -338,6 +338,30 @@ static uint8_t pm_encoding_ibm862_table[256] = { /** * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM864 character. + */ +static uint8_t pm_encoding_ibm864_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x + 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a * piece of information about the 
corresponding ISO-8859-1 character. */ static uint8_t pm_encoding_iso_8859_1_table[256] = { @@ -1230,6 +1254,7 @@ PRISM_ENCODING_TABLE(ibm857) PRISM_ENCODING_TABLE(ibm860) PRISM_ENCODING_TABLE(ibm861) PRISM_ENCODING_TABLE(ibm862) +PRISM_ENCODING_TABLE(ibm864) PRISM_ENCODING_TABLE(iso_8859_1) PRISM_ENCODING_TABLE(iso_8859_2) PRISM_ENCODING_TABLE(iso_8859_3) @@ -1417,6 +1442,16 @@ pm_encoding_t pm_encoding_ibm862 = { .multibyte = false }; +/** IBM864 */ +pm_encoding_t pm_encoding_ibm864 = { + .name = "IBM864", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_ibm864_alnum_char, + .alpha_char = pm_encoding_ibm864_alpha_char, + .isupper_char = pm_encoding_ibm864_isupper_char, + .multibyte = false +}; + /** ISO-8859-1 */ pm_encoding_t pm_encoding_iso_8859_1 = { .name = "ISO-8859-1", diff --git a/prism/prism.c b/prism/prism.c index 2cd00b00a2..a219ffa52a 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -6075,6 +6075,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("CP860", pm_encoding_ibm860); ENCODING1("CP861", pm_encoding_ibm861); ENCODING1("CP862", pm_encoding_ibm862); + ENCODING1("CP864", pm_encoding_ibm864); ENCODING1("CP874", pm_encoding_windows_874); ENCODING1("CP878", pm_encoding_koi8_r); ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j); @@ -6113,6 +6114,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star ENCODING1("IBM860", pm_encoding_ibm860); ENCODING1("IBM861", pm_encoding_ibm861); ENCODING1("IBM862", pm_encoding_ibm862); + ENCODING1("IBM864", pm_encoding_ibm864); ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1); ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2); ENCODING2("ISO-8859-3", "ISO8859-3", pm_encoding_iso_8859_3); |