about | summary | refs | log | tree | commit | diff | stats
path: root/prism
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-11-29 11:42:47 -0500
committer: Kevin Newton <kddnewton@gmail.com> 2023-11-30 21:37:56 -0500
commit: 9ba92327f2aed5b5d95ad1ce51fe695f14a3428e (patch)
tree: 4cd320f488c8653b0c45d6491a04f9bda17f58a5 /prism
parent: 219c3c1c09417d99e9de295c15a9deb8d158ff86 (diff)
download: ruby-9ba92327f2aed5b5d95ad1ce51fe695f14a3428e.tar.gz
[PRISM] Consolidate SJIS encodings
Diffstat (limited to 'prism')
-rw-r--r-- prism/enc/pm_encoding.h 3
-rw-r--r-- prism/enc/pm_mac_japanese.c 57
-rw-r--r-- prism/enc/pm_shift_jis.c 52
-rw-r--r-- prism/enc/pm_windows_31j.c 57
-rw-r--r-- prism/prism.c 3
5 files changed, 57 insertions, 115 deletions
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 1985f00f26..7dfc8cd982 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -213,6 +213,9 @@ extern pm_encoding_t pm_encoding_mac_thai;
extern pm_encoding_t pm_encoding_mac_turkish;
extern pm_encoding_t pm_encoding_mac_ukraine;
extern pm_encoding_t pm_encoding_shift_jis;
+extern pm_encoding_t pm_encoding_sjis_docomo;
+extern pm_encoding_t pm_encoding_sjis_kddi;
+extern pm_encoding_t pm_encoding_sjis_softbank;
extern pm_encoding_t pm_encoding_tis_620;
extern pm_encoding_t pm_encoding_utf_8;
extern pm_encoding_t pm_encoding_utf8_mac;
diff --git a/prism/enc/pm_mac_japanese.c b/prism/enc/pm_mac_japanese.c
deleted file mode 100644
index a5185f0e55..0000000000
--- a/prism/enc/pm_mac_japanese.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "prism/enc/pm_encoding.h"
-
-static size_t
-pm_encoding_mac_japanese_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
- return 1;
- }
-
- // These are the double byte characters.
- if (
- (n > 1) &&
- ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
- (b[1] >= 0x40 && b[1] <= 0xFC)
- ) {
- return 2;
- }
-
- return 0;
-}
-
-static size_t
-pm_encoding_mac_japanese_alpha_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
- return pm_encoding_ascii_alpha_char(b, n);
- } else {
- return 0;
- }
-}
-
-static size_t
-pm_encoding_mac_japanese_alnum_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
- return pm_encoding_ascii_alnum_char(b, n);
- } else {
- return 0;
- }
-}
-
-static bool
-pm_encoding_mac_japanese_isupper_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_mac_japanese_char_width(b, n) == 1) {
- return pm_encoding_ascii_isupper_char(b, n);
- } else {
- return 0;
- }
-}
-
-/** MacJapanese encoding */
-pm_encoding_t pm_encoding_mac_japanese = {
- .name = "MacJapanese",
- .char_width = pm_encoding_mac_japanese_char_width,
- .alnum_char = pm_encoding_mac_japanese_alnum_char,
- .alpha_char = pm_encoding_mac_japanese_alpha_char,
- .isupper_char = pm_encoding_mac_japanese_isupper_char,
- .multibyte = true
-};
diff --git a/prism/enc/pm_shift_jis.c b/prism/enc/pm_shift_jis.c
index f92956e08b..7833c6653b 100644
--- a/prism/enc/pm_shift_jis.c
+++ b/prism/enc/pm_shift_jis.c
@@ -48,7 +48,57 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
/** Shift_JIS encoding */
pm_encoding_t pm_encoding_shift_jis = {
- .name = "shift_jis",
+ .name = "Shift_JIS",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+};
+
+/** SJIS-DoCoMo encoding */
+pm_encoding_t pm_encoding_sjis_docomo = {
+ .name = "SJIS-DoCoMo",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+};
+
+/** SJIS-KDDI encoding */
+pm_encoding_t pm_encoding_sjis_kddi = {
+ .name = "SJIS-KDDI",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+};
+
+/** SJIS-SoftBank encoding */
+pm_encoding_t pm_encoding_sjis_softbank = {
+ .name = "SJIS-SoftBank",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+};
+
+/** MacJapanese encoding */
+pm_encoding_t pm_encoding_mac_japanese = {
+ .name = "MacJapanese",
+ .char_width = pm_encoding_shift_jis_char_width,
+ .alnum_char = pm_encoding_shift_jis_alnum_char,
+ .alpha_char = pm_encoding_shift_jis_alpha_char,
+ .isupper_char = pm_encoding_shift_jis_isupper_char,
+ .multibyte = true
+};
+
+/** Windows-31J */
+pm_encoding_t pm_encoding_windows_31j = {
+ .name = "Windows-31J",
.char_width = pm_encoding_shift_jis_char_width,
.alnum_char = pm_encoding_shift_jis_alnum_char,
.alpha_char = pm_encoding_shift_jis_alpha_char,
diff --git a/prism/enc/pm_windows_31j.c b/prism/enc/pm_windows_31j.c
deleted file mode 100644
index 848a9efd36..0000000000
--- a/prism/enc/pm_windows_31j.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "prism/enc/pm_encoding.h"
-
-static size_t
-pm_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
- // These are the single byte characters.
- if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
- return 1;
- }
-
- // These are the double byte characters.
- if (
- (n > 1) &&
- ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
- (b[1] >= 0x40 && b[1] <= 0xFC)
- ) {
- return 2;
- }
-
- return 0;
-}
-
-static size_t
-pm_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_windows_31j_char_width(b, n) == 1) {
- return pm_encoding_ascii_alpha_char(b, n);
- } else {
- return 0;
- }
-}
-
-static size_t
-pm_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_windows_31j_char_width(b, n) == 1) {
- return pm_encoding_ascii_alnum_char(b, n);
- } else {
- return 0;
- }
-}
-
-static bool
-pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
- if (pm_encoding_windows_31j_char_width(b, n) == 1) {
- return pm_encoding_ascii_isupper_char(b, n);
- } else {
- return false;
- }
-}
-
-/** Windows-31J */
-pm_encoding_t pm_encoding_windows_31j = {
- .name = "windows-31j",
- .char_width = pm_encoding_windows_31j_char_width,
- .alnum_char = pm_encoding_windows_31j_alnum_char,
- .alpha_char = pm_encoding_windows_31j_alpha_char,
- .isupper_char = pm_encoding_windows_31j_isupper_char,
- .multibyte = true
-};
diff --git a/prism/prism.c b/prism/prism.c
index a0c0e728b6..0cabae6232 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6317,6 +6317,9 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
case 'S': case 's':
ENCODING1("Shift_JIS", pm_encoding_shift_jis);
ENCODING1("SJIS", pm_encoding_windows_31j);
+ ENCODING1("SJIS-DoCoMo", pm_encoding_sjis_docomo);
+ ENCODING1("SJIS-KDDI", pm_encoding_sjis_kddi);
+ ENCODING1("SJIS-SoftBank", pm_encoding_sjis_softbank);
break;
case 'T': case 't':
ENCODING1("TIS-620", pm_encoding_tis_620);