about summary refs log tree commit diff stats
path: root/enc/trans/utf8_mac.trans
diff options
context:
space:
mode:
Diffstat (limited to 'enc/trans/utf8_mac.trans')
-rw-r--r-- enc/trans/utf8_mac.trans 257
1 files changed, 257 insertions, 0 deletions
diff --git a/enc/trans/utf8_mac.trans b/enc/trans/utf8_mac.trans
new file mode 100644
index 0000000000..76c0a1595f
--- /dev/null
+++ b/enc/trans/utf8_mac.trans
@@ -0,0 +1,257 @@
+#include "transcode_data.h"
+
+<%
+ require 'utf8_mac-tbl'
+
+ def hexstr(str)
+ str.unpack("H*")[0]
+ end
+
+ transcode_tblgen("UTF-8", "UTF8-MAC", [
+ *MAC_DECOMPOSE_TBL.map{|k,v|[hexstr(k), hexstr(v)]},
+ ["{00-7F}", :nomap],
+ ["{c2-df}{80-bf}", :asis],
+ ["e0{a0-bf}{80-bf}", :asis],
+ ["{e1-ec}{80-bf}{80-bf}", :asis],
+ ["ed{80-9f}{80-bf}", :asis],
+ ["{ee-ef}{80-bf}{80-bf}", :asis],
+ ["f0{90-bf}{80-bf}{80-bf}", :asis],
+ ["{f1-f3}{80-bf}{80-bf}{80-bf}", :asis],
+ ])
+
+ map = {}
+ map["{00-7f}"] = :func_so
+ map["{c2-df}{80-bf}"] = :func_so
+ map["e0{a0-bf}{80-bf}"] = :func_so
+ map["{e1-ec}{80-bf}{80-bf}"] = :func_so
+ map["ed{80-9f}{80-bf}"] = :func_so
+ map["{ee-ef}{80-bf}{80-bf}"] = :func_so
+ map["f0{90-bf}{80-bf}{80-bf}"] = :func_so
+ map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so
+ map["f4{80-8f}{80-bf}{80-bf}"] = :func_so
+ transcode_generate_node(ActionMap.parse(map), "from_UTF8_MAC")
+
+ map = Hash[*MAC_DECOMPOSE_TBL.select{|k,v|v.length == 3}.
+ map{|k,v|[hexstr(v), hexstr(k)]}.flatten]
+
+ transcode_generate_node(ActionMap.parse(map), "from_utf8_mac_nfc3")
+ map = Hash[*MAC_DECOMPOSE_TBL.select{|k,v|v.length == 2}.
+ map{|k,v|[hexstr(v), hexstr(k)]}.flatten]
+ transcode_generate_node(ActionMap.parse(map), "from_utf8_mac_nfc2")
+%>
+
+<%= transcode_generated_code %>
+
+#define BYTE_ADDR(index) (<%= OUTPUT_PREFIX %>byte_array + (index)) /* address of element `index` of the generated byte array */
+#define WORD_ADDR(index) (<%= OUTPUT_PREFIX %>word_array + INFO2WORDINDEX(index)) /* address in the generated word array, after converting `index` with INFO2WORDINDEX */
+#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_info))) /* byte-array part of the lookup named by the variable `next_info`, which must be in scope where this is used */
+#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_info))) /* word-array part of the same lookup; also reads the in-scope `next_info` */
+#define BL_MIN_BYTE (BL_BASE[0]) /* first entry of the byte part: lowest byte accepted (see the range check in get_info) */
+#define BL_MAX_BYTE (BL_BASE[1]) /* second entry of the byte part: highest byte accepted */
+#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE]) /* per-byte entry, stored after the two range bytes and indexed relative to BL_MIN_BYTE */
+#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))]) /* word selected for `byte`: the BL_OFFSET value indexes into BL_INFO */
+
+/* Capacity, in bytes, of the ring buffer kept in the transcoder state. */
+#define STATUS_BUF_SIZE 16
+
+/*
+ * State carried between calls of the UTF8-MAC -> UTF-8 converter:
+ * a small ring buffer of input bytes that have not been output yet.
+ */
+struct from_utf8_mac_status {
+    unsigned char buf[STATUS_BUF_SIZE]; /* ring storage; indices wrap modulo STATUS_BUF_SIZE */
+    int beg; /* index of the oldest buffered byte */
+    int end; /* index one past the newest buffered byte */
+    int len; /* raised by one per buf_push() call, lowered per non-continuation byte shifted out */
+};
+
+/*
+ * Read the len counter of the state pointed to by sp.  The argument is
+ * parenthesized so that any pointer expression (e.g. &state) expands
+ * correctly.
+ */
+#define buf_length(sp) ((sp)->len)
+
+/*
+ * Number of bytes currently stored in the ring buffer, i.e. the distance
+ * from beg to end taken modulo STATUS_BUF_SIZE.
+ */
+int
+buf_bytesize(struct from_utf8_mac_status *sp)
+{
+    return (sp->end - sp->beg + STATUS_BUF_SIZE) % STATUS_BUF_SIZE;
+}
+
+/*
+ * Append the l bytes starting at p to the ring buffer, wrapping the write
+ * index at STATUS_BUF_SIZE.  The len counter goes up by exactly one per
+ * call, whatever l is.
+ */
+void
+buf_push(struct from_utf8_mac_status *sp, const unsigned char *p, ssize_t l)
+{
+    ssize_t i;
+
+    for (i = 0; i < l; i++) {
+        sp->buf[sp->end] = p[i];
+        sp->end = (sp->end + 1) % STATUS_BUF_SIZE;
+    }
+    sp->len++;
+}
+
+/*
+ * Remove and return the oldest byte of the ring buffer.  The len counter
+ * is decremented unless the byte has the form 10xxxxxx (a UTF-8
+ * continuation byte).
+ */
+unsigned char
+buf_shift(struct from_utf8_mac_status *sp)
+{
+    unsigned char head = sp->buf[sp->beg];
+    int is_continuation = (head & 0xC0) == 0x80;
+
+    sp->beg = (sp->beg + 1) % STATUS_BUF_SIZE;
+    if (!is_continuation) {
+        sp->len--;
+    }
+    return head;
+}
+
+/*
+ * Drop bytes from the front of the ring buffer: always one byte (if the
+ * buffer is not empty), then further bytes for as long as the next one is
+ * a 10xxxxxx continuation byte and the buffer is not exhausted.
+ */
+void
+buf_shift_char(struct from_utf8_mac_status *sp)
+{
+    if (sp->beg == sp->end) {
+        return;
+    }
+    do {
+        buf_shift(sp);
+    } while (sp->beg != sp->end && (sp->buf[sp->beg] & 0xC0) == 0x80);
+}
+
+/* Reset the ring buffer to the empty state: both indices and the counter become 0. */
+void
+buf_clear(struct from_utf8_mac_status *sp)
+{
+    sp->len = 0;
+    sp->end = 0;
+    sp->beg = 0;
+}
+
+/*
+ * Return the byte stored pos positions after the front of the ring buffer,
+ * without removing anything.  No check is made that pos lies inside the
+ * buffered data.
+ */
+unsigned char
+buf_at(struct from_utf8_mac_status *sp, int pos)
+{
+    return sp->buf[(sp->beg + pos) % STATUS_BUF_SIZE];
+}
+
+/*
+ * Move bytes from the front of the ring buffer to o: one byte (if any is
+ * buffered), then more while the next buffered byte is a 10xxxxxx
+ * continuation byte.  Returns how many bytes were written to o.
+ */
+int
+buf_output_char(struct from_utf8_mac_status *sp, unsigned char *o)
+{
+    int written = 0;
+
+    if (sp->beg == sp->end) {
+        return 0;
+    }
+    do {
+        o[written++] = buf_shift(sp);
+    } while (sp->beg != sp->end && (sp->buf[sp->beg] & 0xC0) == 0x80);
+    return written;
+}
+
+/*
+ * Drain the ring buffer into o, oldest byte first, and return the number
+ * of bytes written.
+ */
+int
+buf_output_all(struct from_utf8_mac_status *sp, unsigned char *o)
+{
+    int written;
+
+    for (written = 0; sp->beg != sp->end; written++) {
+        o[written] = buf_shift(sp);
+    }
+    return written;
+}
+
+/*
+ * Walk the lookup structure that starts at next_info, feeding it the
+ * buffered bytes front to back without consuming them.  A byte outside
+ * [BL_MIN_BYTE, BL_MAX_BYTE] yields INVALID.  The walk stops as soon as
+ * the looked-up value has either of its two low bits set, or when the
+ * buffered bytes run out; the last value obtained is returned.
+ *
+ * The parameter must keep the name next_info: the BL_* macros refer to it.
+ */
+VALUE
+get_info(VALUE next_info, struct from_utf8_mac_status *sp)
+{
+    int pos;
+
+    for (pos = 0; pos < buf_bytesize(sp); pos++) {
+        unsigned char next_byte = buf_at(sp, pos);
+
+        if (BL_MIN_BYTE <= next_byte && next_byte <= BL_MAX_BYTE) {
+            next_info = (VALUE)BL_ACTION(next_byte);
+        }
+        else {
+            next_info = INVALID;
+        }
+        if ((next_info & 3) != 0) {
+            break;
+        }
+    }
+    return next_info;
+}
+
+/*
+ * Look the buffered bytes up in one of the generated tables and, on a
+ * hit, write the replacement bytes to o and discard the matched input.
+ *
+ * mode  3 selects from_utf8_mac_nfc3 and empties the whole buffer on a
+ *       hit; any other value selects from_utf8_mac_nfc2 and shifts two
+ *       characters out of the buffer on a hit.
+ * sp    converter state holding the buffered bytes.
+ * o     output area; at most three bytes are written.
+ *
+ * Returns the number of bytes written, or 0 (buffer untouched) when the
+ * lookup result is neither a TWObt nor a THREEbt value.
+ */
+int
+buf_apply(int mode, struct from_utf8_mac_status *sp, unsigned char *o)
+{
+    int n = 0;
+    VALUE next_info = mode == 3 ? from_utf8_mac_nfc3 : from_utf8_mac_nfc2;
+
+    next_info = get_info(next_info, sp);
+    switch (next_info & 0x1F) {
+      case THREEbt:
+        o[n++] = getBT1(next_info);
+        o[n++] = getBT2(next_info);
+        o[n++] = getBT3(next_info);
+        break;
+      case TWObt:
+        /*
+         * A two-byte payload occupies the BT1 and BT2 fields (layout as
+         * defined in transcode_data.h).  Sharing the tail of the THREEbt
+         * case would emit BT2 and BT3 instead, i.e. the wrong bytes.
+         */
+        o[n++] = getBT1(next_info);
+        o[n++] = getBT2(next_info);
+        break;
+      default:
+        return 0;
+    }
+
+    if (mode == 3) {
+        buf_clear(sp);
+    }
+    else {
+        buf_shift_char(sp);
+        buf_shift_char(sp);
+    }
+    return n;
+}
+
+/*
+ * State callback referenced from rb_from_utf8_mac: puts the ring buffer
+ * inside statep into the empty state.  Always returns 0.
+ */
+static int
+from_utf8_mac_init(void *statep)
+{
+    buf_clear((struct from_utf8_mac_status *)statep);
+    return 0;
+}
+
+/*
+ * Flush everything still buffered in the converter state to o.
+ *
+ * First the two-character table is tried on the buffered bytes; whatever
+ * remains in the buffer afterwards is copied out unchanged behind it.
+ *
+ * statep  converter state (struct from_utf8_mac_status).
+ * o       output area.
+ * osize   size of the output area; not consulted here.
+ *
+ * Returns the number of bytes written to o (0 when nothing was buffered).
+ */
+static ssize_t
+from_utf8_mac_finish(void *statep,
+        unsigned char *o, size_t osize)
+{
+    struct from_utf8_mac_status *sp = statep;
+    int n;
+
+    if (buf_length(sp) == 0) return 0;
+
+    /*
+     * The two steps are separate statements on purpose: both modify *sp,
+     * and operands of a single `+` may be evaluated in either order.
+     */
+    n = buf_apply(2, sp, o);
+    /* Append after the bytes already produced instead of overwriting them. */
+    n += buf_output_all(sp, o + n);
+    return n;
+}
+
+/*
+ * Per-character conversion callback (wired into rb_from_utf8_mac).
+ *
+ * s/l is one input character.  Four-byte characters and the two
+ * three-byte sequences E3 82 99 / E3 82 9A (U+3099, U+309A) are never
+ * buffered: pending bytes are flushed and the character is copied to o
+ * right behind them.  A one-byte character also flushes pending bytes
+ * first, but is then buffered itself.  Every other character is buffered;
+ * once three pushes have accumulated, the three-character table, then the
+ * two-character table are tried, and if neither matches the oldest
+ * buffered character is output as is.
+ *
+ * Returns the number of bytes written to o.
+ */
+static ssize_t
+fun_so_from_utf8_mac(void *statep,
+        const unsigned char *s, size_t l,
+        unsigned char *o, size_t osize)
+{
+    struct from_utf8_mac_status *sp = statep;
+    int n = 0;
+    size_t i;
+    int is_sound_mark = l == 3 && s[0] == 0xE3 && s[1] == 0x82 &&
+        (s[2] == 0x99 || s[2] == 0x9A);
+
+    if (l == 4 || is_sound_mark) {
+        /* pass-through: flush what is pending, then copy the character */
+        n = from_utf8_mac_finish(sp, o, osize);
+        for (i = 0; i < l; i++) {
+            o[n++] = s[i];
+        }
+        return n;
+    }
+
+    if (l == 1) {
+        n = from_utf8_mac_finish(sp, o, osize);
+    }
+
+    buf_push(sp, s, l);
+    if (buf_length(sp) < 3) {
+        return n;
+    }
+
+    n = buf_apply(3, sp, o);
+    if (n <= 0) {
+        n = buf_apply(2, sp, o);
+    }
+    if (n > 0) {
+        return n;
+    }
+    return buf_output_char(sp, o);
+}
+
+static const rb_transcoder
+rb_from_utf8_mac = { /* descriptor handed to rb_register_transcoder() in Init_utf8_mac() */
+ "UTF8-MAC", "UTF-8", from_UTF8_MAC, /* two encoding names (presumably source, then destination - confirm against rb_transcoder) and the node generated above as "from_UTF8_MAC" */
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 4, /* max_input */
+ 10, /* max_output */
+ asciicompat_encoder, /* asciicompat_type */
+ sizeof(struct from_utf8_mac_status), from_utf8_mac_init, from_utf8_mac_init, /* size of the state block, then from_utf8_mac_init in two consecutive slots - NOTE(review): presumably state init and state fini; confirm */
+ NULL, NULL, NULL, fun_so_from_utf8_mac, /* of these four callback slots only the last is filled */
+ from_utf8_mac_finish /* last slot: routine that flushes the buffered bytes */
+};
+
+void
+Init_utf8_mac(void) /* runs the generated registration code, then registers the hand-written descriptor */
+{
+<%= transcode_register_code %>
+ rb_register_transcoder(&rb_from_utf8_mac); /* rb_from_utf8_mac is defined by hand above, so it is registered explicitly here */
+}
+