diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | include/ruby/encoding.h | 2 | ||||
-rw-r--r-- | test/ruby/test_econv.rb | 14 | ||||
-rw-r--r-- | transcode.c | 12 |
4 files changed, 38 insertions, 0 deletions
@@ -1,3 +1,13 @@ +Sat Sep 6 14:46:12 2008 Tanaka Akira <akr@fsij.org> + + * include/ruby/encoding.h (ECONV_HTML_TEXT_ENCODER): new constant. + (ECONV_HTML_ATTR_ENCODER): ditto. + + * transcode.c (rb_econv_open): check ECONV_HTML_TEXT_ENCODER and + ECONV_HTML_ATTR_ENCODER. + (Init_transcode): Encoding::Converter::HTML_TEXT_ENCODER and + Encoding::Converter::HTML_ATTR_ENCODER defined. + Sat Sep 6 14:15:25 2008 Tanaka Akira <akr@fsij.org> * transcode.c (struct trans_open_t): defined to pass num_additional. diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 1994bcdc5f..3959ae478e 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -263,6 +263,8 @@ void rb_econv_binmode(rb_econv_t *ec); /* usable only if source encoding is ascii compatible */ #define ECONV_CRLF_NEWLINE_ENCODER 0x0200 #define ECONV_CR_NEWLINE_ENCODER 0x0400 +#define ECONV_HTML_TEXT_ENCODER 0x0800 +#define ECONV_HTML_ATTR_ENCODER 0x1000 /* end of flags for rb_econv_open */ diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 4e4608c1af..6966ff4e82 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -754,4 +754,18 @@ class TestEncodingConverter < Test::Unit::TestCase assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\"")) assert_equal('"', ec.finish) end + + def test_html_escape_with_charref + ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_TEXT_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\"")) + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\"")) + assert_equal('"', ec.finish) + + ec = Encoding::Converter.new("utf-8", "iso-2022-jp", Encoding::Converter::HTML_TEXT_ENCODER) + assert_equal("&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp"), ec.convert("&\u3046&")) + assert_equal('', ec.finish) + end end diff 
--git a/transcode.c b/transcode.c index eea2d5e6e9..20303286cd 100644 --- a/transcode.c +++ b/transcode.c @@ -895,6 +895,10 @@ rb_econv_open(const char *sname, const char *dname, int ecflags) (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)) return NULL; + if ((ecflags & ECONV_HTML_TEXT_ENCODER) && + (ecflags & ECONV_HTML_ATTR_ENCODER)) + return NULL; + num_encoders = 0; if (ecflags & ECONV_CRLF_NEWLINE_ENCODER) if (!(encoders[num_encoders++] = get_transcoder_entry("", "crlf_newline"))) @@ -902,6 +906,12 @@ rb_econv_open(const char *sname, const char *dname, int ecflags) if (ecflags & ECONV_CR_NEWLINE_ENCODER) if (!(encoders[num_encoders++] = get_transcoder_entry("", "cr_newline"))) return NULL; + if (ecflags & ECONV_HTML_TEXT_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-text-escaped"))) + return NULL; + if (ecflags & ECONV_HTML_ATTR_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-attr-escaped"))) + return NULL; num_decoders = 0; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER) @@ -3510,6 +3520,8 @@ Init_transcode(void) rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER)); rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER)); rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER)); + rb_define_const(rb_cEncodingConverter, "HTML_TEXT_ENCODER", INT2FIX(ECONV_HTML_TEXT_ENCODER)); + rb_define_const(rb_cEncodingConverter, "HTML_ATTR_ENCODER", INT2FIX(ECONV_HTML_ATTR_ENCODER)); rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0); rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0); |