about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 10
-rw-r--r-- include/ruby/encoding.h 2
-rw-r--r-- test/ruby/test_econv.rb 14
-rw-r--r-- transcode.c 12
4 files changed, 38 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index ee39dacf06..545ee32d77 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sat Sep 6 14:46:12 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/encoding.h (ECONV_HTML_TEXT_ENCODER): new constant.
+ (ECONV_HTML_ATTR_ENCODER): ditto.
+
+ * transcode.c (rb_econv_open): check ECONV_HTML_TEXT_ENCODER and
+ ECONV_HTML_ATTR_ENCODER.
+ (Init_transcode): Encoding::Converter::HTML_TEXT_ENCODER and
+ Encoding::Converter::HTML_ATTR_ENCODER defined.
+
Sat Sep 6 14:15:25 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (struct trans_open_t): defined to pass num_additional.
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 1994bcdc5f..3959ae478e 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -263,6 +263,8 @@ void rb_econv_binmode(rb_econv_t *ec);
/* usable only if source encoding is ascii compatible */
#define ECONV_CRLF_NEWLINE_ENCODER 0x0200
#define ECONV_CR_NEWLINE_ENCODER 0x0400
+#define ECONV_HTML_TEXT_ENCODER 0x0800
+#define ECONV_HTML_ATTR_ENCODER 0x1000
/* end of flags for rb_econv_open */
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 4e4608c1af..6966ff4e82 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -754,4 +754,18 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
assert_equal('"', ec.finish)
end
+
+ def test_html_escape_with_charref # HTML escaping flags combined with UNDEF_HEX_CHARREF (euc-jp) and with an iso-2022-jp destination
+ ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_TEXT_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF)
+ assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\"")) # text mode: <, >, & are escaped, '"' is kept; U+2665/U+2661 come out as hex char refs (presumably because euc-jp cannot represent them)
+ assert_equal('', ec.finish) # text mode leaves nothing to flush
+
+ ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF)
+ assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\"")) # attr mode: output opens with '"' and '"' in the input becomes &quot;
+ assert_equal('"', ec.finish) # finish supplies the closing '"' of the attribute value
+
+ ec = Encoding::Converter.new("utf-8", "iso-2022-jp", Encoding::Converter::HTML_TEXT_ENCODER)
+ assert_equal("&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp"), ec.convert("&\u3046&")) # only the two source '&' characters become &amp;; the '&' between \e$B and \e(B (presumably part of the encoded U+3046) is expected unescaped
+ assert_equal('', ec.finish) # expected to return an empty string here
+ end
end
diff --git a/transcode.c b/transcode.c
index eea2d5e6e9..20303286cd 100644
--- a/transcode.c
+++ b/transcode.c
@@ -895,6 +895,10 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER))
return NULL;
+ if ((ecflags & ECONV_HTML_TEXT_ENCODER) &&
+ (ecflags & ECONV_HTML_ATTR_ENCODER))
+ return NULL;
+
num_encoders = 0;
if (ecflags & ECONV_CRLF_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "crlf_newline")))
@@ -902,6 +906,12 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
if (ecflags & ECONV_CR_NEWLINE_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "cr_newline")))
return NULL;
+ if (ecflags & ECONV_HTML_TEXT_ENCODER)
+ if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-text-escaped")))
+ return NULL;
+ if (ecflags & ECONV_HTML_ATTR_ENCODER)
+ if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-attr-escaped")))
+ return NULL;
num_decoders = 0;
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)
@@ -3510,6 +3520,8 @@ Init_transcode(void)
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER));
rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER));
rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER));
+ rb_define_const(rb_cEncodingConverter, "HTML_TEXT_ENCODER", INT2FIX(ECONV_HTML_TEXT_ENCODER));
+ rb_define_const(rb_cEncodingConverter, "HTML_ATTR_ENCODER", INT2FIX(ECONV_HTML_ATTR_ENCODER));
rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0);
rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0);