aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-12-20 11:54:54 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-12-20 11:54:54 +0000
commit ce7f7f5e3d6c288b140870fa87c4b11e9a744f62 (patch)
tree 54ca2134480577b0aa5c5001b1657d5fc4adfaf4
parent 28dc41090aaaf6f43a29c1b6fd4f49192540c256 (diff)
download ruby-ce7f7f5e3d6c288b140870fa87c4b11e9a744f62.tar.gz
cgi/escape: Optimize CGI.escapeHTML
* cgi/escape/escape.c: Optimize CGI.escapeHTML for ASCII-compatible encodings. [Fix GH-1164]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53220 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- ext/Setup 1
-rw-r--r-- ext/Setup.atheos 1
-rw-r--r-- ext/Setup.nacl 1
-rw-r--r-- ext/Setup.nt 1
-rw-r--r-- ext/cgi/escape/escape.c 97
-rw-r--r-- ext/cgi/escape/extconf.rb 3
-rw-r--r-- lib/cgi/util.rb 5
-rw-r--r-- test/cgi/test_cgi_util.rb 6
9 files changed, 120 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index c363a407b5..52d28d33d8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sun Dec 20 20:54:51 2015 Takashi Kokubun <takashikkbn@gmail.com>
+
+ * cgi/escape/escape.c: Optimize CGI.escapeHTML for
+ ASCII-compatible encodings. [Fix GH-1164]
+
Sun Dec 20 15:36:46 2015 SHIBATA Hiroshi <hsbt@ruby-lang.org>
* lib/erb.rb: revert r53123. It breaks compatibility like thor and
diff --git a/ext/Setup b/ext/Setup
index f4da5215c9..05998e3363 100644
--- a/ext/Setup
+++ b/ext/Setup
@@ -1,6 +1,7 @@
#option nodynamic
#bigdecimal
+#cgi/escape
#continuation
#coverage
#date
diff --git a/ext/Setup.atheos b/ext/Setup.atheos
index 41eecd161f..5e39de8e15 100644
--- a/ext/Setup.atheos
+++ b/ext/Setup.atheos
@@ -2,6 +2,7 @@ option nodynamic
#Win32API
bigdecimal
+cgi/escape
dbm
digest
digest/md5
diff --git a/ext/Setup.nacl b/ext/Setup.nacl
index ce65ebcf0a..f205e367c6 100644
--- a/ext/Setup.nacl
+++ b/ext/Setup.nacl
@@ -2,6 +2,7 @@
#
# #Win32API
# bigdecimal
+# cgi/escape
# continuation
# coverage
# date
diff --git a/ext/Setup.nt b/ext/Setup.nt
index 285b1edcb6..4812893eef 100644
--- a/ext/Setup.nt
+++ b/ext/Setup.nt
@@ -3,6 +3,7 @@
Win32API
bigdecimal
+cgi/escape
#dbm
digest
digest/md5
diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
new file mode 100644
index 0000000000..6fec95af04
--- /dev/null
+++ b/ext/cgi/escape/escape.c
@@ -0,0 +1,97 @@
+#include "ruby.h"
+#include "ruby/encoding.h"
+
+static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
+
+/*
+ * Appends the HTML entity for c to str.
+ *
+ * Only the five characters ' & " < > have an entity here; for any other
+ * character nothing is appended and str is left untouched.
+ */
+static void
+html_escaped_cat(VALUE str, char c)
+{
+    const char *entity = NULL;
+
+    if (c == '\'') {
+        entity = "&#39;";
+    }
+    else if (c == '&') {
+        entity = "&amp;";
+    }
+    else if (c == '"') {
+        entity = "&quot;";
+    }
+    else if (c == '<') {
+        entity = "&lt;";
+    }
+    else if (c == '>') {
+        entity = "&gt;";
+    }
+
+    if (entity != NULL) {
+        rb_str_cat_cstr(str, entity);
+    }
+}
+
+/*
+ * Byte-wise HTML escaping for a String in an ASCII-compatible encoding.
+ *
+ * Scans the bytes of str once. Runs of bytes that need no escaping are
+ * copied verbatim; each of ' & " < > is replaced by its entity through
+ * html_escaped_cat(). The output buffer is allocated only when the first
+ * special byte is found.
+ *
+ * Always returns a String distinct from str: either the escaped buffer,
+ * associated with the encoding of str, or a duplicate of str when nothing
+ * had to be escaped. Returning str itself would hand the caller an alias of
+ * its own argument, unlike the gsub-based Ruby implementation, which
+ * always produces a new String.
+ */
+static VALUE
+optimized_escape_html(VALUE str)
+{
+    long i, len, beg = 0;
+    VALUE dest = Qnil;  /* stays nil until the first special byte is seen */
+    const char *cstr;
+
+    len = RSTRING_LEN(str);
+    cstr = RSTRING_PTR(str);
+
+    for (i = 0; i < len; i++) {
+        switch (cstr[i]) {
+          case '\'':
+          case '&':
+          case '"':
+          case '<':
+          case '>':
+            if (NIL_P(dest)) {
+                dest = rb_str_buf_new(len);
+            }
+
+            /* flush the unescaped run [beg, i) that precedes this byte */
+            rb_str_cat(dest, cstr + beg, i - beg);
+            beg = i + 1;
+
+            html_escaped_cat(dest, cstr[i]);
+            break;
+          default:
+            break;
+        }
+    }
+
+    if (NIL_P(dest)) {
+        /* nothing to escape: still return a fresh object, never str itself */
+        return rb_str_dup(str);
+    }
+
+    /* copy the tail after the last special byte */
+    rb_str_cat(dest, cstr + beg, len - beg);
+    rb_enc_associate(dest, rb_enc_get(str));
+    return dest;
+}
+
+/*
+ *  call-seq:
+ *     CGI.escapeHTML(string) -> string
+ *
+ *  Returns the HTML-escaped form of string.
+ *
+ *  The argument is first converted with StringValue. Strings in an
+ *  ASCII-compatible encoding are handled by optimized_escape_html();
+ *  any other encoding is passed on to the super method.
+ */
+static VALUE
+cgiesc_escape_html(VALUE self, VALUE str)
+{
+    StringValue(str);
+
+    /* the byte-wise fast path is only used for ASCII-compatible encodings */
+    if (!rb_enc_str_asciicompat_p(str)) {
+        return rb_call_super(1, &str);
+    }
+
+    return optimized_escape_html(str);
+}
+
+/*
+ * Extension entry point.
+ *
+ * Defines (or reopens) CGI, CGI::Escape and CGI::Util, installs the C
+ * escapeHTML on CGI::Escape, prepends CGI::Escape to CGI::Util, and
+ * extends the CGI class object itself with CGI::Escape.
+ */
+void
+Init_escape(void)
+{
+    VALUE cgi = rb_define_class("CGI", rb_cObject);
+    VALUE escape = rb_define_module_under(cgi, "Escape");
+    VALUE util = rb_define_module_under(cgi, "Util");
+
+    /* publish the handles in the file-level statics */
+    rb_cCGI = cgi;
+    rb_mEscape = escape;
+    rb_mUtil = util;
+
+    rb_define_method(escape, "escapeHTML", cgiesc_escape_html, 1);
+    rb_prepend_module(util, escape);
+    rb_extend_object(cgi, escape);
+}
diff --git a/ext/cgi/escape/extconf.rb b/ext/cgi/escape/extconf.rb
new file mode 100644
index 0000000000..16e8ff224d
--- /dev/null
+++ b/ext/cgi/escape/extconf.rb
@@ -0,0 +1,3 @@
+require 'mkmf'
+
+create_makefile 'cgi/escape' # generate the Makefile for the extension that is loaded as 'cgi/escape'
diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb
index c232707239..83c310b3cb 100644
--- a/lib/cgi/util.rb
+++ b/lib/cgi/util.rb
@@ -38,6 +38,11 @@ module CGI::Util
string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
end
+ begin
+ require 'cgi/escape' # optional C extension that supplies a faster escapeHTML
+ rescue LoadError # extension not available: deliberately ignored, the Ruby escapeHTML above stays in use
+ end
+
# Unescape a string that has been HTML-escaped
# CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
# # => "Usage: foo \"bar\" <baz>"
diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb
index d9a9f554a2..d30c9bd79c 100644
--- a/test/cgi/test_cgi_util.rb
+++ b/test/cgi/test_cgi_util.rb
@@ -62,6 +62,12 @@ class CGIUtilTest < Test::Unit::TestCase
assert_equal("&#39;&amp;&quot;&gt;&lt;", CGI::escapeHTML("'&\"><"))
end
+ def test_cgi_escape_html_preserve_encoding
+ assert_equal(Encoding::US_ASCII, CGI::escapeHTML("'&\"><".force_encoding("US-ASCII")).encoding)
+ assert_equal(Encoding::ASCII_8BIT, CGI::escapeHTML("'&\"><".force_encoding("ASCII-8BIT")).encoding)
+ assert_equal(Encoding::UTF_8, CGI::escapeHTML("'&\"><".force_encoding("UTF-8")).encoding)
+ end
+
def test_cgi_unescapeHTML # the five entities must decode back to the raw characters ' & " > <
assert_equal("'&\"><", CGI::unescapeHTML("&#39;&amp;&quot;&gt;&lt;"))
end