aboutsummaryrefslogtreecommitdiffstats
path: root/ext
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2023-04-13 19:22:29 +0200
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2023-12-01 16:47:06 +0900
commit0dfeb172968cdaefca2ab828c94d3e5f44d91f8f (patch)
tree2850662a485dd3f42068bf14ce4618d77b4b13d6 /ext
parent698cb8406298ff289ba7abadf34abc3d09a07ef0 (diff)
downloadruby-0dfeb172968cdaefca2ab828c94d3e5f44d91f8f.tar.gz
Rename escape_slash in script_safe and also escape E+2028 and E+2029
It is rather common to directly interpolate JSON string inside <script> tags in HTML as to provide configuration or parameters to a script. However this may lead to XSS vulnerabilities, to prevent that 3 characters need to be escaped: - `/` (forward slash) - `U+2028` (LINE SEPARATOR) - `U+2029` (PARAGRAPH SEPARATOR) The forward slash need to be escaped to prevent closing the script tag early, and the other two are valid JSON but invalid Javascript and can be used to break JS parsing. Given that the intent of escaping forward slash is the same than escaping U+2028 and U+2029, I chos to rename and repurpose the existing `escape_slash` option.
Diffstat (limited to 'ext')
-rw-r--r--ext/json/generator/generator.c62
-rw-r--r--ext/json/generator/generator.h10
-rw-r--r--ext/json/lib/json.rb9
-rw-r--r--ext/json/lib/json/common.rb4
4 files changed, 59 insertions, 26 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 8f7c57e042..efb6b4a0a2 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
i_pack, i_unpack, i_create_id, i_extend, i_key_p,
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
- i_buffer_initial_length, i_dup, i_escape_slash;
+ i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash;
/*
* Copyright 2001-2004 Unicode, Inc.
@@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
* and control characters are JSON escaped. */
-static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash)
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
{
const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
@@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
fbuffer_append(buffer, "\\\"", 2);
break;
case '/':
- if(escape_slash) {
+ if(script_safe) {
fbuffer_append(buffer, "\\/", 2);
break;
}
@@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
* characters required by the JSON standard are JSON escaped. The remaining
* characters (should be UTF8) are just passed through and appended to the
* result. */
-static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash)
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
{
const char *ptr = RSTRING_PTR(string), *p;
unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
@@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
escape_len = 2;
break;
case '/':
- if(escape_slash) {
+ if(script_safe) {
escape = "\\/";
escape_len = 2;
break;
@@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
rb_raise(rb_path2class("JSON::GeneratorError"),
"partial character in source, but hit end");
}
+
+ if (script_safe && c == 0xE2) {
+ unsigned char c2 = (unsigned char) *(p+1);
+ unsigned char c3 = (unsigned char) *(p+2);
+ if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
+ fbuffer_append(buffer, ptr + start, end - start);
+ start = end = (end + clen);
+ if (c3 == 0xA8) {
+ fbuffer_append(buffer, "\\u2028", 6);
+ } else {
+ fbuffer_append(buffer, "\\u2029", 6);
+ }
+ continue;
+ }
+ }
+
if (!isLegalUTF8((UTF8 *) p, clen)) {
rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
@@ -727,8 +743,12 @@ static VALUE cState_configure(VALUE self, VALUE opts)
state->allow_nan = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
state->ascii_only = RTEST(tmp);
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
- state->escape_slash = RTEST(tmp);
+ tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
+ state->script_safe = RTEST(tmp);
+ if (!state->script_safe) {
+ tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
+ state->script_safe = RTEST(tmp);
+ }
return self;
}
@@ -763,7 +783,7 @@ static VALUE cState_to_h(VALUE self)
rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
- rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse);
+ rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
return result;
@@ -948,9 +968,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
}
#endif
if (state->ascii_only) {
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash);
+ convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
} else {
- convert_UTF8_to_JSON(buffer, obj, state->escape_slash);
+ convert_UTF8_to_JSON(buffer, obj, state->script_safe);
}
fbuffer_append_char(buffer, '"');
}
@@ -1391,27 +1411,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
}
/*
- * call-seq: escape_slash
+ * call-seq: script_safe
*
* If this boolean is true, the forward slashes will be escaped in
* the json output.
*/
-static VALUE cState_escape_slash(VALUE self)
+static VALUE cState_script_safe(VALUE self)
{
GET_STATE(self);
- return state->escape_slash ? Qtrue : Qfalse;
+ return state->script_safe ? Qtrue : Qfalse;
}
/*
- * call-seq: escape_slash=(depth)
+ * call-seq: script_safe=(depth)
*
* This sets whether or not the forward slashes will be escaped in
* the json output.
*/
-static VALUE cState_escape_slash_set(VALUE self, VALUE enable)
+static VALUE cState_script_safe_set(VALUE self, VALUE enable)
{
GET_STATE(self);
- state->escape_slash = RTEST(enable);
+ state->script_safe = RTEST(enable);
return Qnil;
}
@@ -1531,9 +1551,12 @@ void Init_generator(void)
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
- rb_define_method(cState, "escape_slash", cState_escape_slash, 0);
- rb_define_method(cState, "escape_slash?", cState_escape_slash, 0);
- rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1);
+ rb_define_method(cState, "script_safe", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe?", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
+ rb_define_alias(cState, "escape_slash", "script_safe");
+ rb_define_alias(cState, "escape_slash?", "script_safe?");
+ rb_define_alias(cState, "escape_slash=", "script_safe=");
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
@@ -1590,6 +1613,7 @@ void Init_generator(void)
i_object_nl = rb_intern("object_nl");
i_array_nl = rb_intern("array_nl");
i_max_nesting = rb_intern("max_nesting");
+ i_script_safe = rb_intern("script_safe");
i_escape_slash = rb_intern("escape_slash");
i_allow_nan = rb_intern("allow_nan");
i_ascii_only = rb_intern("ascii_only");
diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h
index 3ebd622554..5e6a228040 100644
--- a/ext/json/generator/generator.h
+++ b/ext/json/generator/generator.h
@@ -49,8 +49,8 @@ static const UTF32 halfMask = 0x3FFUL;
static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
static void unicode_escape(char *buf, UTF16 character);
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
-static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash);
-static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash);
+static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
+static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe);
static char *fstrndup(const char *ptr, unsigned long len);
/* ruby api and some helpers */
@@ -72,7 +72,7 @@ typedef struct JSON_Generator_StateStruct {
long max_nesting;
char allow_nan;
char ascii_only;
- char escape_slash;
+ char script_safe;
long depth;
long buffer_initial_length;
} JSON_Generator_State;
@@ -151,8 +151,8 @@ static VALUE cState_allow_nan_p(VALUE self);
static VALUE cState_ascii_only_p(VALUE self);
static VALUE cState_depth(VALUE self);
static VALUE cState_depth_set(VALUE self, VALUE depth);
-static VALUE cState_escape_slash(VALUE self);
-static VALUE cState_escape_slash_set(VALUE self, VALUE depth);
+static VALUE cState_script_safe(VALUE self);
+static VALUE cState_script_safe_set(VALUE self, VALUE depth);
static FBuffer *cState_prepare_buffer(VALUE self);
#ifndef ZALLOC
#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type)))
diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb
index 1e64bfcb1a..807488ffef 100644
--- a/ext/json/lib/json.rb
+++ b/ext/json/lib/json.rb
@@ -285,6 +285,15 @@ require 'json/common'
# # Raises JSON::NestingError (nesting of 2 is too deep):
# JSON.generate(obj, max_nesting: 2)
#
+# ====== Escaping Options
+#
+# Options +script_safe+ (boolean) specifies wether <tt>'\u2028'</tt>, <tt>'\u2029'</tt>
+# and <tt>'/'</tt> should be escaped as to make the JSON object safe to interpolate in script
+# tags.
+#
+# Options +ascii_only+ (boolean) specifies wether all characters outside the ASCII range
+# should be escaped.
+#
# ====== Output Options
#
# The default formatting options generate the most compact
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index 173af04be0..68ff0efcd0 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -572,13 +572,13 @@ module JSON
# Sets or returns the default options for the JSON.dump method.
# Initially:
# opts = JSON.dump_default_options
- # opts # => {:max_nesting=>false, :allow_nan=>true, :escape_slash=>false}
+ # opts # => {:max_nesting=>false, :allow_nan=>true, :script_safe=>false}
attr_accessor :dump_default_options
end
self.dump_default_options = {
:max_nesting => false,
:allow_nan => true,
- :escape_slash => false,
+ :script_safe => false,
}
# :call-seq: