about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-11-22 08:50:14 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-11-22 08:50:14 +0000
commit 95a6be1a177976e75e06c1a8f55b438898bee3a2 (patch)
tree 1cfdb935b9717ef55fc34e91e36f0074f0208993
parent 9cda3a35b2cbe7d52798f5c23e0d1da17945acb2 (diff)
download ruby-95a6be1a177976e75e06c1a8f55b438898bee3a2.tar.gz
* transcode.c (str_transcode0): don't scrub invalid chars if
str.encode doesn't have explicit invalid: :replace.
workaround fix for see #8995

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@43802 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 6
-rw-r--r-- test/ruby/test_transcode.rb 9
-rw-r--r-- transcode.c 6
3 files changed, 20 insertions, 1 deletion
diff --git a/ChangeLog b/ChangeLog
index 1a34971092..b32b1a98f5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Nov 22 17:20:50 2013 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * transcode.c (str_transcode0): don't scrub invalid chars if
+ str.encode doesn't have explicit invalid: :replace.
+ workaround fix for see #8995
+
Fri Nov 22 17:11:26 2013 Narihiro Nakamura <authornari@gmail.com>
* include/ruby/intern.h, internal.h: Expose rb_gc_count().
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index db91010c94..25c9d24663 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2071,4 +2071,13 @@ class TestTranscode < Test::Unit::TestCase
assert_equal(4, 'aaa'.encode(enc).length, "should count in #{enc} with BOM")
end
end
+
+ def test_encode_with_invalid_chars
+ bug8995 = '[ruby-dev:47747]'
+ EnvUtil.with_default_internal(Encoding::UTF_8) do
+ str = "\xff".force_encoding('utf-8')
+ assert_equal str, str.encode, bug8995
+ assert_equal "\ufffd", str.encode(invalid: :replace), bug8995
+ end
+ end
end
diff --git a/transcode.c b/transcode.c
index 386942689a..0182a205c2 100644
--- a/transcode.c
+++ b/transcode.c
@@ -2672,6 +2672,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
rb_encoding *senc, *denc;
const char *sname, *dname;
int dencidx;
+ int explicitly_invalid_replace = TRUE;
rb_check_arity(argc, 0, 2);
@@ -2681,6 +2682,9 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
if (!ecflags) return -1;
arg1 = rb_obj_encoding(str);
}
+ if (!(ecflags & ECONV_INVALID_MASK)) {
+ explicitly_invalid_replace = FALSE;
+ }
ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
}
else {
@@ -2694,7 +2698,7 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
ECONV_XML_ATTR_CONTENT_DECORATOR|
ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
if (senc && senc == denc) {
- if (ecflags & ECONV_INVALID_MASK) {
+ if ((ecflags & ECONV_INVALID_MASK) && explicitly_invalid_replace) {
VALUE rep = Qnil;
if (!NIL_P(ecopts)) {
rep = rb_hash_aref(ecopts, sym_replace);