aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean Boussier <byroot@ruby-lang.org>2024-02-19 13:35:48 +0100
committerJean Boussier <jean.boussier@gmail.com>2024-04-18 10:17:26 +0200
commit3a7846b1aa4c10d86dc5a91c6df94f89d60bb0c3 (patch)
treefe189c035b6b7ede818b3d644c897ce0001d4916
parentb3c59370ca20fcde2fde5a63361550c04ca44be3 (diff)
downloadruby-3a7846b1aa4c10d86dc5a91c6df94f89d60bb0c3.tar.gz
Add a hint of `ASCII-8BIT` being `BINARY`
[Feature #18576] Since outright renaming `ASCII-8BIT` is deemed to backward incompatible, the next best thing would be to only change its `#inspect`, particularly in exception messages.
-rw-r--r--encoding.c16
-rw-r--r--internal/encoding.h1
-rw-r--r--re.c4
-rw-r--r--spec/ruby/core/encoding/inspect_spec.rb20
-rw-r--r--string.c8
-rw-r--r--test/ruby/test_m17n.rb4
-rw-r--r--test/ruby/test_yjit.rb2
7 files changed, 40 insertions, 15 deletions
diff --git a/encoding.c b/encoding.c
index df17f63bb1..480cc8bdc6 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1015,13 +1015,22 @@ rb_enc_get(VALUE obj)
return rb_enc_from_index(rb_enc_get_index(obj));
}
+const char *
+rb_enc_inspect_name(rb_encoding *enc)
+{
+ if (enc == global_enc_ascii) {
+ return "BINARY (ASCII-8BIT)";
+ }
+ return enc->name;
+}
+
static rb_encoding*
rb_encoding_check(rb_encoding* enc, VALUE str1, VALUE str2)
{
if (!enc)
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
- rb_enc_name(rb_enc_get(str1)),
- rb_enc_name(rb_enc_get(str2)));
+ rb_enc_inspect_name(rb_enc_get(str1)),
+ rb_enc_inspect_name(rb_enc_get(str2)));
return enc;
}
@@ -1263,9 +1272,10 @@ enc_inspect(VALUE self)
if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
rb_raise(rb_eTypeError, "broken Encoding");
}
+
return rb_enc_sprintf(rb_usascii_encoding(),
"#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
- rb_enc_name(enc),
+ rb_enc_inspect_name(enc),
(ENC_DUMMY_P(enc) ? " (dummy)" : ""),
rb_enc_autoload_p(enc) ? " (autoload)" : "");
}
diff --git a/internal/encoding.h b/internal/encoding.h
index 11ffa6d83d..fe9ea10ec4 100644
--- a/internal/encoding.h
+++ b/internal/encoding.h
@@ -18,6 +18,7 @@
/* encoding.c */
ID rb_id_encoding(void);
+const char * rb_enc_inspect_name(rb_encoding *enc);
rb_encoding *rb_enc_get_from_index(int index);
rb_encoding *rb_enc_check_str(VALUE str1, VALUE str2);
int rb_encdb_replicate(const char *alias, const char *orig);
diff --git a/re.c b/re.c
index 9c47449271..c8940ff887 100644
--- a/re.c
+++ b/re.c
@@ -1530,8 +1530,8 @@ reg_enc_error(VALUE re, VALUE str)
{
rb_raise(rb_eEncCompatError,
"incompatible encoding regexp match (%s regexp with %s string)",
- rb_enc_name(rb_enc_get(re)),
- rb_enc_name(rb_enc_get(str)));
+ rb_enc_inspect_name(rb_enc_get(re)),
+ rb_enc_inspect_name(rb_enc_get(str)));
}
static inline int
diff --git a/spec/ruby/core/encoding/inspect_spec.rb b/spec/ruby/core/encoding/inspect_spec.rb
index 9a930b2a77..df96141db9 100644
--- a/spec/ruby/core/encoding/inspect_spec.rb
+++ b/spec/ruby/core/encoding/inspect_spec.rb
@@ -5,9 +5,23 @@ describe "Encoding#inspect" do
Encoding::UTF_8.inspect.should be_an_instance_of(String)
end
- it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
- Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
- enc.inspect.should =~ /#<Encoding:#{enc.name}>/
+ ruby_version_is ""..."3.4" do
+ it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
+ Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
+ enc.inspect.should =~ /#<Encoding:#{enc.name}>/
+ end
+ end
+ end
+
+ ruby_version_is "3.4" do
+ it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do
+ Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc|
+ if enc.name == "ASCII-8BIT"
+ enc.inspect.should == "#<Encoding:BINARY (ASCII-8BIT)>"
+ else
+ enc.inspect.should =~ /#<Encoding:#{enc.name}>/
+ end
+ end
end
end
diff --git a/string.c b/string.c
index 83bb91f17b..9f7c163a81 100644
--- a/string.c
+++ b/string.c
@@ -3374,7 +3374,7 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
incompatible:
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
- rb_enc_name(str_enc), rb_enc_name(ptr_enc));
+ rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
UNREACHABLE_RETURN(Qundef);
}
@@ -5854,8 +5854,8 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
- rb_enc_name(str_enc),
- rb_enc_name(STR_ENC_GET(repl)));
+ rb_enc_inspect_name(str_enc),
+ rb_enc_inspect_name(STR_ENC_GET(repl)));
}
enc = STR_ENC_GET(repl);
}
@@ -11120,7 +11120,7 @@ str_compat_and_valid(VALUE str, rb_encoding *enc)
rb_encoding *e = STR_ENC_GET(str);
if (cr == ENC_CODERANGE_7BIT ? rb_enc_mbminlen(enc) != 1 : enc != e) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
- rb_enc_name(enc), rb_enc_name(e));
+ rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
}
}
return str;
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 907360b3a6..a6493374b5 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1091,7 +1091,7 @@ class TestM17N < Test::Unit::TestCase
assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))
s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
- a_with_e = /EUC-JP and ASCII-8BIT/
+ a_with_e = /EUC-JP and BINARY \(ASCII-8BIT\)/
assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do
s.index(a("\xb1\xa3"))
end
@@ -1099,7 +1099,7 @@ class TestM17N < Test::Unit::TestCase
s.rindex(a("\xb1\xa3"))
end
- a_with_e = /ASCII-8BIT regexp with EUC-JP string/
+ a_with_e = /BINARY \(ASCII-8BIT\) regexp with EUC-JP string/
assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do
s.index(Regexp.new(a("\xb1\xa3")))
end
diff --git a/test/ruby/test_yjit.rb b/test/ruby/test_yjit.rb
index df71dbffc0..796787e355 100644
--- a/test/ruby/test_yjit.rb
+++ b/test/ruby/test_yjit.rb
@@ -1458,7 +1458,7 @@ class TestYJIT < Test::Unit::TestCase
end
def test_str_concat_encoding_mismatch
- assert_compiles(<<~'RUBY', result: "incompatible character encodings: ASCII-8BIT and EUC-JP")
+ assert_compiles(<<~'RUBY', result: "incompatible character encodings: BINARY (ASCII-8BIT) and EUC-JP")
def bar(a, b)
a << b
rescue => e