From 335fe1ee7bf51534c5f2abd0f359e86721307335 Mon Sep 17 00:00:00 2001 From: matz Date: Fri, 28 Sep 2007 09:07:02 +0000 Subject: * string.c (rb_str_comparable): need not to check asciicompat here. * encoding.c (rb_enc_check): ditto. * string.c (rb_enc_str_coderange): tuned a bit; no broken check. * encoding.c (rb_enc_check): new encoding comparison criteria. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13547 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 10 ++++++++++ encoding.c | 36 +++++++++++++++++++----------------- string.c | 38 ++++++++++++++++++++------------------ 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2bb7667358..07c76e8abb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -65,6 +65,16 @@ Fri Sep 28 12:49:05 2007 Koichi Sasada * benchmark/driver.rb: fix file selection algorithm. +Fri Sep 28 02:05:42 2007 Yukihiro Matsumoto + + * string.c (rb_str_comparable): need not to check asciicompat here. + + * encoding.c (rb_enc_check): ditto. + + * string.c (rb_enc_str_coderange): tuned a bit; no broken check. + + * encoding.c (rb_enc_check): new encoding comparison criteria. + Thu Sep 27 17:36:28 2007 NAKAMURA Usaku * win32/REAMDE.win32: follow recent changes. diff --git a/encoding.c b/encoding.c index bb9e77b5bd..4df874292f 100644 --- a/encoding.c +++ b/encoding.c @@ -196,25 +196,27 @@ rb_enc_check(VALUE str1, VALUE str2) return rb_enc_from_index(idx1); } - if (idx1 == 0) { - enc = rb_enc_from_index(idx2); - if (rb_enc_asciicompat(enc)) { - return enc; - } + if (BUILTIN_TYPE(str1) != T_STRING) { + VALUE tmp = str1; + str1 = str2; + str2 = tmp; } - else if (idx2 == 0) { - enc = rb_enc_from_index(idx1); - if (rb_enc_asciicompat(enc)) { - return enc; + if (BUILTIN_TYPE(str1) == T_STRING) { + int cr1, cr2; + + cr1 = rb_enc_str_coderange(str1); + if (BUILTIN_TYPE(str2) == T_STRING) { + cr2 = rb_enc_str_coderange(str2); + if (cr1 != cr2) { + /* may need to handle ENC_CODERANGE_BROKEN */ + if (cr1 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx2); + if (cr2 == ENC_CODERANGE_SINGLE) return rb_enc_from_index(idx1); + } + if (cr1 == ENC_CODERANGE_SINGLE) return ONIG_ENCODING_ASCII; } - } - if (BUILTIN_TYPE(str1) == T_STRING && - BUILTIN_TYPE(str2) == T_STRING && - rb_enc_asciicompat(rb_enc_from_index(idx1)) && - rb_enc_asciicompat(rb_enc_from_index(idx2)) && - rb_enc_str_coderange(str1) == ENC_CODERANGE_SINGLE && - rb_enc_str_coderange(str2) == ENC_CODERANGE_SINGLE) { - return ONIG_ENCODING_ASCII; + if (cr1 == ENC_CODERANGE_SINGLE && + rb_enc_asciicompat(enc = rb_enc_from_index(idx2))) + return enc; } rb_raise(rb_eArgError, "character encodings differ"); } diff --git a/string.c b/string.c index 3f09196093..ab48a66bb6 100644 --- a/string.c +++ b/string.c @@ -99,26 +99,32 @@ VALUE rb_fs; int rb_enc_str_coderange(VALUE str) { - long i; int cr = ENC_CODERANGE(str); if (cr == ENC_CODERANGE_UNKNOWN) { - cr = ENC_CODERANGE_SINGLE; - for (i = 0; i < RSTRING_LEN(str); ++i) { - const char *p = &RSTRING_PTR(str)[i]; - int c = (unsigned char)*p; - - if (!ISASCII(c)) { - c = rb_enc_codepoint(p, RSTRING_END(str), rb_enc_get(str)); - if (c == -1) { - cr = ENC_CODERANGE_BROKEN; - } - else { + rb_encoding *enc = rb_enc_get(str); + + if (!rb_enc_asciicompat(enc)) { + cr = ENC_CODERANGE_MULTI; + ENC_CODERANGE_SET(str, cr); + return cr; + } + else { + const char *p = RSTRING_PTR(str); + const char *e = p + RSTRING_LEN(str); + + cr = ENC_CODERANGE_SINGLE; + while (p < e) { + int c = (unsigned char)*p; + + if (c > 0x80) { cr = ENC_CODERANGE_MULTI; + break; } + p++; } + ENC_CODERANGE_SET(str, cr); } - ENC_CODERANGE_SET(str, cr); } return cr; } @@ -1169,8 +1175,7 @@ rb_str_hash(VALUE str) if (e && is_ascii_string(str)) { e = 0; } - return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str), - e); + return hash((const void *)RSTRING_PTR(str), RSTRING_LEN(str), e); } /* @@ -1196,8 +1201,6 @@ rb_str_comparable(VALUE str1, VALUE str2) int idx2 = rb_enc_get_index(str2); if (idx1 == idx2) return Qtrue; - if (!rb_enc_asciicompat(rb_enc_from_index(idx1))) return Qfalse; - if (!rb_enc_asciicompat(rb_enc_from_index(idx2))) return Qfalse; if (!is_ascii_string(str1)) return Qfalse; if (!is_ascii_string(str2)) return Qfalse; return Qtrue; @@ -1263,7 +1266,6 @@ rb_str_eql(VALUE str1, VALUE str2) return Qfalse; if (!rb_str_comparable(str1, str2)) return Qfalse; - if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), lesser(RSTRING_LEN(str1), RSTRING_LEN(str2))) == 0) return Qtrue; -- cgit v1.2.3