From da6997ef13787ca3677e2413fd3bbf0553c057f6 Mon Sep 17 00:00:00 2001 From: nobu Date: Fri, 17 Jul 2015 06:39:29 +0000 Subject: encoding.h: ENC_CODERANGE_CLEAN_P * include/ruby/encoding.h (ENC_CODERANGE_CLEAN_P): predicate that tells if the coderange is clean, that is, 7bit or valid, and needs no scrubbing. * re.c (rb_reg_expr_str): use ENC_CODERANGE_CLEAN_P. * string.c (enc_strlen, rb_enc_cr_str_buf_cat, rb_str_scrub): ditto. * string.c (rb_str_enumerate_chars): ditto, and suppress a warning from gcc6. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51278 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 14 ++++++++++++++ include/ruby/encoding.h | 6 ++++++ re.c | 3 +-- string.c | 14 ++++++-------- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index 99ea4a96ed..2ebcccaadc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Fri Jul 17 15:39:19 2015 Nobuyoshi Nakada + + * include/ruby/encoding.h (ENC_CODERANGE_CLEAN_P): predicate that + tells if the coderange is clean, that is 7bit or valid, and no + needs to scrub. + + * re.c (rb_reg_expr_str): use ENC_CODERANGE_CLEAN_P. + + * string.c (enc_strlen, rb_enc_cr_str_buf_cat, rb_str_scrub): + ditto. + + * string.c (rb_str_enumerate_chars): ditto, and suppress a warning + by gcc6. 
+ Fri Jul 17 15:36:52 2015 yui-knk * test/ruby/test_range.rb (test_first_last): Add test for diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 7d33e538de..5eaf468059 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -49,6 +49,12 @@ RUBY_SYMBOL_EXPORT_BEGIN #define ENC_CODERANGE_7BIT ((int)FL_USER8) #define ENC_CODERANGE_VALID ((int)FL_USER9) #define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9)) +static inline int +rb_enc_coderange_clean_p(int cr) +{ + return (cr ^ (cr >> 1)) & ENC_CODERANGE_7BIT; +} +#define ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr) #define ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & ENC_CODERANGE_MASK) #define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT) #define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \ diff --git a/re.c b/re.c index 7c925c6ba7..bceec15f0a 100644 --- a/re.c +++ b/re.c @@ -340,8 +340,7 @@ rb_reg_expr_str(VALUE str, const char *s, long len, p = s; pend = p + len; rb_str_coderange_scan_restartable(p, pend, enc, &cr); - if (rb_enc_asciicompat(enc) && - (cr == ENC_CODERANGE_VALID || cr == ENC_CODERANGE_7BIT)) { + if (rb_enc_asciicompat(enc) && ENC_CODERANGE_CLEAN_P(cr)) { while (p < pend) { c = rb_enc_ascget(p, pend, &clen, enc); if (c == -1) { diff --git a/string.c b/string.c index e33ef1775e..56960574ec 100644 --- a/string.c +++ b/string.c @@ -1300,7 +1300,7 @@ enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr) #endif else if (rb_enc_asciicompat(enc)) { c = 0; - if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) { + if (ENC_CODERANGE_CLEAN_P(cr)) { while (p < e) { if (ISASCII(*p)) { q = search_nonascii(p, e); @@ -2335,7 +2335,7 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, } else if (str_cr == ENC_CODERANGE_VALID) { res_encindex = str_encindex; - if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID) + if (ENC_CODERANGE_CLEAN_P(ptr_cr)) res_cr = str_cr; else res_cr = ptr_cr; @@ -6970,9 
+6970,7 @@ rb_str_enumerate_chars(VALUE str, int wantarray) return SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size); } - switch (ENC_CODERANGE(str)) { - case ENC_CODERANGE_VALID: - case ENC_CODERANGE_7BIT: + if (ENC_CODERANGE_CLEAN_P(ENC_CODERANGE(str))) { for (i = 0; i < len; i += n) { n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc); substr = rb_str_subseq(str, i, n); @@ -6981,8 +6979,8 @@ rb_str_enumerate_chars(VALUE str, int wantarray) else rb_yield(substr); } - break; - default: + } + else { for (i = 0; i < len; i += n) { n = rb_enc_mbclen(ptr + i, ptr + len, enc); substr = rb_str_subseq(str, i, n); @@ -8377,7 +8375,7 @@ rb_str_scrub(VALUE str, VALUE repl) rb_encoding *enc; int encidx; - if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) + if (ENC_CODERANGE_CLEAN_P(cr)) return Qnil; enc = STR_ENC_GET(str); -- cgit v1.2.3