From a47e8e776c8e91cdf17f4cb79569c8b20c88b845 Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 16 Feb 2008 07:16:36 +0000 Subject: * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen. (str_strlen): UTF-8 character count is only applicable for valid UTF-8 string. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15504 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ string.c | 61 ++++++++++++++++++++++++++++++++----------------------------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4c3c349728..36a2afac1c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Sat Feb 16 16:14:35 2008 Tanaka Akira + + * string.c (rb_enc_strlen): UTF-8 character count moved to str_strlen. + (str_strlen): UTF-8 character count is only applicable for valid + UTF-8 string. + Sat Feb 16 13:16:49 2008 Tanaka Akira * string.c (rb_str_sub_bang): stringize replacing hash values. diff --git a/string.c b/string.c index 4fcdd52cbf..5d2d8bdfd7 100644 --- a/string.c +++ b/string.c @@ -597,35 +597,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { return (e - p) / rb_enc_mbminlen(enc); } -#ifdef NONASCII_MASK - else if (enc == rb_utf8_encoding()) { - if (sizeof(long) * 2 < e - p) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); - for (c=0; p<(const char *)s; p++) { - if (((*p)&0xC0) != 0x80) c++; - } - while (s < t) { - unsigned long d = *s; - d = (~d ^ (d&(d<<1)))&NONASCII_MASK; - d = (d>>7) + (d>>15); - d = d + (d>>16); -#if NONASCII_MASK == 0x8080808080808080UL - d = d + (d>>32); -#endif - c += (long)(d&0xF); - s++; - } - p = (const char *)t; - } - for (; p>7) + (d>>15); + d = d + (d>>16); +#if NONASCII_MASK == 0x8080808080808080UL + d = d + (d>>32); +#endif + len += (long)(d&0xF); + s++; + } + p = (const char *)t; + } + for (; p