diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-16 04:05:58 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-02-16 04:05:58 +0000 |
commit | 327673a43b8aaf0df6ca2a494ab4aeadb49802fb (patch) | |
tree | 55047b32c93a7326a1e1ba3b516a2943b5e9ed44 /string.c | |
parent | af75cc01bc7ed39eafbc958936c51e0221313051 (diff) | |
download | ruby-327673a43b8aaf0df6ca2a494ab4aeadb49802fb.tar.gz |
* string.c (rb_enc_strlen): add a search_nonascii-like character
counter for UTF-8.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15499 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
@@ -597,6 +597,35 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
     if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
         return (e - p) / rb_enc_mbminlen(enc);
     }
+#ifdef NONASCII_MASK
+    else if (enc == rb_utf8_encoding()) {
+        if (sizeof(long) * 2 < e - p) {
+            const unsigned long *s, *t;
+            const VALUE lowbits = sizeof(unsigned long) - 1;
+            s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits));
+            t = (const unsigned long*)(~lowbits & (VALUE)e);
+            for (c=0; p<(const char *)s; p++) {
+                if (((*p)&0xC0) != 0x80) c++;
+            }
+            while (s < t) {
+                unsigned long d = *s;
+                d = (~d ^ (d&(d<<1)))&NONASCII_MASK;
+                d = (d>>7) + (d>>15);
+                d = d + (d>>16);
+#if NONASCII_MASK == 0x8080808080808080UL
+                d = d + (d>>32);
+#endif
+                c += (long)(d&0xF);
+                s++;
+            }
+            p = (const char *)t;
+        }
+        for (; p<e; p++) {
+            if (((*p)&0xC0) != 0x80) c++;
+        }
+        return c;
+    }
+#endif
     else if (rb_enc_asciicompat(enc)) {
         c = 0;
         while (p < e) {
```