From a39feece851a048320ade466dec0a4abb0342a18 Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 8 Mar 2008 09:05:34 +0000 Subject: * string.c (search_nonascii): Use VALUE instead of unsigned long because VALUE can be the fastest unsigned integer type. On LLP64 unsigned long isn't the fastest. * string.c (str_strlen): ditto. * string.c (str_utf8_nth): ditto. * string.c (count_utf8_lead_bytes_with_ulong): ditto. * string.c (count_utf8_lead_bytes_with_word): renamed. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 ++++++++ string.c | 68 ++++++++++++++++++++++++-------------------------- test/ruby/test_m17n.rb | 2 ++ version.h | 6 ++--- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index a58e90d9d0..5e2ff66ee8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Sat Mar 8 06:53:48 2008 NARUSE, Yui + + * string.c (search_nonascii): Use VALUE instead of unsigned long + because VALUE can be the fastest unsigned integer type. + On LLP64 unsigned long isn't the fastest. + * string.c (str_strlen): ditto. + * string.c (str_utf8_nth): ditto. + * string.c (count_utf8_lead_bytes_with_ulong): ditto. + + * string.c (count_utf8_lead_bytes_with_word): renamed. + Fri Mar 7 21:27:43 2008 Yusuke Endoh * bignum.c: fix indent. diff --git a/string.c b/string.c index 89a8b465f2..7ae755b45c 100644 --- a/string.c +++ b/string.c @@ -118,22 +118,22 @@ VALUE rb_fs; static inline const char * search_nonascii(const char *p, const char *e) { -#if ULONG_MAX == 18446744073709551615UL -# define NONASCII_MASK 0x8080808080808080UL -#elif ULONG_MAX == 4294967295UL +#if SIZEOF_VALUE == 8 +# define NONASCII_MASK 0x8080808080808080LL +#elif SIZEOF_VALUE == 4 # define NONASCII_MASK 0x80808080UL #endif #ifdef NONASCII_MASK - if (sizeof(long) * 2 < e - p) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); while (p < (const char *)s) { if (!ISASCII(*p)) return p; p++; } - t = (const unsigned long*)(~lowbits & (VALUE)e); + t = (const VALUE*)(~lowbits & (VALUE)e); while (s < t) { if (*s & NONASCII_MASK) { t = s; @@ -757,19 +757,19 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr) #ifdef NONASCII_MASK #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80) -static inline const long -count_utf8_lead_bytes_with_ulong(const unsigned long *s) +static inline const VALUE +count_utf8_lead_bytes_with_word(const VALUE *s) { - unsigned long d = *s; + VALUE d = *s; d |= ~(d>>1); d >>= 6; d &= NONASCII_MASK >> 7; d += (d>>8); d += (d>>16); -#if NONASCII_MASK == 0x8080808080808080UL +#if SIZEOF_VALUE == 8 d += (d>>32); #endif - return (long)(d&0xF); + return (d&0xF); } #endif @@ -786,18 +786,18 @@ str_strlen(VALUE str, rb_encoding *enc) #ifdef NONASCII_MASK if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && enc == rb_utf8_encoding()) { - long len = 0; - if (sizeof(long) * 2 < e - p) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); + VALUE len = 0; + if (sizeof(VALUE) * 2 < e - p) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); while (p < (const char *)s) { if (is_utf8_lead_byte(*p)) len++; p++; } while (s < t) { - len += count_utf8_lead_bytes_with_ulong(s); + len += count_utf8_lead_bytes_with_word(s); s++; } p = (const char *)s; @@ -806,7 +806,7 @@ str_strlen(VALUE str, rb_encoding *enc) if (is_utf8_lead_byte(*p)) len++; p++; } - return len; + return (long)len; } #endif n = rb_enc_strlen_cr(p, e, enc, &cr); @@ -1168,29 +1168,27 @@ str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int singleby static char * str_utf8_nth(const char *p, const char *e, int nth) { - if (sizeof(long) * 2 < nth) { - const unsigned long *s, *t; - const VALUE lowbits = sizeof(unsigned long) - 1; - s = (const unsigned long*)(~lowbits & ((VALUE)p + lowbits)); - t = (const unsigned long*)(~lowbits & (VALUE)e); + if (sizeof(VALUE) * 2 < nth) { + const VALUE *s, *t; + const VALUE lowbits = sizeof(VALUE) - 1; + s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); + t = (const VALUE*)(~lowbits & (VALUE)e); while (p < (const char *)s) { if (is_utf8_lead_byte(*p)) nth--; p++; } do { - nth -= count_utf8_lead_bytes_with_ulong(s); + nth -= count_utf8_lead_bytes_with_word(s); s++; - } while (s < t && sizeof(long) <= nth); + } while (s < t && sizeof(VALUE) <= nth); p = (char *)s; } - if (0 < nth) { - while (p < e) { - if (is_utf8_lead_byte(*p)) { - nth--; - if (nth < 0) break; - } - p++; + while (p < e) { + if (is_utf8_lead_byte(*p)) { + if (nth == 0) break; + nth--; } + p++; } return (char *)p; } diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 7a9e24f4a1..dfb00ba657 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -819,6 +819,8 @@ class TestM17N < Test::Unit::TestCase assert_equal("\u{3044}", s[27]) assert_equal("\u{3046}", s[28]) assert_equal("\u{3048}", s[29]) + s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}" + assert_equal("\u{3044}", s[24]) end def test_str_aref_len diff --git a/version.h b/version.h index 4134af5c00..17220fd63f 100644 --- a/version.h +++ b/version.h @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-03-07" +#define RUBY_RELEASE_DATE "2008-03-08" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080307 +#define RUBY_RELEASE_CODE 20080308 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 3 -#define RUBY_RELEASE_DAY 7 +#define RUBY_RELEASE_DAY 8 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; -- cgit v1.2.3