From 4cf460a7bb258d3d61414d2f74df4c0f83c6a3af Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 30 Apr 2016 15:39:02 +0000 Subject: * string.c (search_nonascii): unroll and use ntz * configure.in (__builtin_ctz): check. * configure.in (__builtin_ctzll): check. * internal.h (rb_popcount32): defined for ntz_int32. it can use __builtin_popcount but this function is not used on GCC environment because it uses __builtin_ctz. When another function uses this, using __builtin_popcount should be re-considered. * internal.h (rb_popcount64): ditto. * internal.h (ntz_int32): defined for ntz_intptr. * internal.h (ntz_int64): defined for ntz_intptr. * internal.h (ntz_intptr): defined as ntz for uintptr_t. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54854 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 22 ++++++++++++++++++++++ configure.in | 2 ++ internal.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ string.c | 60 +++++++++++++++++++++++++++++++++++++----------------------- 4 files changed, 107 insertions(+), 23 deletions(-) diff --git a/ChangeLog b/ChangeLog index ea56000f14..40be5eea9f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +Sun May 1 00:03:30 2016 NARUSE, Yui + + * configure.in (__builtin_ctz): check. + + * configure.in (__builtin_ctzll): check. + + * internal.h (rb_popcount32): defined for ntz_int32. + it can use __builtin_popcount but this function is not used on + GCC environment because it uses __builtin_ctz. + When another function uses this, using __builtin_popcount + should be re-considered. + + * internal.h (rb_popcount64): ditto. + + * internal.h (ntz_int32): defined for ntz_intptr. + + * internal.h (ntz_int64): defined for ntz_intptr. + + * internal.h (ntz_intptr): defined as ntz for uintptr_t. + + * string.c (search_nonascii): unroll and use ntz. + Sat Apr 30 21:54:13 2016 Tanaka Akira * numeric.c (Init_Numeric): Gather Fixnum method definitions. diff --git a/configure.in b/configure.in index 1042b0329c..14711a6837 100644 --- a/configure.in +++ b/configure.in @@ -2432,6 +2432,8 @@ RUBY_CHECK_BUILTIN_FUNC(__builtin_bswap64, [__builtin_bswap64(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clz, [__builtin_clz(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzl, [__builtin_clzl(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_clzll, [__builtin_clzll(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_ctz, [__builtin_ctz(0)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_ctzll, [__builtin_ctzll(0)]) RUBY_CHECK_BUILTIN_FUNC(__builtin_choose_expr, [ [int x[__extension__(__builtin_choose_expr(1, 1, -1))]]; [int y[__extension__(__builtin_choose_expr(0, -1, 1))]]; diff --git a/internal.h b/internal.h index 7793c1ab32..a8f4240059 100644 --- a/internal.h +++ b/internal.h @@ -260,6 +260,52 @@ nlz_int128(uint128_t x) } #endif +static inline int +rb_popcount32(uint32_t x) { + x = (x & 0x55555555) + (x >> 1 & 0x55555555); + x = (x & 0x33333333) + (x >> 2 & 0x33333333); + x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); + x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); + return (x & 0x0000003f) + (x >>16 & 0x0000003f); +} + +static inline int +rb_popcount64(uint64_t x) { + x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); + x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); + x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); + x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); + x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); + return (x & 0x7f) + (x >>32 & 0x7f); +} + +static inline int +ntz_int32(uint32_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_CTZ + return __builtin_ctz(x); +#else + return rb_popcount32((~x) & (x-1)); +#endif +} + +static inline int +ntz_int64(uint64_t x) { +#ifdef HAVE_BUILTIN___BUILTIN_CTZLL + return __builtin_ctzll(x); +#else + return rb_popcount64((~x) & (x-1)); +#endif +} + +static inline int +ntz_intptr(uintptr_t x) { +#if SIZEOF_VOIDP == 8 + return ntz_int64(x); +#elif SIZEOF_VOIDP == 4 + return ntz_int32(x); +#endif +} + #if HAVE_LONG_LONG && SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG # define DLONG LONG_LONG # define DL2NUM(x) LL2NUM(x) diff --git a/string.c b/string.c index 4ee59b2bd9..e2cb68aac2 100644 --- a/string.c +++ b/string.c @@ -427,32 +427,46 @@ search_nonascii(const char *p, const char *e) #elif SIZEOF_VOIDP == 4 # define NONASCII_MASK 0x80808080UL #endif -#ifdef NONASCII_MASK - if ((int)SIZEOF_VOIDP * 2 < e - p) { - const uintptr_t *s, *t; - const uintptr_t lowbits = SIZEOF_VOIDP - 1; - s = (const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits)); - while (p < (const char *)s) { - if (!ISASCII(*p)) - return p; - p++; - } - t = (const uintptr_t*)(~lowbits & (uintptr_t)e); - while (s < t) { - if (*s & NONASCII_MASK) { - t = s; - break; - } - s++; - } - p = (const char *)t; + +#if !UNALIGNED_WORD_ACCESS + if (e - p > SIZEOF_VOIDP) { + switch (8 - (uintptr_t)p % 8) { +#if SIZEOF_VOIDP > 4 + case 7: if (*p&0x80) return p; p++; + case 6: if (*p&0x80) return p; p++; + case 5: if (*p&0x80) return p; p++; + case 4: if (*p&0x80) return p; p++; +#endif + case 3: if (*p&0x80) return p; p++; + case 2: if (*p&0x80) return p; p++; + case 1: if (*p&0x80) return p; p++; + } } #endif - while (p < e) { - if (!ISASCII(*p)) - return p; - p++; + + { + const uintptr_t *s = (const uintptr_t *)p; + const uintptr_t *t = (const uintptr_t *)(e - (SIZEOF_VOIDP-1)); + for (;s < t; s++) { + if (*s & NONASCII_MASK) { + return (const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3); + } + } + p = (const char *)s; } + + switch ((e - p) % SIZEOF_VOIDP) { +#if SIZEOF_VOIDP > 4 + case 7: if (*p&0x80) return p; p++; + case 6: if (*p&0x80) return p; p++; + case 5: if (*p&0x80) return p; p++; + case 4: if (*p&0x80) return p; p++; +#endif + case 3: if (*p&0x80) return p; p++; + case 2: if (*p&0x80) return p; p++; + case 1: if (*p&0x80) return p; + } + return NULL; } -- cgit v1.2.3