/*
 * Population-count helpers from internal.h, as they stand after commit
 * cdef0bc8337377bd11f7987ed82c5780dc6e869f (naruse, Tue, 3 May 2016,
 * git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54894):
 *
 *   * string.c (count_utf8_lead_bytes_with_word): Use __builtin_popcount
 *     only if it can use SSE 4.2 POPCNT whose latency is 3 cycles.
 *   * internal.h (rb_popcount64): use __builtin_popcountll because now it
 *     is in fast path.
 */

/*
 * Returns the number of 1 bits in the 32-bit value x (0..32).
 *
 * Uses the compiler builtin when HAVE_BUILTIN___BUILTIN_POPCOUNT is defined;
 * otherwise sums adjacent bit fields of doubling width (1, 2, 4, 8, 16 bits).
 */
static inline int
rb_popcount32(uint32_t x) {
#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT
    return __builtin_popcount(x);
#else
    /* Each step adds neighbouring fields; a mask only has to be wide enough
     * to hold the largest partial count reachable at that step. */
    x = (x & 0x55555555) + (x >> 1 & 0x55555555);
    x = (x & 0x33333333) + (x >> 2 & 0x33333333);
    x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f);
    x = (x & 0x001f001f) + (x >> 8 & 0x001f001f);
    return (x & 0x0000003f) + (x >>16 & 0x0000003f);
#endif
}

/*
 * Returns the number of 1 bits in the 64-bit value x (0..64).
 *
 * Uses __builtin_popcountll when HAVE_BUILTIN___BUILTIN_POPCOUNT is defined;
 * otherwise uses the same field-summing scheme as rb_popcount32 with one
 * extra 32-bit step.
 */
static inline int
rb_popcount64(uint64_t x) {
#ifdef HAVE_BUILTIN___BUILTIN_POPCOUNT
    return __builtin_popcountll(x);
#else
    x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555);
    x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333);
    /* 0x07 is sufficient here: every nibble holds at most 4 at this point. */
    x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707);
    x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f);
    x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f);
    return (x & 0x7f) + (x >>32 & 0x7f);
#endif
}

/*
 * Returns the number of 1 bits in the pointer-sized value x.
 *
 * Dispatches on SIZEOF_VOIDP to the 64- or 32-bit helper.  When
 * SIZEOF_VOIDP is undefined or has any other value, a width-independent
 * loop is used so that the function always returns a value (previously it
 * fell off the end without a return statement in that configuration).
 */
static inline int
rb_popcount_intptr(uintptr_t x) {
#if SIZEOF_VOIDP == 8
    return rb_popcount64(x);
#elif SIZEOF_VOIDP == 4
    return rb_popcount32(x);
#else
    int count = 0;

    /* x &= x - 1 clears the lowest set bit; iterate once per set bit. */
    while (x != 0) {
        x &= x - 1;
        count++;
    }
    return count;
#endif
}