diff --git a/cnf/config.hin b/cnf/config.hin index a27a32c473..dade4ee603 100644 --- a/cnf/config.hin +++ b/cnf/config.hin @@ -376,6 +376,15 @@ /* Define to 1 if you have the `_setjmp' function. */ #undef HAVE__SETJMP +/* Define to 1 if the system has the `__builtin_clz' built-in function */ +#undef HAVE___BUILTIN_CLZ + +/* Define to 1 if the system has the `__builtin_clzl' built-in function */ +#undef HAVE___BUILTIN_CLZL + +/* Define to 1 if the system has the `__builtin_clzll' built-in function */ +#undef HAVE___BUILTIN_CLZLL + /* Define to 1 if the system has the `__builtin_smulll_overflow' built-in function */ #undef HAVE___BUILTIN_SMULLL_OVERFLOW diff --git a/cnf/configure.in b/cnf/configure.in index 259ec108d3..d99be9c429 100644 --- a/cnf/configure.in +++ b/cnf/configure.in @@ -68,6 +68,9 @@ AC_DEFUN([CHECK_COMPILER_BUILTIN], CHECK_COMPILER_BUILTIN([__builtin_smul_overflow],[0,0,0]); CHECK_COMPILER_BUILTIN([__builtin_smull_overflow],[0,0,0]); CHECK_COMPILER_BUILTIN([__builtin_smulll_overflow],[0,0,0]); +CHECK_COMPILER_BUILTIN([__builtin_clz],[0]); +CHECK_COMPILER_BUILTIN([__builtin_clzl],[0]); +CHECK_COMPILER_BUILTIN([__builtin_clzll],[0]); # diff --git a/cnf/configure.out b/cnf/configure.out index 02d029b608..474e140778 100755 --- a/cnf/configure.out +++ b/cnf/configure.out @@ -4521,6 +4521,102 @@ cat >>confdefs.h <<_ACEOF #define HAVE___BUILTIN_SMULLL_OVERFLOW 1 _ACEOF +fi; +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5 +$as_echo_n "checking for __builtin_clz... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +__builtin_clz(0); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + have___builtin_clz=yes +else + have___builtin_clz=no + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clz" >&5 +$as_echo "$have___builtin_clz" >&6; } + if test yes = $have___builtin_clz; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE___BUILTIN_CLZ 1 +_ACEOF + +fi; +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5 +$as_echo_n "checking for __builtin_clzl... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +__builtin_clzl(0); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + have___builtin_clzl=yes +else + have___builtin_clzl=no + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clzl" >&5 +$as_echo "$have___builtin_clzl" >&6; } + if test yes = $have___builtin_clzl; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE___BUILTIN_CLZL 1 +_ACEOF + +fi; +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzll" >&5 +$as_echo_n "checking for __builtin_clzll... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +__builtin_clzll(0); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + have___builtin_clzll=yes +else + have___builtin_clzll=no + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clzll" >&5 +$as_echo "$have___builtin_clzll" >&6; } + if test yes = $have___builtin_clzll; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE___BUILTIN_CLZLL 1 +_ACEOF + fi; diff --git a/src/gmpints.c b/src/gmpints.c index aa7cee95c6..90d03812ba 100644 --- a/src/gmpints.c +++ b/src/gmpints.c @@ -638,21 +638,55 @@ Obj FuncIntHexString( Obj self, Obj str ) /**************************************************************************** ** ** Implementation of Log2Int for C integers. +** +** When available, we try to use GCC builtins. Otherwise, fall back to code +** based on https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup. +** On a test machine with x86 64bit, the builtins are about 4 times faster +** than the generic code. 
+**  The result is floor(log2(a)) for a > 0, and -1 for a = 0.
 */
+static Int CLog2UInt(UInt a)
+{
+#if SIZEOF_VOID_P == SIZEOF_INT && HAVE___BUILTIN_CLZ
+    return a ? GMP_LIMB_BITS - 1 - __builtin_clz(a) : -1;   /* clz(0) is undefined */
+#elif SIZEOF_VOID_P == SIZEOF_LONG && HAVE___BUILTIN_CLZL
+    return a ? GMP_LIMB_BITS - 1 - __builtin_clzl(a) : -1;  /* clzl(0) is undefined */
+#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG && HAVE___BUILTIN_CLZLL
+    return a ? GMP_LIMB_BITS - 1 - __builtin_clzll(a) : -1; /* clzll(0) is undefined */
+#else
+    static const signed char LogTable256[256] = {  /* plain char may be unsigned */
+        -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+    };
+    /* (a >> 16) >> 16 is 0 for a 32 bit UInt, where a >> 32 is undefined */
+    Int res = 0;
+    UInt b;
+    b = (a >> 16) >> 16; if (b) { res+=32; a=b; }
+    b = a >> 16; if (b) { res+=16; a=b; }
+    b = a >> 8; if (b) { res+= 8; a=b; }
+    return res + LogTable256[a];
+#endif
+}
+
 Int CLog2Int(Int a)
 {
-    Int res, mask;
     if (a < 0) a = -a;
-    if (a < 1) return -1;
-    if (a < 65536) {
-        for(mask = 2, res = 0; ;mask *= 2, res += 1) {
-            if(a < mask) return res;
-        }
-    }
-    for(mask = 65536, res = 15; ;mask *= 2, res += 1) {
-        if(a < mask) return res;
-    }
+    return CLog2UInt(a);
 }
 
 /****************************************************************************
@@ -663,29 +697,22 @@ Int CLog2Int(Int a)
 */
 Obj FuncLog2Int( Obj self, Obj integer)
 {
-    Int d;
-    Int a, len;
-    TypLimb dmask;
-
-    /* case of small ints */
-    if (IS_INTOBJ(integer)) {
+    if ( IS_INTOBJ(integer) ) {
         return
INTOBJ_INT(CLog2Int(INT_INTOBJ(integer))); } - /* case of long ints */ if ( IS_LARGEINT(integer) ) { - for (len = SIZE_INT(integer); ADDR_INT(integer)[len-1] == 0; len--); - /* Instead of computing - res = len * GMP_LIMB_BITS - d; - we keep len and d separate, because on 32 bit systems res may - not fit into an Int (and not into an immediate integer). */ - d = 1; - a = (TypLimb)(ADDR_INT(integer)[len-1]); - for(dmask = (TypLimb)1 << (GMP_LIMB_BITS - 1); - (dmask & a) == 0 && dmask != (TypLimb)0; - dmask = dmask >> 1, d++); - return DiffInt(ProdInt(INTOBJ_INT(len), INTOBJ_INT(GMP_LIMB_BITS)), - INTOBJ_INT(d)); + UInt len = SIZE_INT(integer) - 1; + UInt a = CLog2UInt( ADDR_INT(integer)[len] ); + +#ifdef SYS_IS_64_BIT + return INTOBJ_INT(len * GMP_LIMB_BITS + a); +#else + /* The final result is len * GMP_LIMB_BITS - d, which may not + fit into an immediate integer (at least on a 32bit system) */ + return SumInt(ProdInt(INTOBJ_INT(len), INTOBJ_INT(GMP_LIMB_BITS)), + INTOBJ_INT(a)); +#endif } else { ErrorReturnObj("Log2Int: argument must be a int, (not a %s)",