Skip to content

Commit

Permalink
kernel: rewrite (C)Log2Int using compiler builtins
Browse files Browse the repository at this point in the history
Compared to the old CLog2Int code, on my machine the new code using
compiler builtins is between 5 and 10 times faster, and the new generic
code is still 1.2 to 4 times faster.

The new FuncLog2Int calls CLog2Int (or rather, the new static inline
function CLog2UInt which expects an unsigned argument), and thus
benefits from the optimizations as well (though not by as much).

Some micro benchmarks, using the old code:

gap> x:=2^0;; for a in [0..2^25] do Log2Int(x); od; time;
998
gap> x:=2^50;; for a in [0..2^25] do Log2Int(x); od; time;
1916
gap> x:=2^60;; for a in [0..2^25] do Log2Int(x); od; time;
1312
gap> x:=2^80;; for a in [0..2^25] do Log2Int(x); od; time;
2773
gap> x:=2^180;; for a in [0..2^25] do Log2Int(x); od; time;
1504

New code:

gap> x:=2^0;; for a in [0..2^25] do Log2Int(x); od; time;
955
gap> x:=2^50;; for a in [0..2^25] do Log2Int(x); od; time;
970
gap> x:=2^60;; for a in [0..2^25] do Log2Int(x); od; time;
1045
gap> x:=2^80;; for a in [0..2^25] do Log2Int(x); od; time;
1020
gap> x:=2^180;; for a in [0..2^25] do Log2Int(x); od; time;
1007
  • Loading branch information
fingolfin authored and ChrisJefferson committed Jan 11, 2017
1 parent eda43da commit 814ec4d
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 29 deletions.
9 changes: 9 additions & 0 deletions cnf/config.hin
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,15 @@
/* Define to 1 if you have the `_setjmp' function. */
#undef HAVE__SETJMP

/* Define to 1 if the system has the `__builtin_clz' built-in function */
#undef HAVE___BUILTIN_CLZ

/* Define to 1 if the system has the `__builtin_clzl' built-in function */
#undef HAVE___BUILTIN_CLZL

/* Define to 1 if the system has the `__builtin_clzll' built-in function */
#undef HAVE___BUILTIN_CLZLL

/* Define to 1 if the system has the `__builtin_smulll_overflow' built-in
function */
#undef HAVE___BUILTIN_SMULLL_OVERFLOW
Expand Down
3 changes: 3 additions & 0 deletions cnf/configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ AC_DEFUN([CHECK_COMPILER_BUILTIN],
CHECK_COMPILER_BUILTIN([__builtin_smul_overflow],[0,0,0]);
CHECK_COMPILER_BUILTIN([__builtin_smull_overflow],[0,0,0]);
CHECK_COMPILER_BUILTIN([__builtin_smulll_overflow],[0,0,0]);
CHECK_COMPILER_BUILTIN([__builtin_clz],[0]);
CHECK_COMPILER_BUILTIN([__builtin_clzl],[0]);
CHECK_COMPILER_BUILTIN([__builtin_clzll],[0]);


#
Expand Down
96 changes: 96 additions & 0 deletions cnf/configure.out
Original file line number Diff line number Diff line change
Expand Up @@ -4521,6 +4521,102 @@ cat >>confdefs.h <<_ACEOF
#define HAVE___BUILTIN_SMULLL_OVERFLOW 1
_ACEOF

fi;
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
$as_echo_n "checking for __builtin_clz... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
__builtin_clz(0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
have___builtin_clz=yes
else
have___builtin_clz=no

fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clz" >&5
$as_echo "$have___builtin_clz" >&6; }
if test yes = $have___builtin_clz; then :

cat >>confdefs.h <<_ACEOF
#define HAVE___BUILTIN_CLZ 1
_ACEOF

fi;
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5
$as_echo_n "checking for __builtin_clzl... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
__builtin_clzl(0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
have___builtin_clzl=yes
else
have___builtin_clzl=no

fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clzl" >&5
$as_echo "$have___builtin_clzl" >&6; }
if test yes = $have___builtin_clzl; then :

cat >>confdefs.h <<_ACEOF
#define HAVE___BUILTIN_CLZL 1
_ACEOF

fi;
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzll" >&5
$as_echo_n "checking for __builtin_clzll... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
__builtin_clzll(0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
have___builtin_clzll=yes
else
have___builtin_clzll=no

fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $have___builtin_clzll" >&5
$as_echo "$have___builtin_clzll" >&6; }
if test yes = $have___builtin_clzll; then :

cat >>confdefs.h <<_ACEOF
#define HAVE___BUILTIN_CLZLL 1
_ACEOF

fi;


Expand Down
85 changes: 56 additions & 29 deletions src/gmpints.c
Original file line number Diff line number Diff line change
Expand Up @@ -638,21 +638,55 @@ Obj FuncIntHexString( Obj self, Obj str )
/****************************************************************************
**
**  Implementation of Log2Int for C integers.
**
**  CLog2UInt(<a>) returns the index of the highest set bit of <a>, that is
**  floor(log2(<a>)), and -1 if <a> is zero.
**
**  When available, we try to use GCC builtins. Otherwise, fall back to code
**  based on https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup.
**  On a test machine with x86 64bit, the builtins are about 4 times faster
**  than the generic code.
**
**  NOTE(review): the builtin branches assume that GMP_LIMB_BITS equals the
**  bit width of the builtin's argument type (which the SIZEOF_* checks are
**  meant to select) -- confirm against the definition of GMP_LIMB_BITS.
**
*/

static Int CLog2UInt(UInt a)
{
    /* The count-leading-zeros builtins have an undefined result for an
       argument of 0, so zero is handled explicitly in every builtin branch;
       this yields the same -1 the lookup table produces for 0. */
#if SIZEOF_VOID_P == SIZEOF_INT && HAVE___BUILTIN_CLZ
    return a ? GMP_LIMB_BITS - 1 - __builtin_clz(a) : -1;
#elif SIZEOF_VOID_P == SIZEOF_LONG && HAVE___BUILTIN_CLZL
    return a ? GMP_LIMB_BITS - 1 - __builtin_clzl(a) : -1;
#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG && HAVE___BUILTIN_CLZLL
    return a ? GMP_LIMB_BITS - 1 - __builtin_clzll(a) : -1;
#else
    /* LogTable256[i] == floor(log2(i)) for i > 0, and -1 for i == 0.
       The element type must be explicitly signed: plain `char' is unsigned
       on some platforms, where the -1 entry would silently become 255. */
    static const signed char LogTable256[256] = {
        -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
         4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
    };

    Int  res = 0;
    UInt b;

    /* Narrow <a> down to its highest non-zero byte, accumulating the bit
       offset in <res>. The shift by 32 is done in two steps of 16: a single
       `a >> 32' would be undefined behaviour when UInt is only 32 bits
       wide, whereas the two-step form is well defined there and yields 0. */
    b = (a >> 16) >> 16;
    if (b) { res += 32; a = b; }
    b = a >> 16;
    if (b) { res += 16; a = b; }
    b = a >> 8;
    if (b) { res += 8; a = b; }

    /* <a> is now below 256, so the table gives the final few bits. */
    return res + LogTable256[a];
#endif
}

/*
**  CLog2Int(<a>) returns floor(log2(|<a>|)) for a non-zero C integer <a>,
**  and -1 if <a> is zero. The actual bit scan is delegated to CLog2UInt.
*/
Int CLog2Int(Int a)
{
    UInt abs_a;

    /* Zero has no highest set bit; report -1 without calling CLog2UInt. */
    if (a == 0)
        return -1;

    /* Take the absolute value in unsigned arithmetic: `-a' on the signed
       type overflows (undefined behaviour) for the most negative Int,
       while unsigned negation is well defined and gives its magnitude. */
    abs_a = (a < 0) ? -(UInt)a : (UInt)a;

    return CLog2UInt(abs_a);
}

/****************************************************************************
Expand All @@ -663,29 +697,22 @@ Int CLog2Int(Int a)
*/
Obj FuncLog2Int( Obj self, Obj integer)
{
Int d;
Int a, len;
TypLimb dmask;

/* case of small ints */
if (IS_INTOBJ(integer)) {
if ( IS_INTOBJ(integer) ) {
return INTOBJ_INT(CLog2Int(INT_INTOBJ(integer)));
}

/* case of long ints */
if ( IS_LARGEINT(integer) ) {
for (len = SIZE_INT(integer); ADDR_INT(integer)[len-1] == 0; len--);
/* Instead of computing
res = len * GMP_LIMB_BITS - d;
we keep len and d separate, because on 32 bit systems res may
not fit into an Int (and not into an immediate integer). */
d = 1;
a = (TypLimb)(ADDR_INT(integer)[len-1]);
for(dmask = (TypLimb)1 << (GMP_LIMB_BITS - 1);
(dmask & a) == 0 && dmask != (TypLimb)0;
dmask = dmask >> 1, d++);
return DiffInt(ProdInt(INTOBJ_INT(len), INTOBJ_INT(GMP_LIMB_BITS)),
INTOBJ_INT(d));
UInt len = SIZE_INT(integer) - 1;
UInt a = CLog2UInt( ADDR_INT(integer)[len] );

#ifdef SYS_IS_64_BIT
return INTOBJ_INT(len * GMP_LIMB_BITS + a);
#else
/* The final result is len * GMP_LIMB_BITS + a, which may not
fit into an immediate integer (at least on a 32bit system) */
return SumInt(ProdInt(INTOBJ_INT(len), INTOBJ_INT(GMP_LIMB_BITS)),
INTOBJ_INT(a));
#endif
}
else {
ErrorReturnObj("Log2Int: argument must be a int, (not a %s)",
Expand Down

0 comments on commit 814ec4d

Please sign in to comment.