From 281009cad94119a78b73302f2b0c3be216f6b26c Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 19:03:39 +0200 Subject: [PATCH 01/21] bpo-29782: Consolidate _Py_Bit_Length() In GH-2866, _Py_Bit_Length() was added to pymath.h for lack of a better location. GH-20518 added a more appropriate header file for bit utilities. It also shows how to properly use intrinsics. This allows reconsidering bpo-29782. * Move the function to the new header. * Changed return type to match __builtin_clzl and reviewed usage. * Use intrinsics where available. * Pick a (mostly theoretical) fallback implementation suitable for inlining. --- Include/internal/pycore_bitutils.h | 32 ++++++++++++++++++++++++++---- Include/pymath.h | 8 -------- Modules/mathmodule.c | 1 + Python/pymath.c | 15 -------------- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 36ffe23b9ff264..814876747b9b97 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -7,8 +7,8 @@ - _Py_bswap64(uint64_t) */ -#ifndef Py_INTERNAL_BSWAP_H -#define Py_INTERNAL_BSWAP_H +#ifndef Py_INTERNAL_BITUTILS_H +#define Py_INTERNAL_BITUTILS_H #ifdef __cplusplus extern "C" { #endif @@ -131,8 +131,32 @@ _Py_popcount32(uint32_t x) } +// Return the index of the most significant 1 bit in 'x'. This is the smallest +// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. +static inline int +_Py_bit_length(unsigned long x) { + if (!x) { + return 0; + } +#if (defined(__clang__) || defined(__GNUC__)) + return sizeof(unsigned long) * 8 - __builtin_clzl(x); +#elif defined(_MSC_VER) + Py_BUILD_ASSERT(4 == sizeof(unsigned long)); + unsigned long msb; + _BitScanReverse(&msb, x); + return 32 - msb; +#else + int msb = 0; + while (x != 0) { + msb += 1; + x >>= 1; + } + return msb; +#endif +} + + #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_BSWAP_H */ - +#endif /* !Py_INTERNAL_BITUTILS_H */ diff --git a/Include/pymath.h b/Include/pymath.h index 63ca972784e311..f869724334a4c4 100644 --- a/Include/pymath.h +++ b/Include/pymath.h @@ -227,12 +227,4 @@ PyAPI_FUNC(void) _Py_set_387controlword(unsigned short); * behavior. */ #define _Py_InIntegralTypeRange(type, v) (_Py_IntegralTypeMin(type) <= v && v <= _Py_IntegralTypeMax(type)) -/* Return the smallest integer k such that n < 2**k, or 0 if n == 0. - * Equivalent to floor(log2(x))+1. Also equivalent to: bitwidth_of_type - - * count_leading_zero_bits(x) - */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(unsigned int) _Py_bit_length(unsigned long d); -#endif - #endif /* Py_PYMATH_H */ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index cb05ce7c509625..89b395a7d38852 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -53,6 +53,7 @@ raised for division by zero and mod by zero. */ #include "Python.h" +#include "pycore_bitutils.h" #include "pycore_dtoa.h" #include "_math.h" diff --git a/Python/pymath.c b/Python/pymath.c index a08a0e796156f7..24b804223eef19 100644 --- a/Python/pymath.c +++ b/Python/pymath.c @@ -79,18 +79,3 @@ round(double x) return copysign(y, x); } #endif /* HAVE_ROUND */ - -static const unsigned int BitLengthTable[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 -}; - -unsigned int _Py_bit_length(unsigned long d) { - unsigned int d_bits = 0; - while (d >= 32) { - d_bits += 6; - d >>= 6; - } - d_bits += BitLengthTable[d]; - return d_bits; -} From 65abb8304d41c82efee2f64f2a0d126ed7e3013b Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 21:03:05 +0200 Subject: [PATCH 02/21] Specify required gcc version for __builtin_clzl --- Include/internal/pycore_bitutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 814876747b9b97..a7e157eff41c6f 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -138,7 +138,7 @@ _Py_bit_length(unsigned long x) { if (!x) { return 0; } -#if (defined(__clang__) || defined(__GNUC__)) +#if (defined(__clang__) || (defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ >= 4)))) return sizeof(unsigned long) * 8 - __builtin_clzl(x); #elif defined(_MSC_VER) Py_BUILD_ASSERT(4 == sizeof(unsigned long)); From ce45ff2a8d0a131839942dd78fa0952ccff42bae Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 21:04:49 +0200 Subject: [PATCH 03/21] Fix _BitScanReverse usage --- Include/internal/pycore_bitutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index a7e157eff41c6f..6d1de7223935e8 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -144,7 +144,7 @@ _Py_bit_length(unsigned long x) { Py_BUILD_ASSERT(4 == sizeof(unsigned long)); unsigned long msb; _BitScanReverse(&msb, x); - return 32 - msb; + return msb + 1; #else int msb = 0; while (x != 0) { From 6464ca6001409512ea15c7d4ea732b4b84a96dfa Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 21:14:35 +0200 Subject: [PATCH 04/21] Document reason for include --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 89b395a7d38852..b7dcb66723f10e 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -53,7 +53,7 @@ raised for division by zero and mod by zero. */ #include "Python.h" -#include "pycore_bitutils.h" +#include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_dtoa.h" #include "_math.h" From 0bd42652dfed7b06dc0fe9e6732e327b717f1618 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 21:41:03 +0200 Subject: [PATCH 05/21] Ensure future compatibility --- Include/internal/pycore_bitutils.h | 18 +++++++++++++----- Objects/longobject.c | 18 +++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 6d1de7223935e8..c6d03b96cf83e3 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -135,26 +135,34 @@ _Py_popcount32(uint32_t x) // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. static inline int _Py_bit_length(unsigned long x) { - if (!x) { + if (x == 0) { return 0; } #if (defined(__clang__) || (defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ >= 4)))) + // Undefined behavior for x == 0. return sizeof(unsigned long) * 8 - __builtin_clzl(x); #elif defined(_MSC_VER) - Py_BUILD_ASSERT(4 == sizeof(unsigned long)); + Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); + // Does not write to msb if x == 0. unsigned long msb; _BitScanReverse(&msb, x); - return msb + 1; + return (int)msb + 1; #else int msb = 0; - while (x != 0) { + do { msb += 1; x >>= 1; - } + } while (x != 0); return msb; #endif } +static inline int +_Py_bit_length_digit(digit x) { + Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); + return _Py_popcount32((unsigned long)x); +} + #ifdef __cplusplus } diff --git a/Objects/longobject.c b/Objects/longobject.c index ce10c4f66586a1..a55171b543d126 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -712,7 +712,7 @@ _PyLong_NumBits(PyObject *vv) if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT) goto Overflow; result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT; - msd_bits = _Py_bit_length(msd); + msd_bits = _Py_bit_length_digit(msd); if (SIZE_MAX - msd_bits < result) goto Overflow; result += msd_bits; @@ -1822,7 +1822,7 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + - _Py_bit_length(a->ob_digit[size_a - 1]); + _Py_bit_length_digit(a->ob_digit[size_a - 1]); /* Allow 1 character for a '-' sign. */ sz = negative + (size_a_in_bits + (bits - 1)) / bits; } @@ -2642,7 +2642,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. shift v1 left by the same amount. Results go into w and v. */ - d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]); + d = PyLong_SHIFT - _Py_bit_length_digit(w1->ob_digit[size_w-1]); carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); assert(carry == 0); carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); @@ -2764,7 +2764,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) *e = 0; return 0.0; } - a_bits = _Py_bit_length(a->ob_digit[a_size-1]); + a_bits = _Py_bit_length_digit(a->ob_digit[a_size-1]); /* The following is an overflow-free version of the check "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && @@ -3857,8 +3857,8 @@ long_true_divide(PyObject *v, PyObject *w) /* Extreme underflow */ goto underflow_or_zero; /* Next line is now safe from overflowing a Py_ssize_t */ - diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) - - _Py_bit_length(b->ob_digit[b_size - 1]); + diff = diff * PyLong_SHIFT + _Py_bit_length_digit(a->ob_digit[a_size - 1]) - + _Py_bit_length_digit(b->ob_digit[b_size - 1]); /* Now diff = a_bits - b_bits. */ if (diff > DBL_MAX_EXP) goto overflow; @@ -3934,7 +3934,7 @@ long_true_divide(PyObject *v, PyObject *w) } x_size = Py_ABS(Py_SIZE(x)); assert(x_size > 0); /* result of division is never zero */ - x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]); + x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length_digit(x->ob_digit[x_size-1]); /* The number of extra bits that have to be rounded away. */ extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; @@ -4748,7 +4748,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) alloc_b = Py_SIZE(b); /* reduce until a fits into 2 digits */ while ((size_a = Py_SIZE(a)) > 2) { - nbits = _Py_bit_length(a->ob_digit[size_a-1]); + nbits = _Py_bit_length_digit(a->ob_digit[size_a-1]); /* extract top 2*PyLong_SHIFT bits of a into x, along with corresponding bits of b into y */ size_b = Py_SIZE(b); @@ -5269,7 +5269,7 @@ int_bit_length_impl(PyObject *self) return PyLong_FromLong(0); msd = ((PyLongObject *)self)->ob_digit[ndigits-1]; - msd_bits = _Py_bit_length(msd); + msd_bits = _Py_bit_length_digit(msd); if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); From fbaca66885fdfb64edcbe5ec45ec590405e98e79 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:01:18 +0200 Subject: [PATCH 06/21] Do not try to support GCC < 3.4 --- Include/internal/pycore_bitutils.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index c6d03b96cf83e3..f24bec3b09c9cb 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -138,7 +138,8 @@ _Py_bit_length(unsigned long x) { if (x == 0) { return 0; } -#if (defined(__clang__) || (defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ >= 4)))) +#if (defined(__clang__) || defined(__GNUC__)) + // __builtin_clzl() is available since GCC 3.4. // Undefined behavior for x == 0. return sizeof(unsigned long) * 8 - __builtin_clzl(x); #elif defined(_MSC_VER) From 436f68b26f776a5fd80375122c44c00c9710dd70 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:03:40 +0200 Subject: [PATCH 07/21] Coding style --- Include/internal/pycore_bitutils.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index f24bec3b09c9cb..8196c167ce0396 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -134,7 +134,8 @@ _Py_popcount32(uint32_t x) // Return the index of the most significant 1 bit in 'x'. This is the smallest // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. static inline int -_Py_bit_length(unsigned long x) { +_Py_bit_length(unsigned long x) +{ if (x == 0) { return 0; } @@ -159,7 +160,8 @@ _Py_bit_length(unsigned long x) { } static inline int -_Py_bit_length_digit(digit x) { +_Py_bit_length_digit(digit x) +{ Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); return _Py_popcount32((unsigned long)x); } From ac664bc555efa89e309e1bfcfd51243ccb055205 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:04:51 +0200 Subject: [PATCH 08/21] Fix comment indent Co-authored-by: Victor Stinner --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index b7dcb66723f10e..4450ce18941020 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -53,7 +53,7 @@ raised for division by zero and mod by zero. */ #include "Python.h" -#include "pycore_bitutils.h" // _Py_bit_length() +#include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_dtoa.h" #include "_math.h" From 3279d1750e25d7589e6f2d92391578ab41d613cb Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:08:28 +0200 Subject: [PATCH 09/21] Do not factor out x != 0 --- Include/internal/pycore_bitutils.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 8196c167ce0396..0c1b306a7280f0 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -136,25 +136,30 @@ _Py_popcount32(uint32_t x) static inline int _Py_bit_length(unsigned long x) { - if (x == 0) { +#if (defined(__clang__) || defined(__GNUC__)) + if (x != 0) { + // __builtin_clzl() is available since GCC 3.4. + // Undefined behavior for x == 0. + return sizeof(unsigned long) * 8 - __builtin_clzl(x); + } + else { return 0; } -#if (defined(__clang__) || defined(__GNUC__)) - // __builtin_clzl() is available since GCC 3.4. - // Undefined behavior for x == 0. - return sizeof(unsigned long) * 8 - __builtin_clzl(x); #elif defined(_MSC_VER) Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); - // Does not write to msb if x == 0. unsigned long msb; - _BitScanReverse(&msb, x); - return (int)msb + 1; + if (_BitScanReverse(&msb, x)) { + return (int)msb + 1; + } + else { + return 0; + } #else int msb = 0; - do { + while (x != 0) { msb += 1; x >>= 1; - } while (x != 0); + } return msb; #endif } From 525da3d1f77d0b34a81ad18a32cbf65f3b677eb8 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:13:30 +0200 Subject: [PATCH 10/21] Fix and move _Py_bit_length_digit() --- Include/internal/pycore_bitutils.h | 7 ------- Objects/longobject.c | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 0c1b306a7280f0..1a17d8aa400840 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -164,13 +164,6 @@ _Py_bit_length(unsigned long x) #endif } -static inline int -_Py_bit_length_digit(digit x) -{ - Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); - return _Py_popcount32((unsigned long)x); -} - #ifdef __cplusplus } diff --git a/Objects/longobject.c b/Objects/longobject.c index a55171b543d126..b5112a62c1a70d 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -695,6 +695,13 @@ _PyLong_Sign(PyObject *vv) return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); } +static int +_Py_bit_length_digit(digit x) +{ + Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); + return _Py_bit_length((unsigned long)x); +} + size_t _PyLong_NumBits(PyObject *vv) { From e4876228c203758ee3e512c0dbfe04019901dd9a Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:33:22 +0200 Subject: [PATCH 11/21] Add unit test for _Py_bit_length() --- Modules/_testinternalcapi.c | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 6d5af5917f1f07..65f235f326e40e 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -102,6 +102,43 @@ test_popcount(PyObject *self, PyObject *Py_UNUSED(args)) } +static int +check_bit_length(unsigned long x, int expected) +{ + int len = _Py_bit_length(x); + if (len != expected) { + PyErr_Format(PyExc_AssertionError, + "_Py_bit_length(%lu) returns %i, expected %i", + (unsigned long)x, len, expected); + return -1; + } + return 0; +} + + +static PyObject* +test_bit_length(PyObject *self, PyObject *Py_UNUSED(args)) +{ +#define CHECK(X, RESULT) \ + do { \ + if (check_bit_length(X, RESULT) < 0) { \ + return NULL; \ + } \ + } while (0) + + CHECK(0, 0); + CHECK(1, 1); + CHECK(0x08080808, 28); + CHECK(0x10101010, 29); + CHECK(0x10204080, 29); + CHECK(0xDEADCAFE, 32); + CHECK(0xFFFFFFFF, 32); + Py_RETURN_NONE; + +#undef CHECK +} + + #define TO_PTR(ch) ((void*)(uintptr_t)ch) #define FROM_PTR(ptr) ((uintptr_t)ptr) #define VALUE(key) (1 + ((int)(key) - 'a')) @@ -197,6 +234,7 @@ static PyMethodDef TestMethods[] = { {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"test_bswap", test_bswap, METH_NOARGS}, {"test_popcount", test_popcount, METH_NOARGS}, + {"test_bit_length", test_bit_length, METH_NOARGS}, {"test_hashtable", test_hashtable, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; From a338761dd9d47dec671b1711c526cca551fb2ad5 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:39:49 +0200 Subject: [PATCH 12/21] Remove useless cast in test --- Modules/_testinternalcapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 65f235f326e40e..4f6ff9dd2fcfd5 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -109,7 +109,7 @@ check_bit_length(unsigned long x, int expected) if (len != expected) { PyErr_Format(PyExc_AssertionError, "_Py_bit_length(%lu) returns %i, expected %i", - (unsigned long)x, len, expected); + x, len, expected); return -1; } return 0; From 5a146f7cc3b4377d188463f4134b77524d7c9d39 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:53:46 +0200 Subject: [PATCH 13/21] Use original implementation as fallback --- Include/internal/pycore_bitutils.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 1a17d8aa400840..fb33b042f688bd 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -136,7 +136,7 @@ _Py_popcount32(uint32_t x) static inline int _Py_bit_length(unsigned long x) { -#if (defined(__clang__) || defined(__GNUC__)) +#if 0 && (defined(__clang__) || defined(__GNUC__)) if (x != 0) { // __builtin_clzl() is available since GCC 3.4. // Undefined behavior for x == 0. @@ -155,11 +155,16 @@ _Py_bit_length(unsigned long x) return 0; } #else + const int BIT_LENGTH_TABLE[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + }; int msb = 0; - while (x != 0) { - msb += 1; - x >>= 1; + while (x >= 32) { + msb += 6; + x >>= 6; } + msb += BIT_LENGTH_TABLE[x]; return msb; #endif } From 8e3ce6615c0f053c5e0186423e59ecfc041095e2 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 22:54:53 +0200 Subject: [PATCH 14/21] Re-enable intrinsics --- Include/internal/pycore_bitutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index fb33b042f688bd..55e3dbd6e80e37 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -136,7 +136,7 @@ _Py_popcount32(uint32_t x) static inline int _Py_bit_length(unsigned long x) { -#if 0 && (defined(__clang__) || defined(__GNUC__)) +#if (defined(__clang__) || defined(__GNUC__)) if (x != 0) { // __builtin_clzl() is available since GCC 3.4. // Undefined behavior for x == 0. From 86241f6ecb36e07ba76fc8c1162d76ab3c0e6ac4 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:12:29 +0200 Subject: [PATCH 15/21] Explicitly cast sizeof Co-authored-by: Victor Stinner --- Include/internal/pycore_bitutils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 55e3dbd6e80e37..1297f75245e48d 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -140,7 +140,7 @@ _Py_bit_length(unsigned long x) if (x != 0) { // __builtin_clzl() is available since GCC 3.4. // Undefined behavior for x == 0. - return sizeof(unsigned long) * 8 - __builtin_clzl(x); + return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); } else { return 0; From c7ccbe206269a79fc73c7ab1bfe3183e2a21fc62 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:16:26 +0200 Subject: [PATCH 16/21] Better tests Co-authored-by: Victor Stinner --- Modules/_testinternalcapi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 4f6ff9dd2fcfd5..f13c20271c7aa1 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -128,10 +128,10 @@ test_bit_length(PyObject *self, PyObject *Py_UNUSED(args)) CHECK(0, 0); CHECK(1, 1); - CHECK(0x08080808, 28); - CHECK(0x10101010, 29); - CHECK(0x10204080, 29); - CHECK(0xDEADCAFE, 32); + CHECK(0x1000, 12); + CHECK(0x1234, 12); + CHECK(0x54321, 19); + CHECK(0x7FFFFFFF, 31); CHECK(0xFFFFFFFF, 32); Py_RETURN_NONE; From dfceaf205ed74b9a229cc92c53d7247ad7c5b9eb Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:13:02 +0200 Subject: [PATCH 17/21] Explain Py_BUILD_ASSERT(sizeof(unsigned long) <= 4) --- Include/internal/pycore_bitutils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 1297f75245e48d..0bd3270fe82e5c 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -146,6 +146,7 @@ _Py_bit_length(unsigned long x) return 0; } #elif defined(_MSC_VER) + // _BitScanReverse() is documented to search 32 bits. Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); unsigned long msb; if (_BitScanReverse(&msb, x)) { From fbf01ae0c98156b3f9692453af6c739dedaf4f14 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:15:19 +0200 Subject: [PATCH 18/21] Use volatile so test actually runs intrinsic --- Modules/_testinternalcapi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f13c20271c7aa1..42e0c570073c6e 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -105,7 +105,9 @@ test_popcount(PyObject *self, PyObject *Py_UNUSED(args)) static int check_bit_length(unsigned long x, int expected) { - int len = _Py_bit_length(x); + // Use volatile to prevent the compiler to optimize out the whole test + volatile unsigned long u = x; + int len = _Py_bit_length(u); if (len != expected) { PyErr_Format(PyExc_AssertionError, "_Py_bit_length(%lu) returns %i, expected %i", From 991622b6d5542b9d446aee609a4a70504b5c58a8 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:17:04 +0200 Subject: [PATCH 19/21] Remove trailing spaces --- Modules/_testinternalcapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 42e0c570073c6e..ff4c5ecdd14e32 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -132,8 +132,8 @@ test_bit_length(PyObject *self, PyObject *Py_UNUSED(args)) CHECK(1, 1); CHECK(0x1000, 12); CHECK(0x1234, 12); - CHECK(0x54321, 19); - CHECK(0x7FFFFFFF, 31); + CHECK(0x54321, 19); + CHECK(0x7FFFFFFF, 31); CHECK(0xFFFFFFFF, 32); Py_RETURN_NONE; From 462d995f073215309cbd9afc1aa93658cf683777 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:18:22 +0200 Subject: [PATCH 20/21] _Py_bit_length_digit -> bit_length_digit --- Objects/longobject.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index b5112a62c1a70d..81baada7a0a769 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -696,7 +696,7 @@ _PyLong_Sign(PyObject *vv) } static int -_Py_bit_length_digit(digit x) +bit_length_digit(digit x) { Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); return _Py_bit_length((unsigned long)x); @@ -719,7 +719,7 @@ _PyLong_NumBits(PyObject *vv) if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT) goto Overflow; result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT; - msd_bits = _Py_bit_length_digit(msd); + msd_bits = bit_length_digit(msd); if (SIZE_MAX - msd_bits < result) goto Overflow; result += msd_bits; @@ -1829,7 +1829,7 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + - _Py_bit_length_digit(a->ob_digit[size_a - 1]); + bit_length_digit(a->ob_digit[size_a - 1]); /* Allow 1 character for a '-' sign. */ sz = negative + (size_a_in_bits + (bits - 1)) / bits; } @@ -2649,7 +2649,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. shift v1 left by the same amount. Results go into w and v. */ - d = PyLong_SHIFT - _Py_bit_length_digit(w1->ob_digit[size_w-1]); + d = PyLong_SHIFT - bit_length_digit(w1->ob_digit[size_w-1]); carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); assert(carry == 0); carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); @@ -2771,7 +2771,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) *e = 0; return 0.0; } - a_bits = _Py_bit_length_digit(a->ob_digit[a_size-1]); + a_bits = bit_length_digit(a->ob_digit[a_size-1]); /* The following is an overflow-free version of the check "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && @@ -3864,8 +3864,8 @@ long_true_divide(PyObject *v, PyObject *w) /* Extreme underflow */ goto underflow_or_zero; /* Next line is now safe from overflowing a Py_ssize_t */ - diff = diff * PyLong_SHIFT + _Py_bit_length_digit(a->ob_digit[a_size - 1]) - - _Py_bit_length_digit(b->ob_digit[b_size - 1]); + diff = diff * PyLong_SHIFT + bit_length_digit(a->ob_digit[a_size - 1]) - + bit_length_digit(b->ob_digit[b_size - 1]); /* Now diff = a_bits - b_bits. */ if (diff > DBL_MAX_EXP) goto overflow; @@ -3941,7 +3941,7 @@ long_true_divide(PyObject *v, PyObject *w) } x_size = Py_ABS(Py_SIZE(x)); assert(x_size > 0); /* result of division is never zero */ - x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length_digit(x->ob_digit[x_size-1]); + x_bits = (x_size-1)*PyLong_SHIFT+bit_length_digit(x->ob_digit[x_size-1]); /* The number of extra bits that have to be rounded away. */ extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; @@ -4755,7 +4755,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) alloc_b = Py_SIZE(b); /* reduce until a fits into 2 digits */ while ((size_a = Py_SIZE(a)) > 2) { - nbits = _Py_bit_length_digit(a->ob_digit[size_a-1]); + nbits = bit_length_digit(a->ob_digit[size_a-1]); /* extract top 2*PyLong_SHIFT bits of a into x, along with corresponding bits of b into y */ size_b = Py_SIZE(b); @@ -5276,7 +5276,7 @@ int_bit_length_impl(PyObject *self) return PyLong_FromLong(0); msd = ((PyLongObject *)self)->ob_digit[ndigits-1]; - msd_bits = _Py_bit_length_digit(msd); + msd_bits = bit_length_digit(msd); if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); From 98fc7a867ede94c2ba2b2077c4deb0227dc8ce7f Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 8 Jun 2020 23:56:02 +0200 Subject: [PATCH 21/21] Fixup test expectations --- Modules/_testinternalcapi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index ff4c5ecdd14e32..7970e2f4f443fd 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -130,8 +130,8 @@ test_bit_length(PyObject *self, PyObject *Py_UNUSED(args)) CHECK(0, 0); CHECK(1, 1); - CHECK(0x1000, 12); - CHECK(0x1234, 12); + CHECK(0x1000, 13); + CHECK(0x1234, 13); CHECK(0x54321, 19); CHECK(0x7FFFFFFF, 31); CHECK(0xFFFFFFFF, 32);