Skip to content

Commit f06f612

Browse files
committed
bpo-29782: Use __builtin_clzl for bits_in_digit if available
1 parent c8fa45b commit f06f612

File tree

6 files changed

+67
-59
lines changed

6 files changed

+67
-59
lines changed

Include/Python.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include "pymath.h"
6565
#include "pytime.h"
6666
#include "pymem.h"
67+
#include "pyintrinsics.h"
6768

6869
#include "object.h"
6970
#include "objimpl.h"

Include/pyintrinsics.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#ifndef Py_PYINTRINSICS_H
2+
#define Py_PYINTRINSICS_H
3+
4+
/* Return the smallest integer k such that d < 2**k, or 0 if d == 0.
5+
* Equivalent to floor(lg(d))+1 for d > 0. Also equivalent to:
6+
* bitwidth_of_type - count_leading_zero_bits(d)
7+
*/
8+
9+
#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ >= 4))
10+
#define HAVE_BIT_LENGTH
11+
/* Return the number of bits needed to represent d: the smallest k such
 * that d < 2**k, with _Py_bit_length(0) == 0.
 *
 * __builtin_clzl() has an undefined result for a zero argument, so zero
 * is answered before the builtin is reached.
 */
static inline unsigned int
_Py_bit_length(unsigned long d)
{
    if (d == 0) {
        return 0;
    }
    /* Both operands are converted to unsigned int explicitly: the width of
       unsigned long in bits (8 bits per byte assumed, as before) and the
       leading-zero count are small non-negative values, so nothing is
       truncated and no implicit size_t -> unsigned int narrowing remains. */
    return (unsigned int)(8 * sizeof(unsigned long))
           - (unsigned int)__builtin_clzl(d);
}
14+
#elif defined(_MSC_VER)
15+
#define HAVE_BIT_LENGTH
16+
/* <intrin.h> declares _BitScanReverse; include it first so the declaration
   is visible when the intrinsic pragma names the function. */
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)

/* Return the number of bits needed to represent d, or 0 if d == 0.
 *
 * _BitScanReverse() returns zero when d == 0 (idx is then not meaningful);
 * otherwise it stores in idx the zero-based position of the most
 * significant set bit, so the bit length is idx + 1.
 */
static inline unsigned int
_Py_bit_length(unsigned long d)
{
    unsigned long idx;

    if (!_BitScanReverse(&idx, d)) {
        return 0;
    }
    /* idx is a bit position inside an unsigned long, so the explicit
       narrowing to unsigned int cannot lose information. */
    return (unsigned int)idx + 1;
}
25+
#else
26+
extern unsigned int _Py_bit_length(unsigned long);
27+
#endif
28+
29+
#endif /* Py_PYINTRINSICS_H */

Makefile.pre.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ PYTHON_OBJS= \
377377
Python/pyfpe.o \
378378
Python/pyhash.o \
379379
Python/pylifecycle.o \
380+
Python/pyintrinsics.o \
380381
Python/pymath.o \
381382
Python/pystate.o \
382383
Python/pythonrun.o \
@@ -959,6 +960,7 @@ PYTHON_HEADERS= \
959960
$(srcdir)/Include/pyfpe.h \
960961
$(srcdir)/Include/pyhash.h \
961962
$(srcdir)/Include/pylifecycle.h \
963+
$(srcdir)/Include/pyintrinsics.h \
962964
$(srcdir)/Include/pymath.h \
963965
$(srcdir)/Include/pygetopt.h \
964966
$(srcdir)/Include/pymacro.h \

Modules/mathmodule.c

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,28 +1315,6 @@ math_fsum(PyObject *module, PyObject *seq)
13151315
#undef NUM_PARTIALS
13161316

13171317

1318-
/* Return the smallest integer k such that n < 2**k, or 0 if n == 0.
1319-
* Equivalent to floor(lg(x))+1. Also equivalent to: bitwidth_of_type -
1320-
* count_leading_zero_bits(x)
1321-
*/
1322-
1323-
/* XXX: This routine does more or less the same thing as
1324-
* bits_in_digit() in Objects/longobject.c. Someday it would be nice to
1325-
* consolidate them. On BSD, there's a library function called fls()
1326-
* that we could use, and GCC provides __builtin_clz().
1327-
*/
1328-
1329-
/* Count how many single-bit right shifts it takes to reduce n to zero;
   that count is the number of significant bits in n (0 when n == 0). */
static unsigned long
bit_length(unsigned long n)
{
    unsigned long width;

    for (width = 0; n != 0; n >>= 1) {
        width++;
    }
    return width;
}
1339-
13401318
static unsigned long
13411319
count_set_bits(unsigned long n)
13421320
{
@@ -1415,7 +1393,7 @@ count_set_bits(unsigned long n)
14151393

14161394
/* factorial_partial_product: Compute product(range(start, stop, 2)) using
14171395
* divide and conquer. Assumes start and stop are odd and stop > start.
1418-
* max_bits must be >= bit_length(stop - 2). */
1396+
* max_bits must be >= _Py_bit_length(stop - 2). */
14191397

14201398
static PyObject *
14211399
factorial_partial_product(unsigned long start, unsigned long stop,
@@ -1430,14 +1408,14 @@ factorial_partial_product(unsigned long start, unsigned long stop,
14301408
* the answer.
14311409
*
14321410
* Storing some integer z requires floor(lg(z))+1 bits, which is
1433-
* conveniently the value returned by bit_length(z). The
1411+
* conveniently the value returned by _Py_bit_length(z). The
14341412
* product x*y will require at most
1435-
* bit_length(x) + bit_length(y) bits to store, based
1413+
* _Py_bit_length(x) + _Py_bit_length(y) bits to store, based
14361414
* on the idea that lg product = lg x + lg y.
14371415
*
14381416
* We know that stop - 2 is the largest number to be multiplied. From
1439-
* there, we have: bit_length(answer) <= num_operands *
1440-
* bit_length(stop - 2)
1417+
* there, we have: _Py_bit_length(answer) <= num_operands *
1418+
* _Py_bit_length(stop - 2)
14411419
*/
14421420

14431421
num_operands = (stop - start) / 2;
@@ -1454,7 +1432,7 @@ factorial_partial_product(unsigned long start, unsigned long stop,
14541432
/* find midpoint of range(start, stop), rounded up to next odd number. */
14551433
midpoint = (start + num_operands) | 1;
14561434
left = factorial_partial_product(start, midpoint,
1457-
bit_length(midpoint - 2));
1435+
_Py_bit_length(midpoint - 2));
14581436
if (left == NULL)
14591437
goto error;
14601438
right = factorial_partial_product(midpoint, stop, max_bits);
@@ -1484,7 +1462,7 @@ factorial_odd_part(unsigned long n)
14841462
Py_INCREF(outer);
14851463

14861464
upper = 3;
1487-
for (i = bit_length(n) - 2; i >= 0; i--) {
1465+
for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
14881466
v = n >> i;
14891467
if (v <= 2)
14901468
continue;
@@ -1494,7 +1472,7 @@ factorial_odd_part(unsigned long n)
14941472
/* Here inner is the product of all odd integers j in the range (0,
14951473
n/2**(i+1)]. The factorial_partial_product call below gives the
14961474
product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
1497-
partial = factorial_partial_product(lower, upper, bit_length(upper-2));
1475+
partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
14981476
/* inner *= partial */
14991477
if (partial == NULL)
15001478
goto error;

Objects/longobject.c

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -717,26 +717,6 @@ _PyLong_Sign(PyObject *vv)
717717
return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
718718
}
719719

720-
/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d <
721-
2**k if d is nonzero, else 0. */
722-
723-
static const unsigned char BitLengthTable[32] = {
724-
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
725-
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
726-
};
727-
728-
static int
729-
bits_in_digit(digit d)
730-
{
731-
int d_bits = 0;
732-
while (d >= 32) {
733-
d_bits += 6;
734-
d >>= 6;
735-
}
736-
d_bits += (int)BitLengthTable[d];
737-
return d_bits;
738-
}
739-
740720
size_t
741721
_PyLong_NumBits(PyObject *vv)
742722
{
@@ -754,7 +734,7 @@ _PyLong_NumBits(PyObject *vv)
754734
if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT)
755735
goto Overflow;
756736
result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
757-
msd_bits = bits_in_digit(msd);
737+
msd_bits = _Py_bit_length(msd);
758738
if (SIZE_MAX - msd_bits < result)
759739
goto Overflow;
760740
result += msd_bits;
@@ -1820,7 +1800,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
18201800
return -1;
18211801
}
18221802
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
1823-
bits_in_digit(a->ob_digit[size_a - 1]);
1803+
_Py_bit_length(a->ob_digit[size_a - 1]);
18241804
/* Allow 1 character for a '-' sign. */
18251805
sz = negative + (size_a_in_bits + (bits - 1)) / bits;
18261806
}
@@ -2638,7 +2618,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
26382618

26392619
/* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
26402620
shift v1 left by the same amount. Results go into w and v. */
2641-
d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]);
2621+
d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]);
26422622
carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d);
26432623
assert(carry == 0);
26442624
carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d);
@@ -2759,7 +2739,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e)
27592739
*e = 0;
27602740
return 0.0;
27612741
}
2762-
a_bits = bits_in_digit(a->ob_digit[a_size-1]);
2742+
a_bits = _Py_bit_length(a->ob_digit[a_size-1]);
27632743
/* The following is an overflow-free version of the check
27642744
"if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */
27652745
if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 &&
@@ -3892,8 +3872,8 @@ long_true_divide(PyObject *v, PyObject *w)
38923872
/* Extreme underflow */
38933873
goto underflow_or_zero;
38943874
/* Next line is now safe from overflowing a Py_ssize_t */
3895-
diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) -
3896-
bits_in_digit(b->ob_digit[b_size - 1]);
3875+
diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) -
3876+
_Py_bit_length(b->ob_digit[b_size - 1]);
38973877
/* Now diff = a_bits - b_bits. */
38983878
if (diff > DBL_MAX_EXP)
38993879
goto overflow;
@@ -3969,7 +3949,7 @@ long_true_divide(PyObject *v, PyObject *w)
39693949
}
39703950
x_size = Py_ABS(Py_SIZE(x));
39713951
assert(x_size > 0); /* result of division is never zero */
3972-
x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
3952+
x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]);
39733953

39743954
/* The number of extra bits that have to be rounded away. */
39753955
extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
@@ -4611,7 +4591,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
46114591
alloc_b = Py_SIZE(b);
46124592
/* reduce until a fits into 2 digits */
46134593
while ((size_a = Py_SIZE(a)) > 2) {
4614-
nbits = bits_in_digit(a->ob_digit[size_a-1]);
4594+
nbits = _Py_bit_length(a->ob_digit[size_a-1]);
46154595
/* extract top 2*PyLong_SHIFT bits of a into x, along with
46164596
corresponding bits of b into y */
46174597
size_b = Py_SIZE(b);
@@ -5132,7 +5112,7 @@ int_bit_length_impl(PyObject *self)
51325112
return PyLong_FromLong(0);
51335113

51345114
msd = ((PyLongObject *)self)->ob_digit[ndigits-1];
5135-
msd_bits = bits_in_digit(msd);
5115+
msd_bits = _Py_bit_length(msd);
51365116

51375117
if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
51385118
return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);

Python/pyintrinsics.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#include "Python.h"
2+
3+
#ifndef HAVE_BIT_LENGTH
4+
/* Bit lengths of the values 0..31, indexed by the value itself. */
static const unsigned char BitLengthTable[32] = {
    0,
    1,
    2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};

/* Portable table-driven bit length: return the number of significant bits
   in d, or 0 when d == 0. */
unsigned int
_Py_bit_length(unsigned long d)
{
    unsigned int nbits = 0;

    /* Drop six low bits per step, crediting six bits each time, until the
       remainder is below 32 and can be looked up in the table. */
    for (; d >= 32; d >>= 6) {
        nbits += 6;
    }
    return nbits + (unsigned int)BitLengthTable[d];
}
18+
#endif /* HAVE_BIT_LENGTH */

0 commit comments

Comments
 (0)