diff --git a/CHANGELOG.md b/CHANGELOG.md index ccc0958d..55ae70b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning]. - The endian-specific API for converting intx types to/from bytes has been reworked. [[#107](https://github.com/chfast/intx/pull/107)] +- The `clz()` is now `constexpr` and produces correct answer for zero inputs. + [[#108](https://github.com/chfast/intx/pull/108)] ## [0.3.0] - 2019-06-20 diff --git a/circle.yml b/circle.yml index ed6487b0..98479574 100644 --- a/circle.yml +++ b/circle.yml @@ -148,7 +148,7 @@ jobs: working_directory: ~/build command: | mkdir -p ~/corpus - test/intx-fuzzer ~/corpus -use_value_profile=1 -max_len=129 -runs=200000 + test/intx-fuzzer ~/corpus -use_value_profile=1 -max_len=129 -runs=500000 - save_cache: key: corpus-{{ epoch }} paths: diff --git a/include/intx/int128.hpp b/include/intx/int128.hpp index 74d339e0..873d79a4 100644 --- a/include/intx/int128.hpp +++ b/include/intx/int128.hpp @@ -365,32 +365,60 @@ constexpr uint128& operator>>=(uint128& x, unsigned shift) noexcept /// @} -inline unsigned clz(uint32_t x) noexcept +constexpr unsigned clz_generic(uint32_t x) noexcept +{ + unsigned n = 32; + for (int i = 4; i >= 0; --i) + { + const auto s = 1 << i; + const auto hi = x >> s; + if (hi != 0) + { + n -= s; + x = hi; + } + } + return n - x; +} + +constexpr unsigned clz_generic(uint64_t x) noexcept +{ + unsigned n = 64; + for (int i = 5; i >= 0; --i) + { + const auto s = 1 << i; + const auto hi = x >> s; + if (hi != 0) + { + n -= s; + x = hi; + } + } + return n - static_cast(x); +} + +constexpr inline unsigned clz(uint32_t x) noexcept { #ifdef _MSC_VER - unsigned long most_significant_bit; - _BitScanReverse(&most_significant_bit, x); - return 31 ^ (unsigned)most_significant_bit; + return clz_generic(x); #else - return unsigned(__builtin_clz(x)); + return x != 0 ? unsigned(__builtin_clz(x)) : 32; #endif } -inline unsigned clz(uint64_t x) noexcept +constexpr inline unsigned clz(uint64_t x) noexcept { #ifdef _MSC_VER - unsigned long most_significant_bit; - _BitScanReverse64(&most_significant_bit, x); - return 63 ^ (unsigned)most_significant_bit; + return clz_generic(x); #else - return unsigned(__builtin_clzll(x)); + return x != 0 ? unsigned(__builtin_clzll(x)) : 64; #endif } -inline unsigned clz(uint128 x) noexcept +constexpr inline unsigned clz(uint128 x) noexcept { // In this order `h == 0` we get less instructions than in case of `h != 0`. - return x.hi == 0 ? clz(x.lo) | 64 : clz(x.hi); + return x.hi == 0 ? clz(x.lo) + 64 : clz(x.hi); } diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index c78a84bb..7d8f6bc0 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -691,7 +691,7 @@ constexpr uint exp(uint base, uint exponent) noexcept template constexpr unsigned clz(const uint& x) noexcept { - unsigned half_bits = num_bits(x) / 2; + const auto half_bits = num_bits(x) / 2; // TODO: Try: // bool take_hi = h != 0; @@ -701,8 +701,7 @@ constexpr unsigned clz(const uint& x) noexcept // return clz_hi | clz_lo; // In this order `h == 0` we get less instructions than in case of `h != 0`. - // FIXME: For `x == 0` this is UB. - return x.hi == 0 ? clz(x.lo) | half_bits : clz(x.hi); + return x.hi == 0 ? clz(x.lo) + half_bits : clz(x.hi); } template diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt index 13639d14..9d1519e4 100644 --- a/test/benchmarks/CMakeLists.txt +++ b/test/benchmarks/CMakeLists.txt @@ -8,6 +8,7 @@ find_package(benchmark CONFIG REQUIRED) find_package(GMP REQUIRED) add_executable(intx-bench + bench_builtins.cpp bench_div.cpp bench_int128.cpp benchmarks.cpp diff --git a/test/benchmarks/bench_builtins.cpp b/test/benchmarks/bench_builtins.cpp new file mode 100644 index 00000000..d5b1b5c0 --- /dev/null +++ b/test/benchmarks/bench_builtins.cpp @@ -0,0 +1,31 @@ +// intx: extended precision integer library. +// Copyright 2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#include +#include +#include + + +template +static void clz(benchmark::State& state) +{ + constexpr int input_size = 1000; + std::array inputs{}; + for (size_t i = 0; i < inputs.size(); ++i) + { + const auto s = i % 65; + inputs[i] = s == 64 ? 0 : (uint64_t{1} << 63) >> s; + } + + for (auto _ : state) + { + for (auto& in : inputs) + in = ClzFn(static_cast(in)); + } + benchmark::DoNotOptimize(inputs.data()); +} +BENCHMARK_TEMPLATE(clz, uint32_t, intx::clz); +BENCHMARK_TEMPLATE(clz, uint32_t, intx::clz_generic); +BENCHMARK_TEMPLATE(clz, uint64_t, intx::clz); +BENCHMARK_TEMPLATE(clz, uint64_t, intx::clz_generic); diff --git a/test/unittests/CMakeLists.txt b/test/unittests/CMakeLists.txt index c5d05b17..a709eda0 100644 --- a/test/unittests/CMakeLists.txt +++ b/test/unittests/CMakeLists.txt @@ -8,6 +8,7 @@ hunter_add_package(GTest) find_package(GTest CONFIG REQUIRED) add_executable(intx-unittests + test_builtins.cpp test_cases.hpp test_div.cpp test_int128.cpp diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp new file mode 100644 index 00000000..eb64618f --- /dev/null +++ b/test/unittests/test_builtins.cpp @@ -0,0 +1,68 @@ +// intx: extended precision integer library. +// Copyright 2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#include + +#include + +using namespace intx; + +static_assert(clz_generic(uint32_t{0}) == 32, ""); +static_assert(clz_generic(uint32_t{1}) == 31, ""); +static_assert(clz_generic(uint32_t{3}) == 30, ""); +static_assert(clz_generic(uint32_t{9}) == 28, ""); + +static_assert(clz_generic(uint64_t{0}) == 64, ""); +static_assert(clz_generic(uint64_t{1}) == 63, ""); +static_assert(clz_generic(uint64_t{3}) == 62, ""); +static_assert(clz_generic(uint64_t{9}) == 60, ""); + + +TEST(builtins, clz64_single_one) +{ + for (unsigned i = 0; i <= 63; ++i) + { + const auto input = (uint64_t{1} << 63) >> i; + EXPECT_EQ(clz(input), i); + EXPECT_EQ(clz_generic(input), i); + } +} + +TEST(builtins, clz64_two_ones) +{ + for (unsigned i = 0; i <= 63; ++i) + { + const auto input = ((uint64_t{1} << 63) >> i) | 1; + EXPECT_EQ(clz(input), i); + EXPECT_EQ(clz_generic(input), i); + } +} + +TEST(builtins, clz32_single_one) +{ + for (unsigned i = 0; i <= 31; ++i) + { + const auto input = (uint32_t{1} << 31) >> i; + EXPECT_EQ(clz(input), i); + EXPECT_EQ(clz_generic(input), i); + } +} + +TEST(builtins, clz32_two_ones) +{ + for (unsigned i = 0; i <= 31; ++i) + { + const auto input = ((uint32_t{1} << 31) >> i) | 1; + EXPECT_EQ(clz(input), i); + EXPECT_EQ(clz_generic(input), i); + } +} + +TEST(builtins, clz_zero) +{ + EXPECT_EQ(clz(uint32_t{0}), 32); + EXPECT_EQ(clz_generic(uint32_t{0}), 32); + EXPECT_EQ(clz(uint64_t{0}), 64); + EXPECT_EQ(clz_generic(uint64_t{0}), 64); +} diff --git a/test/unittests/test_int128.cpp b/test/unittests/test_int128.cpp index e846f9c3..04cd9fae 100644 --- a/test/unittests/test_int128.cpp +++ b/test/unittests/test_int128.cpp @@ -373,3 +373,13 @@ TEST(int128, umul_random) EXPECT_EQ(generic.lo, best.lo) << x << " x " << y; } } + +TEST(int128, clz) +{ + EXPECT_EQ(clz(intx::uint128{0}), 128); + for (unsigned i = 0; i < intx::uint128::num_bits; ++i) + { + const auto input = (intx::uint128{1} << (intx::uint128::num_bits - 1)) >> i; + EXPECT_EQ(clz(input), i); + } +} diff --git a/test/unittests/test_intx_api.cpp b/test/unittests/test_intx_api.cpp index ddbee069..fd386606 100644 --- a/test/unittests/test_intx_api.cpp +++ b/test/unittests/test_intx_api.cpp @@ -31,6 +31,13 @@ static_assert( static_assert( 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 == ~0_u256, ""); +static_assert(clz(uint128{0}) == 128, ""); +static_assert(clz(uint128{1}) == 127, ""); +static_assert(clz(uint256{0}) == 256, ""); +static_assert(clz(uint256{1}) == 255, ""); +static_assert(clz(uint512{0}) == 512, ""); +static_assert(clz(uint512{1}) == 511, ""); + TEST(uint256, div) { uint256 a = 10001;