Merge pull request #108 from chfast/clz

Constexpr clz()
chfast · Aug 20, 2019 · de3f9bb
2 parents 96afab1 + fb52ae6
commit de3f9bb
Show file tree

Hide file tree

Showing 10 changed files with 163 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning].
 
 - The endian-specific API for converting intx types to/from bytes has been reworked.
   [[#107](https://github.com/chfast/intx/pull/107)]
+- `clz()` is now `constexpr` and returns the correct result (the operand's bit width) for zero inputs.
+  [[#108](https://github.com/chfast/intx/pull/108)]
 
 
 ## [0.3.0] - 2019-06-20

diff --git a/circle.yml b/circle.yml
@@ -148,7 +148,7 @@ jobs:
           working_directory: ~/build
           command: |
             mkdir -p ~/corpus
-            test/intx-fuzzer ~/corpus -use_value_profile=1 -max_len=129 -runs=200000
+            test/intx-fuzzer ~/corpus -use_value_profile=1 -max_len=129 -runs=500000
       - save_cache:
           key: corpus-{{ epoch }}
           paths:

diff --git a/include/intx/int128.hpp b/include/intx/int128.hpp
@@ -365,32 +365,60 @@ constexpr uint128& operator>>=(uint128& x, unsigned shift) noexcept
 /// @}
 
 
-inline unsigned clz(uint32_t x) noexcept
+constexpr unsigned clz_generic(uint32_t x) noexcept
+{
+    unsigned n = 32;
+    for (int i = 4; i >= 0; --i)
+    {
+        const auto s = 1 << i;
+        const auto hi = x >> s;
+        if (hi != 0)
+        {
+            n -= s;
+            x = hi;
+        }
+    }
+    return n - x;
+}
+
+constexpr unsigned clz_generic(uint64_t x) noexcept
+{
+    unsigned n = 64;
+    for (int i = 5; i >= 0; --i)
+    {
+        const auto s = 1 << i;
+        const auto hi = x >> s;
+        if (hi != 0)
+        {
+            n -= s;
+            x = hi;
+        }
+    }
+    return n - static_cast<unsigned>(x);
+}
+
+constexpr inline unsigned clz(uint32_t x) noexcept
 {
 #ifdef _MSC_VER
-    unsigned long most_significant_bit;
-    _BitScanReverse(&most_significant_bit, x);
-    return 31 ^ (unsigned)most_significant_bit;
+    return clz_generic(x);
 #else
-    return unsigned(__builtin_clz(x));
+    return x != 0 ? unsigned(__builtin_clz(x)) : 32;
 #endif
 }
 
-inline unsigned clz(uint64_t x) noexcept
+constexpr inline unsigned clz(uint64_t x) noexcept
 {
 #ifdef _MSC_VER
-    unsigned long most_significant_bit;
-    _BitScanReverse64(&most_significant_bit, x);
-    return 63 ^ (unsigned)most_significant_bit;
+    return clz_generic(x);
 #else
-    return unsigned(__builtin_clzll(x));
+    return x != 0 ? unsigned(__builtin_clzll(x)) : 64;
 #endif
 }
 
-inline unsigned clz(uint128 x) noexcept
+constexpr inline unsigned clz(uint128 x) noexcept
 {
     // In this order `h == 0` we get less instructions than in case of `h != 0`.
-    return x.hi == 0 ? clz(x.lo) | 64 : clz(x.hi);
+    return x.hi == 0 ? clz(x.lo) + 64 : clz(x.hi);
 }
 
 

diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
@@ -691,7 +691,7 @@ constexpr uint<N> exp(uint<N> base, uint<N> exponent) noexcept
 template <unsigned N>
 constexpr unsigned clz(const uint<N>& x) noexcept
 {
-    unsigned half_bits = num_bits(x) / 2;
+    const auto half_bits = num_bits(x) / 2;
 
     // TODO: Try:
     // bool take_hi = h != 0;
@@ -701,8 +701,7 @@ constexpr unsigned clz(const uint<N>& x) noexcept
     // return clz_hi | clz_lo;
 
     // In this order `h == 0` we get less instructions than in case of `h != 0`.
-    // FIXME: For `x == 0` this is UB.
-    return x.hi == 0 ? clz(x.lo) | half_bits : clz(x.hi);
+    return x.hi == 0 ? clz(x.lo) + half_bits : clz(x.hi);
 }
 
 template <typename Word, typename Int>

diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt
@@ -8,6 +8,7 @@ find_package(benchmark CONFIG REQUIRED)
 find_package(GMP REQUIRED)
 
 add_executable(intx-bench
+    bench_builtins.cpp
     bench_div.cpp
     bench_int128.cpp
     benchmarks.cpp

diff --git a/test/benchmarks/bench_builtins.cpp b/test/benchmarks/bench_builtins.cpp
@@ -0,0 +1,31 @@
+// intx: extended precision integer library.
+// Copyright 2019 Pawel Bylica.
+// Licensed under the Apache License, Version 2.0.
+
+#include <benchmark/benchmark.h>
+#include <array>
+#include <intx/int128.hpp>
+
+
+template <typename T, unsigned ClzFn(T)>
+static void clz(benchmark::State& state)
+{
+    constexpr int input_size = 1000;
+    std::array<uint64_t, input_size> inputs{};
+    for (size_t i = 0; i < inputs.size(); ++i)
+    {
+        const auto s = i % 65;
+        inputs[i] = s == 64 ? 0 : (uint64_t{1} << 63) >> s;
+    }
+
+    for (auto _ : state)
+    {
+        for (auto& in : inputs)
+            in = ClzFn(static_cast<T>(in));
+    }
+    benchmark::DoNotOptimize(inputs.data());
+}
+BENCHMARK_TEMPLATE(clz, uint32_t, intx::clz);
+BENCHMARK_TEMPLATE(clz, uint32_t, intx::clz_generic);
+BENCHMARK_TEMPLATE(clz, uint64_t, intx::clz);
+BENCHMARK_TEMPLATE(clz, uint64_t, intx::clz_generic);
diff --git a/test/unittests/CMakeLists.txt b/test/unittests/CMakeLists.txt
@@ -8,6 +8,7 @@ hunter_add_package(GTest)
 find_package(GTest CONFIG REQUIRED)
 
 add_executable(intx-unittests
+    test_builtins.cpp
     test_cases.hpp
     test_div.cpp
     test_int128.cpp

diff --git a/test/unittests/test_builtins.cpp b/test/unittests/test_builtins.cpp
@@ -0,0 +1,68 @@
+// intx: extended precision integer library.
+// Copyright 2019 Pawel Bylica.
+// Licensed under the Apache License, Version 2.0.
+
+#include <intx/int128.hpp>
+
+#include <gtest/gtest.h>
+
+using namespace intx;
+
+static_assert(clz_generic(uint32_t{0}) == 32, "");
+static_assert(clz_generic(uint32_t{1}) == 31, "");
+static_assert(clz_generic(uint32_t{3}) == 30, "");
+static_assert(clz_generic(uint32_t{9}) == 28, "");
+
+static_assert(clz_generic(uint64_t{0}) == 64, "");
+static_assert(clz_generic(uint64_t{1}) == 63, "");
+static_assert(clz_generic(uint64_t{3}) == 62, "");
+static_assert(clz_generic(uint64_t{9}) == 60, "");
+
+
+TEST(builtins, clz64_single_one)
+{
+    for (unsigned i = 0; i <= 63; ++i)
+    {
+        const auto input = (uint64_t{1} << 63) >> i;
+        EXPECT_EQ(clz(input), i);
+        EXPECT_EQ(clz_generic(input), i);
+    }
+}
+
+TEST(builtins, clz64_two_ones)
+{
+    for (unsigned i = 0; i <= 63; ++i)
+    {
+        const auto input = ((uint64_t{1} << 63) >> i) | 1;
+        EXPECT_EQ(clz(input), i);
+        EXPECT_EQ(clz_generic(input), i);
+    }
+}
+
+TEST(builtins, clz32_single_one)
+{
+    for (unsigned i = 0; i <= 31; ++i)
+    {
+        const auto input = (uint32_t{1} << 31) >> i;
+        EXPECT_EQ(clz(input), i);
+        EXPECT_EQ(clz_generic(input), i);
+    }
+}
+
+TEST(builtins, clz32_two_ones)
+{
+    for (unsigned i = 0; i <= 31; ++i)
+    {
+        const auto input = ((uint32_t{1} << 31) >> i) | 1;
+        EXPECT_EQ(clz(input), i);
+        EXPECT_EQ(clz_generic(input), i);
+    }
+}
+
+TEST(builtins, clz_zero)
+{
+    EXPECT_EQ(clz(uint32_t{0}), 32);
+    EXPECT_EQ(clz_generic(uint32_t{0}), 32);
+    EXPECT_EQ(clz(uint64_t{0}), 64);
+    EXPECT_EQ(clz_generic(uint64_t{0}), 64);
+}
diff --git a/test/unittests/test_int128.cpp b/test/unittests/test_int128.cpp
@@ -373,3 +373,13 @@ TEST(int128, umul_random)
         EXPECT_EQ(generic.lo, best.lo) << x << " x " << y;
     }
 }
+
+TEST(int128, clz)
+{
+    EXPECT_EQ(clz(intx::uint128{0}), 128);
+    for (unsigned i = 0; i < intx::uint128::num_bits; ++i)
+    {
+        const auto input = (intx::uint128{1} << (intx::uint128::num_bits - 1)) >> i;
+        EXPECT_EQ(clz(input), i);
+    }
+}
diff --git a/test/unittests/test_intx_api.cpp b/test/unittests/test_intx_api.cpp
@@ -31,6 +31,13 @@ static_assert(
 static_assert(
     0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff_u256 == ~0_u256, "");
 
+static_assert(clz(uint128{0}) == 128, "");
+static_assert(clz(uint128{1}) == 127, "");
+static_assert(clz(uint256{0}) == 256, "");
+static_assert(clz(uint256{1}) == 255, "");
+static_assert(clz(uint512{0}) == 512, "");
+static_assert(clz(uint512{1}) == 511, "");
+
 TEST(uint256, div)
 {
     uint256 a = 10001;