Skip to content

Commit

Permalink
Merge pull request #171 from chfast/int128_div_optimization
Browse files: browse the repository at this point in the history.
Optimize int128 division normalization
  • Loading branch information
chfast authored Jun 23, 2020
2 parents (e648bff + 4d0d041), commit 94bd151
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 36 deletions.
58 changes: 22 additions & 36 deletions include/intx/int128.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -659,55 +659,41 @@ inline div_result<uint128> udivrem(uint128 x, uint128 y) noexcept
{
INTX_REQUIRE(y.lo != 0); // Division by 0.

uint64_t xn_ex, xn_hi, xn_lo, yn;

auto lsh = clz(y.lo);
if (lsh != 0)
{
auto rsh = 64 - lsh;
xn_ex = x.hi >> rsh;
xn_hi = (x.lo >> rsh) | (x.hi << lsh);
xn_lo = x.lo << lsh;
yn = y.lo << lsh;
}
else
{
xn_ex = 0;
xn_hi = x.hi;
xn_lo = x.lo;
yn = y.lo;
}

auto v = reciprocal_2by1(yn);

auto res = udivrem_2by1({xn_ex, xn_hi}, yn, v);
auto q1 = res.quot;

res = udivrem_2by1({res.rem, xn_lo}, yn, v);

return {{q1, res.quot}, res.rem >> lsh};
const auto lsh = clz(y.lo);
const auto rsh = (64 - lsh) % 64;
const auto rsh_mask = uint64_t{lsh == 0} - 1;

const auto yn = y.lo << lsh;
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | ((x.lo >> rsh) & rsh_mask);
const auto xn_ex = (x.hi >> rsh) & rsh_mask;

const auto v = reciprocal_2by1(yn);
const auto res1 = udivrem_2by1({xn_ex, xn_hi}, yn, v);
const auto res2 = udivrem_2by1({res1.rem, xn_lo}, yn, v);
return {{res1.quot, res2.quot}, res2.rem >> lsh};
}

if (y.hi > x.hi)
return {0, x};

auto lsh = clz(y.hi);
const auto lsh = clz(y.hi);
if (lsh == 0)
{
const auto q = unsigned{y.hi < x.hi} | unsigned{y.lo <= x.lo};
return {q, x - (q ? y : 0)};
}

auto rsh = 64 - lsh;
const auto rsh = 64 - lsh;

auto yn_lo = y.lo << lsh;
auto yn_hi = (y.lo >> rsh) | (y.hi << lsh);
auto xn_ex = x.hi >> rsh;
auto xn_hi = (x.lo >> rsh) | (x.hi << lsh);
auto xn_lo = x.lo << lsh;
const auto yn_lo = y.lo << lsh;
const auto yn_hi = (y.hi << lsh) | (y.lo >> rsh);
const auto xn_lo = x.lo << lsh;
const auto xn_hi = (x.hi << lsh) | (x.lo >> rsh);
const auto xn_ex = x.hi >> rsh;

auto v = reciprocal_3by2({yn_hi, yn_lo});
auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);
const auto v = reciprocal_3by2({yn_hi, yn_lo});
const auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v);

return {res.quot, res.rem >> lsh};
}
Expand Down
1 change: 1 addition & 0 deletions test/benchmarks/bench_int128.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,6 +30,7 @@ static void udiv128(benchmark::State& state)
{
const uint128 inputs[][2] = {
{0x537e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x395df916dfd1b5e38ae7c47ce8a620f_u128},
{0x837e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x895df916dfd1b5e38ae7c47ce8a620f_u128},
{0xee657725ff64cd48b8fe188a09dc4f78_u128, 3}, // worst shift
{0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0xe7e47d96b32ef2d5}, // single long normalized
{0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0x77e47d96b32ef2d5}, // single long
Expand Down

0 comments on commit 94bd151

Please sign in to comment.