Skip to content

Commit

Permalink
Add BasicDecimal256 Multiplication Support (PR for decimal256 branch,…
Browse files Browse the repository at this point in the history
… not master) (apache#8344)
  • Loading branch information
Luminarys authored and emkornfield committed Oct 19, 2020
1 parent d664c88 commit 9c4ef3d
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 45 deletions.
167 changes: 128 additions & 39 deletions cpp/src/arrow/util/basic_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
#ifdef ARROW_USE_NATIVE_INT128
static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
#else
static constexpr uint64_t kIntMask = 0xFFFFFFFF;
static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
#endif

// same as ScaleMultipliers[38] - 1
Expand Down Expand Up @@ -254,67 +254,125 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {

namespace {

// TODO: Remove this guard once it's used by BasicDecimal256
#ifndef ARROW_USE_NATIVE_INT128
// This method losslessly multiplies x and y into a 128 bit unsigned integer
// whose high bits will be stored in hi and low bits in lo.
void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
// Convenience wrapper type over 128 bit unsigned integers. We opt not to
// replace the uint128_t type in int128_internal.h because it would require
// significantly more implementation work to be done. This class merely
// provides the minimum necessary set of functions to perform 128+ bit
// multiplication operations when there may or may not be native support.
#ifdef ARROW_USE_NATIVE_INT128
const __uint128_t r = static_cast<__uint128_t>(x) * y;
*lo = r & kInt64Mask;
*hi = r >> 64;
struct uint128_t {
uint128_t() {}
uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 64) | lo) {}
uint128_t(const BasicDecimal128& decimal) {
val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) | decimal.low_bits();
}

uint64_t hi() { return val_ >> 64; }
uint64_t lo() { return val_ & kInt64Mask; }

uint128_t& operator+=(const uint128_t& other) {
val_ += other.val_;
return *this;
}

uint128_t& operator*=(const uint128_t& other) {
val_ *= other.val_;
return *this;
}

__uint128_t val_;
};

#else
// If we can't use a native fallback, perform multiplication
// Multiply two 64 bit word components into a 128 bit result, with high bits
// stored in hi and low bits in lo.
inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
// Perform multiplication on two 64 bit words x and y into a 128 bit result
// by splitting up x and y into 32 bit high/low bit components,
// allowing us to represent the multiplication as
// x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
// + x_hi * y_hi * 2^64.
// + x_hi * y_hi * 2^64
//
// Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
// Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
// Therefore,
// lo_lo is (x_lo * y_lo)_lo,
// lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
// hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
// hi_hi is (x_hi * y_hi)_hi
const uint64_t x_lo = x & kIntMask;
const uint64_t y_lo = y & kIntMask;
const uint64_t x_lo = x & kInt32Mask;
const uint64_t y_lo = y & kInt32Mask;
const uint64_t x_hi = x >> 32;
const uint64_t y_hi = y >> 32;

const uint64_t t = x_lo * y_lo;
const uint64_t t_lo = t & kIntMask;
const uint64_t t_lo = t & kInt32Mask;
const uint64_t t_hi = t >> 32;

const uint64_t u = x_hi * y_lo + t_hi;
const uint64_t u_lo = u & kIntMask;
const uint64_t u_lo = u & kInt32Mask;
const uint64_t u_hi = u >> 32;

const uint64_t v = x_lo * y_hi + u_lo;
const uint64_t v_hi = v >> 32;

*hi = x_hi * y_hi + u_hi + v_hi;
*lo = (v << 32) | t_lo;
#endif
*lo = (v << 32) + t_lo;
}
#endif

void MultiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t y_lo,
uint64_t* hi, uint64_t* lo) {
#ifdef ARROW_USE_NATIVE_INT128
const __uint128_t x = (static_cast<__uint128_t>(x_hi) << 64) | x_lo;
const __uint128_t y = (static_cast<__uint128_t>(y_hi) << 64) | y_lo;
const __uint128_t r = x * y;
*lo = r & kInt64Mask;
*hi = r >> 64;
#else
// To perform 128 bit multiplication without a native fallback
// we first perform lossless 64 bit multiplication of the low
// bits, and then add x_hi * y_lo and x_lo * y_hi to the high
// bits. Note that we can skip adding x_hi * y_hi because it
// always will be over 128 bits.
ExtendAndMultiplyUint64(x_lo, y_lo, hi, lo);
*hi += (x_hi * y_lo) + (x_lo * y_hi);
struct uint128_t {
uint128_t() {}
uint128_t(uint64_t hi, uint64_t lo) : hi_(hi), lo_(lo) {}
uint128_t(const BasicDecimal128& decimal) {
hi_ = decimal.high_bits();
lo_ = decimal.low_bits();
}

uint64_t hi() const { return hi_; }
uint64_t lo() const { return lo_; }

uint128_t& operator+=(const uint128_t& other) {
// To deduce the carry bit, we perform "65 bit" addition on the low bits and
// seeing if the resulting high bit is 1. This is accomplished by shifting the
// low bits to the right by 1 (chopping off the lowest bit), then adding 1 if the
// result of adding the two chopped bits would have produced a carry.
uint64_t carry = (((lo_ & other.lo_) & 1) + (lo_ >> 1) + (other.lo_ >> 1)) >> 63;
hi_ += other.hi_ + carry;
lo_ += other.lo_;
return *this;
}

uint128_t& operator*=(const uint128_t& other) {
uint128_t r;
ExtendAndMultiply(lo_, other.lo_, &r.hi_, &r.lo_);
r.hi_ += (hi_ * other.lo_) + (lo_ * other.hi_);
*this = r;
return *this;
}

uint64_t hi_;
uint64_t lo_;
};
#endif

// Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
// array into a same sized output. Elements in the array should be in
// little endian order, and output will be the same. Overflow in multiplication
// will result in the lower N * 64 bits of the result being set.
template <int N>
inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
const std::array<uint64_t, N>& rh,
std::array<uint64_t, N>* result) {
for (int j = 0; j < N; ++j) {
uint64_t carry = 0;
for (int i = 0; i < N - j; ++i) {
uint128_t tmp(lh[i]);
tmp *= uint128_t(rh[j]);
tmp += uint128_t((*result)[i + j]);
tmp += uint128_t(carry);
(*result)[i + j] = tmp.lo();
carry = tmp.hi();
}
}
}

} // namespace
Expand All @@ -325,10 +383,10 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
const bool negate = Sign() != right.Sign();
BasicDecimal128 x = BasicDecimal128::Abs(*this);
BasicDecimal128 y = BasicDecimal128::Abs(right);
uint64_t hi;
MultiplyUint128(x.high_bits(), x.low_bits(), y.high_bits(), y.low_bits(), &hi,
&low_bits_);
high_bits_ = hi;
uint128_t r(x);
r *= y;
high_bits_ = r.hi();
low_bits_ = r.lo();
if (negate) {
Negate();
}
Expand Down Expand Up @@ -800,6 +858,13 @@ BasicDecimal256& BasicDecimal256::Negate() {
return *this;
}

BasicDecimal256& BasicDecimal256::Abs() { return *this < 0 ? Negate() : *this; }

BasicDecimal256 BasicDecimal256::Abs(const BasicDecimal256& in) {
BasicDecimal256 result(in);
return result.Abs();
}

std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
std::array<uint8_t, 32> out{{0}};
ToBytes(out.data());
Expand All @@ -821,12 +886,36 @@ void BasicDecimal256::ToBytes(uint8_t* out) const {
#endif
}

BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
// Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the
// min the negation taking the absolute values here should always be safe.
const bool negate = Sign() != right.Sign();
BasicDecimal256 x = BasicDecimal256::Abs(*this);
BasicDecimal256 y = BasicDecimal256::Abs(right);

uint128_t r_hi;
uint128_t r_lo;
std::array<uint64_t, 4> res({0, 0, 0, 0});
MultiplyUnsignedArray<4>(x.little_endian_array_, y.little_endian_array_, &res);
little_endian_array_ = res;
if (negate) {
Negate();
}
return *this;
}

DecimalStatus BasicDecimal256::Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal256* out) const {
// TODO: implement.
return DecimalStatus::kSuccess;
}

BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256& right) {
BasicDecimal256 result = left;
result *= right;
return result;
}

bool operator==(const BasicDecimal256& left, const BasicDecimal256& right) {
return left.little_endian_array() == right.little_endian_array();
}
Expand Down
36 changes: 30 additions & 6 deletions cpp/src/arrow/util/basic_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ class ARROW_EXPORT BasicDecimal128 {
BasicDecimal128& operator>>=(uint32_t bits);

/// \brief Get the high bits of the two's complement representation of the number.
inline int64_t high_bits() const { return high_bits_; }
inline constexpr int64_t high_bits() const { return high_bits_; }

/// \brief Get the low bits of the two's complement representation of the number.
inline uint64_t low_bits() const { return low_bits_; }
inline constexpr uint64_t low_bits() const { return low_bits_; }

/// \brief Return the raw bytes of the value in native-endian byte order.
std::array<uint8_t, 16> ToBytes() const;
Expand Down Expand Up @@ -179,6 +179,14 @@ ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
const BasicDecimal128& right);

class ARROW_EXPORT BasicDecimal256 {
private:
// Due to a bug in clang, we have to declare the extend method prior to its
// usage.
template <typename T>
inline static constexpr uint64_t extend(T low_bits) noexcept {
return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
}

public:
/// \brief Create a BasicDecimal256 from the two's complement representation.
constexpr BasicDecimal256(const std::array<uint64_t, 4>& little_endian_array) noexcept
Expand All @@ -195,13 +203,23 @@ class ARROW_EXPORT BasicDecimal256 {
: little_endian_array_({static_cast<uint64_t>(value), extend(value), extend(value),
extend(value)}) {}

constexpr BasicDecimal256(const BasicDecimal128& value) noexcept
: little_endian_array_({value.low_bits(), static_cast<uint64_t>(value.high_bits()),
extend(value.high_bits()), extend(value.high_bits())}) {}

/// \brief Create a BasicDecimal256 from an array of bytes. Bytes are assumed to be in
/// native-endian byte order.
explicit BasicDecimal256(const uint8_t* bytes);

/// \brief Negate the current value (in-place)
BasicDecimal256& Negate();

/// \brief Absolute value (in-place)
BasicDecimal256& Abs();

/// \brief Absolute value
static BasicDecimal256 Abs(const BasicDecimal256& left);

/// \brief Get the bits of the two's complement representation of the number. The 4
/// elements are in little endian order. The bits within each uint64_t element are in
/// native endian order. For example,
Expand All @@ -220,11 +238,14 @@ class ARROW_EXPORT BasicDecimal256 {
DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal256* out) const;

private:
template <typename T>
inline static constexpr uint64_t extend(T low_bits) noexcept {
return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
inline int64_t Sign() const {
return 1 | (static_cast<int64_t>(little_endian_array_[3]) >> 63);
}

/// \brief Multiply this number by another number. The result is truncated to 256 bits.
BasicDecimal256& operator*=(const BasicDecimal256& right);

private:
std::array<uint64_t, 4> little_endian_array_;
};

Expand All @@ -234,4 +255,7 @@ ARROW_EXPORT bool operator<(const BasicDecimal256& left, const BasicDecimal256&
ARROW_EXPORT bool operator<=(const BasicDecimal256& left, const BasicDecimal256& right);
ARROW_EXPORT bool operator>(const BasicDecimal256& left, const BasicDecimal256& right);
ARROW_EXPORT bool operator>=(const BasicDecimal256& left, const BasicDecimal256& right);

ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
const BasicDecimal256& right);
} // namespace arrow
5 changes: 5 additions & 0 deletions cpp/src/arrow/util/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -721,4 +721,9 @@ Result<Decimal256> Decimal256::FromString(const char* s) {
Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
return arrow::ToArrowStatus(dstatus, 256);
}

std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
os << decimal.ToIntegerString();
return os;
}
} // namespace arrow
3 changes: 3 additions & 0 deletions cpp/src/arrow/util/decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
return std::move(out);
}

friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
const Decimal256& decimal);

private:
/// Converts internal error code to Status
Status ToArrowStatus(DecimalStatus dstatus) const;
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/util/decimal_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,21 @@ static void BinaryMathOp(benchmark::State& state) { // NOLINT non-const referen
state.SetItemsProcessed(state.iterations() * kValueSize);
}

static void BinaryMathOp256(benchmark::State& state) { // NOLINT non-const reference
std::vector<BasicDecimal256> v1, v2;
for (uint64_t x = 0; x < kValueSize; x++) {
v1.push_back(BasicDecimal256({100 + x, 100 + x, 100 + x, 100 + x}));
v2.push_back(BasicDecimal256({200 + x, 200 + x, 200 + x, 200 + x}));
}

for (auto _ : state) {
for (int x = 0; x < kValueSize; x += 5) {
benchmark::DoNotOptimize(v1[x + 2] * v2[x + 2]);
}
}
state.SetItemsProcessed(state.iterations() * kValueSize);
}

static void UnaryOp(benchmark::State& state) { // NOLINT non-const reference
std::vector<BasicDecimal128> v;
for (int x = 0; x < kValueSize; x++) {
Expand Down Expand Up @@ -191,6 +206,7 @@ static void BinaryBitOp(benchmark::State& state) { // NOLINT non-const referenc
BENCHMARK(FromString);
BENCHMARK(ToString);
BENCHMARK(BinaryMathOp);
BENCHMARK(BinaryMathOp256);
BENCHMARK(BinaryMathOpAggregate);
BENCHMARK(BinaryCompareOp);
BENCHMARK(BinaryCompareOpConstant);
Expand Down
Loading

0 comments on commit 9c4ef3d

Please sign in to comment.