Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-9747: [Java][C++] Initial Support for 256-bit Decimals #8475

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8a3c7bb
Basic support of Decimal256 (PR for merging into decimal256 branch NO…
MingyuZhong Sep 16, 2020
61f71d5
Archery C++ round trip working. Java disabled. Fix c-bridge (#8268)
emkornfield Sep 25, 2020
7b90947
Decimal256 java implementation with working integration tests. (#8281)
emkornfield Sep 26, 2020
4cfba5d
Add BasicDecimal256 Multiplication Support (PR for decimal256 branch,…
Luminarys Oct 12, 2020
84de9e8
cpp working
emkornfield Oct 15, 2020
3401bfe
format
emkornfield Oct 17, 2020
ecdfc45
fix some style issues
emkornfield Oct 17, 2020
603dabb
address java comments
emkornfield Oct 17, 2020
6b532e0
remove cout and add visitor
emkornfield Oct 17, 2020
4622758
fail if rescale requires op
emkornfield Oct 17, 2020
0b85b7f
fix format
emkornfield Oct 17, 2020
c17bb61
fix a couple of more errors
emkornfield Oct 17, 2020
5fa6ad6
fix expression registry in java
emkornfield Oct 17, 2020
694b693
Add bit-width to gandiva tests
emkornfield Oct 19, 2020
8fe5d72
properly skip rust/go gold files
emkornfield Oct 19, 2020
2328fca
try to fix java on gold
emkornfield Oct 19, 2020
623e166
add missing decimal types
emkornfield Oct 19, 2020
61ae650
use not implemented
emkornfield Oct 19, 2020
59bbee3
Fix kernel test
emkornfield Oct 19, 2020
82d191c
rename BigDecimal to Decimal256
emkornfield Oct 19, 2020
af59a20
fix benchmark
emkornfield Oct 20, 2020
77492db
convert to assertThrows
emkornfield Oct 20, 2020
3fbb705
fix format
emkornfield Oct 20, 2020
3a9ba27
address more C++ comments
emkornfield Oct 20, 2020
41b6768
more pr comments
emkornfield Oct 20, 2020
49cec43
use const ref
emkornfield Oct 20, 2020
cc55379
inline decimal256 operators
emkornfield Oct 20, 2020
577af6f
add additional c-bridge tests
emkornfield Oct 20, 2020
c55a106
change constructor
emkornfield Oct 21, 2020
11cb9dc
inline writeLong method in decimal256
emkornfield Oct 21, 2020
71735a7
Adress java comments
emkornfield Oct 22, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c_glib/test/test-decimal128.rb
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_rescale_fail
decimal = Arrow::Decimal128.new(10)
message =
"[decimal128][rescale]: Invalid: " +
"Rescaling decimal value would cause data loss"
"Rescaling Decimal128 value would cause data loss"
assert_raise(Arrow::Error::Invalid.new(message)) do
decimal.rescale(1, -1)
end
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ struct ScalarFromArraySlotImpl {
return Finish(Decimal128(a.GetValue(index_)));
}

Status Visit(const Decimal256Array& a) {
return Finish(Decimal256(a.GetValue(index_)));
}

template <typename T>
Status Visit(const BaseBinaryArray<T>& a) {
return Finish(a.GetString(index_));
Expand Down
18 changes: 16 additions & 2 deletions cpp/src/arrow/array/array_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ namespace arrow {
using internal::checked_cast;

// ----------------------------------------------------------------------
// Decimal
// Decimal128

Decimal128Array::Decimal128Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL);
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL128);
}

std::string Decimal128Array::FormatValue(int64_t i) const {
Expand All @@ -46,4 +46,18 @@ std::string Decimal128Array::FormatValue(int64_t i) const {
return value.ToString(type_.scale());
}

// ----------------------------------------------------------------------
// Decimal256

Decimal256Array::Decimal256Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL256);
}

std::string Decimal256Array::FormatValue(int64_t i) const {
const auto& type_ = checked_cast<const Decimal256Type&>(*type());
const Decimal256 value(GetValue(i));
return value.ToString(type_.scale());
}

} // namespace arrow
16 changes: 16 additions & 0 deletions cpp/src/arrow/array/array_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,20 @@ class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
// Backward compatibility
using DecimalArray = Decimal128Array;

// ----------------------------------------------------------------------
// Decimal256Array

/// Concrete Array class for 256-bit decimal data
class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal256Type;

using FixedSizeBinaryArray::FixedSizeBinaryArray;

/// \brief Construct Decimal256Array from ArrayData instance
explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);

std::string FormatValue(int64_t i) const;
};

} // namespace arrow
66 changes: 43 additions & 23 deletions cpp/src/arrow/array/array_dict_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -835,13 +835,13 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, AppendArrayInvalidType) {
}
#endif

TEST(TestDecimalDictionaryBuilder, Basic) {
template <typename DecimalValue>
void TestDecimalDictionaryBuilderBasic(std::shared_ptr<DataType> decimal_type) {
// Build the dictionary Array
auto decimal_type = arrow::decimal(2, 0);
DictionaryBuilder<FixedSizeBinaryType> builder(decimal_type);

// Test data
std::vector<Decimal128> test{12, 12, 11, 12};
std::vector<DecimalValue> test{12, 12, 11, 12};
for (const auto& value : test) {
ASSERT_OK(builder.Append(value.ToBytes().data()));
}
Expand All @@ -857,40 +857,48 @@ TEST(TestDecimalDictionaryBuilder, Basic) {
ASSERT_TRUE(expected.Equals(result));
}

TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal(21, 0);
TEST(TestDecimal128DictionaryBuilder, Basic) {
TestDecimalDictionaryBuilderBasic<Decimal128>(arrow::decimal128(2, 0));
}

TEST(TestDecimal256DictionaryBuilder, Basic) {
TestDecimalDictionaryBuilderBasic<Decimal256>(arrow::decimal256(76, 0));
}

void TestDecimalDictionaryBuilderDoubleTableSize(
std::shared_ptr<DataType> decimal_type, FixedSizeBinaryBuilder& decimal_builder) {
// Build the dictionary Array
DictionaryBuilder<FixedSizeBinaryType> dict_builder(decimal_type);

// Build expected data
Decimal128Builder decimal_builder(decimal_type);
Int16Builder int_builder;

// Fill with 1024 different values
for (int64_t i = 0; i < 1024; i++) {
const uint8_t bytes[] = {0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
12,
12,
static_cast<uint8_t>(i / 128),
static_cast<uint8_t>(i % 128)};
// Decimal256Builder takes 32 bytes, while Decimal128Builder takes only the first 16
// bytes.
const uint8_t bytes[32] = {0,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure the remaining bytes will be zeroed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
12,
12,
static_cast<uint8_t>(i / 128),
static_cast<uint8_t>(i % 128)};
ASSERT_OK(dict_builder.Append(bytes));
ASSERT_OK(decimal_builder.Append(bytes));
ASSERT_OK(int_builder.Append(static_cast<uint16_t>(i)));
}
// Fill with an already existing value
const uint8_t known_value[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1};
const uint8_t known_value[32] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 1};
for (int64_t i = 0; i < 1024; i++) {
ASSERT_OK(dict_builder.Append(known_value));
ASSERT_OK(int_builder.Append(1));
Expand All @@ -911,6 +919,18 @@ TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
ASSERT_TRUE(expected.Equals(result));
}

TEST(TestDecimal128DictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal128(21, 0);
Decimal128Builder decimal_builder(decimal_type);
TestDecimalDictionaryBuilderDoubleTableSize(decimal_type, decimal_builder);
}

TEST(TestDecimal256DictionaryBuilder, DoubleTableSize) {
const auto& decimal_type = arrow::decimal256(21, 0);
Decimal256Builder decimal_builder(decimal_type);
TestDecimalDictionaryBuilderDoubleTableSize(decimal_type, decimal_builder);
}

TEST(TestNullDictionaryBuilder, Basic) {
// MakeBuilder
auto dict_type = dictionary(int8(), null());
Expand Down
60 changes: 52 additions & 8 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
std::make_shared<FixedSizeBinaryScalar>(
hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
std::make_shared<StringScalar>(hello),
std::make_shared<LargeStringScalar>(hello),
std::make_shared<ListScalar>(ArrayFromJSON(int8(), "[1, 2, 3]")),
Expand Down Expand Up @@ -2390,10 +2391,14 @@ TEST(TestAdaptiveUIntBuilderWithStartIntSize, TestReset) {
// ----------------------------------------------------------------------
// Test Decimal arrays

using DecimalVector = std::vector<Decimal128>;

template <typename TYPE>
class DecimalTest : public ::testing::TestWithParam<int> {
public:
using DecimalBuilder = typename TypeTraits<TYPE>::BuilderType;
using DecimalValue = typename TypeTraits<TYPE>::ScalarType::ValueType;
using DecimalArray = typename TypeTraits<TYPE>::ArrayType;
using DecimalVector = std::vector<DecimalValue>;

DecimalTest() {}

template <size_t BYTE_WIDTH = 16>
Expand All @@ -2409,8 +2414,8 @@ class DecimalTest : public ::testing::TestWithParam<int> {
template <size_t BYTE_WIDTH = 16>
void TestCreate(int32_t precision, const DecimalVector& draw,
const std::vector<uint8_t>& valid_bytes, int64_t offset) const {
auto type = std::make_shared<Decimal128Type>(precision, 4);
auto builder = std::make_shared<Decimal128Builder>(type);
auto type = std::make_shared<TYPE>(precision, 4);
auto builder = std::make_shared<DecimalBuilder>(type);

size_t null_count = 0;

Expand Down Expand Up @@ -2441,7 +2446,7 @@ class DecimalTest : public ::testing::TestWithParam<int> {
ASSERT_OK_AND_ASSIGN(expected_null_bitmap, internal::BytesToBits(valid_bytes));

int64_t expected_null_count = CountNulls(valid_bytes);
auto expected = std::make_shared<Decimal128Array>(
auto expected = std::make_shared<DecimalArray>(
type, size, expected_data, expected_null_bitmap, expected_null_count);

std::shared_ptr<Array> lhs = out->Slice(offset);
Expand All @@ -2450,7 +2455,9 @@ class DecimalTest : public ::testing::TestWithParam<int> {
}
};

TEST_P(DecimalTest, NoNulls) {
using Decimal128Test = DecimalTest<Decimal128Type>;

TEST_P(Decimal128Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal128> draw = {Decimal128(1), Decimal128(-2), Decimal128(2389),
Decimal128(4), Decimal128(-12348)};
Expand All @@ -2459,7 +2466,7 @@ TEST_P(DecimalTest, NoNulls) {
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(DecimalTest, WithNulls) {
TEST_P(Decimal128Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal128> draw = {Decimal128(1), Decimal128(2), Decimal128(-1),
Decimal128(4), Decimal128(-1), Decimal128(1),
Expand All @@ -2478,7 +2485,44 @@ TEST_P(DecimalTest, WithNulls) {
this->TestCreate(precision, draw, valid_bytes, 2);
}

INSTANTIATE_TEST_SUITE_P(DecimalTest, DecimalTest, ::testing::Range(1, 38));
INSTANTIATE_TEST_SUITE_P(Decimal128Test, Decimal128Test, ::testing::Range(1, 38));

using Decimal256Test = DecimalTest<Decimal256Type>;

TEST_P(Decimal256Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal256> draw = {Decimal256(1), Decimal256(-2), Decimal256(2389),
Decimal256(4), Decimal256(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal256Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal256> draw = {Decimal256(1), Decimal256(2), Decimal256(-1),
Decimal256(4), Decimal256(-1), Decimal256(1),
Decimal256(2)};
Decimal256 big; // (pow(2, 255) - 1) / pow(10, 38)
ASSERT_OK_AND_ASSIGN(big,
Decimal256::FromString("578960446186580977117854925043439539266."
"34992332820282019728792003956564819967"));
draw.push_back(big);

Decimal256 big_negative; // -pow(2, 255) / pow(10, 38)
ASSERT_OK_AND_ASSIGN(big_negative,
Decimal256::FromString("-578960446186580977117854925043439539266."
"34992332820282019728792003956564819968"));
draw.push_back(big_negative);

std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

INSTANTIATE_TEST_SUITE_P(Decimal256Test, Decimal256Test,
::testing::Values(1, 2, 5, 10, 38, 39, 40, 75, 76));

// ----------------------------------------------------------------------
// Test rechunking
Expand Down
35 changes: 35 additions & 0 deletions cpp/src/arrow/array/builder_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,39 @@ Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}

// ----------------------------------------------------------------------
// Decimal256Builder

Decimal256Builder::Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: FixedSizeBinaryBuilder(type, pool),
decimal_type_(internal::checked_pointer_cast<Decimal256Type>(type)) {}

Status Decimal256Builder::Append(const Decimal256& value) {
RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
UnsafeAppend(value);
return Status::OK();
}

void Decimal256Builder::UnsafeAppend(const Decimal256& value) {
value.ToBytes(GetMutableValue(length()));
byte_builder_.UnsafeAdvance(32);
UnsafeAppendToBitmap(true);
}

void Decimal256Builder::UnsafeAppend(util::string_view value) {
FixedSizeBinaryBuilder::UnsafeAppend(value);
}

Status Decimal256Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
std::shared_ptr<Buffer> data;
RETURN_NOT_OK(byte_builder_.Finish(&data));
std::shared_ptr<Buffer> null_bitmap;
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));

*out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
capacity_ = length_ = null_count_ = 0;
return Status::OK();
}

} // namespace arrow
29 changes: 29 additions & 0 deletions cpp/src/arrow/array/builder_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,35 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
std::shared_ptr<Decimal128Type> decimal_type_;
};

class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal256Type;

explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());

using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;

Status Append(const Decimal256& val);
void UnsafeAppend(const Decimal256& val);
void UnsafeAppend(util::string_view val);

Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond

Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }

std::shared_ptr<DataType> type() const override { return decimal_type_; }

protected:
std::shared_ptr<Decimal256Type> decimal_type_;
};

using DecimalBuilder = Decimal128Builder;

} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {

template <typename T>
enable_if_no_memoize<T, Status> Visit(const T&) {
return Status::NotImplemented("Initialization of ", value_type_,
return Status::NotImplemented("Initialization of ", value_type_->ToString(),
" memo table is not implemented");
}

Expand Down
10 changes: 9 additions & 1 deletion cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,20 @@ class DictionaryBuilderBase : public ArrayBuilder {

/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal<T1, Status> Append(const Decimal128& value) {
enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
uint8_t data[16];
value.ToBytes(data);
return Append(data, 16);
}

/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
uint8_t data[32];
value.ToBytes(data);
return Append(data, 32);
}

/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
Expand Down
Loading