Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
50b57b6
GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations
zeroshade Sep 4, 2024
12896a9
fix linting
zeroshade Sep 4, 2024
5e125e0
fixing a build issue
zeroshade Sep 4, 2024
64aad89
add not implemented for new decimal types with pyarrow
zeroshade Sep 5, 2024
f2a46dd
make conversion explicit instaed of implicit
zeroshade Sep 5, 2024
ed3ff00
deprecate old decimal, add smallest_decimal, fix tests
zeroshade Sep 5, 2024
00caa8a
fix linting
zeroshade Sep 5, 2024
1684403
fix implicit cast
zeroshade Sep 5, 2024
9b28a0e
update from feedback
zeroshade Sep 5, 2024
6ba84d5
fix build issue
zeroshade Sep 5, 2024
a510534
enable decimal32/64 integration testing
zeroshade Sep 9, 2024
67512e5
python lint
zeroshade Sep 9, 2024
02aea22
Update cpp/src/arrow/integration/json_internal.cc
zeroshade Sep 9, 2024
db59d6d
Update cpp/src/arrow/ipc/metadata_internal.cc
zeroshade Sep 9, 2024
ebed805
updates from feedback
zeroshade Sep 10, 2024
b95d9ba
pre-commit and fix
zeroshade Sep 10, 2024
f4b3a81
rebase and fix
zeroshade Sep 10, 2024
5d82818
ran pre-commit
zeroshade Sep 10, 2024
315579d
Update cpp/src/arrow/type_fwd.h
zeroshade Sep 10, 2024
5bef850
updates from feedback and propagation
zeroshade Sep 10, 2024
53cf289
more updates from feedback
zeroshade Sep 10, 2024
87a7445
pre-commit linting
zeroshade Sep 10, 2024
7ad7dbc
convert away from deprecated function
zeroshade Sep 10, 2024
202df3b
pre-commit
zeroshade Sep 10, 2024
683a789
fix test
zeroshade Sep 10, 2024
ccbe6ca
changes from feedback for swapendian
zeroshade Sep 10, 2024
91e3c13
fix unit tests
zeroshade Sep 10, 2024
f47a1f4
Update cpp/src/arrow/array/builder_dict.h
zeroshade Sep 11, 2024
86aa26c
fixed test issue in parquet
zeroshade Sep 11, 2024
34493f7
Update cpp/src/arrow/engine/substrait/expression_internal.cc
zeroshade Sep 11, 2024
f71303c
Update cpp/src/arrow/compute/kernels/hash_aggregate.cc
zeroshade Sep 11, 2024
f276a9e
updates from feedback
zeroshade Sep 11, 2024
3b6d74a
lint and windows build fix
zeroshade Sep 11, 2024
7ea13c4
Update cpp/src/arrow/integration/json_internal.cc
zeroshade Sep 16, 2024
295b4c8
Update cpp/src/arrow/util/basic_decimal.h
zeroshade Sep 16, 2024
a8c0c75
updates from feedback and comments
zeroshade Sep 16, 2024
46c84f1
remove commented out code
zeroshade Sep 16, 2024
4973864
ran pre-commit for linting
zeroshade Sep 16, 2024
b7d2bf3
sumtype and accumulator type for decimals should be consistent
zeroshade Sep 16, 2024
512d464
simplify check
zeroshade Sep 16, 2024
8f46e50
linting
zeroshade Sep 16, 2024
ae3d8a2
Add tests for Decimal32 and Decimal64
zeroshade Sep 18, 2024
bf9eb74
linting
zeroshade Sep 18, 2024
2fb271a
remove abs from FromReal, only constexpr in C++23 and newer
zeroshade Sep 18, 2024
b44888d
simplify a bunch of tests with a generic typed_test
zeroshade Sep 19, 2024
e2957a9
use FromRealApprox
zeroshade Sep 19, 2024
980d6fb
static_cast instead of implicit cast
zeroshade Sep 19, 2024
2a3e5c4
remove special cases, adjust tests
zeroshade Sep 19, 2024
c723754
Update cpp/src/arrow/util/decimal.cc
zeroshade Sep 24, 2024
5382eb4
more updates from comments
zeroshade Sep 24, 2024
9fda783
add reference to issue for decimal32 approx
zeroshade Sep 24, 2024
af8c722
make RoundedRightShift a no-op
zeroshade Sep 30, 2024
48639e3
fix tests
zeroshade Sep 30, 2024
b110605
avoid ASAN issue
zeroshade Sep 30, 2024
1d97e27
fix ubsan test
zeroshade Sep 30, 2024
39032f2
fix ubsan
zeroshade Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/src/arrow/acero/tpch_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ std::shared_ptr<ExecPlan> Plan_Q1(AsyncGenerator<std::optional<ExecBatch>>* sink
Expression base_price = field_ref("L_EXTENDEDPRICE");

std::shared_ptr<Decimal128Scalar> decimal_1 =
std::make_shared<Decimal128Scalar>(Decimal128{0, 100}, decimal(12, 2));
std::make_shared<Decimal128Scalar>(Decimal128{0, 100}, decimal128(12, 2));
Expression discount_multiplier =
call("subtract", {literal(decimal_1), field_ref("L_DISCOUNT")});
Expression tax_multiplier = call("add", {literal(decimal_1), field_ref("L_TAX")});
Expand All @@ -68,7 +68,7 @@ std::shared_ptr<ExecPlan> Plan_Q1(AsyncGenerator<std::optional<ExecBatch>>* sink
call("multiply",
{call("cast",
{call("multiply", {field_ref("L_EXTENDEDPRICE"), discount_multiplier})},
compute::CastOptions::Unsafe(decimal(12, 2))),
compute::CastOptions::Unsafe(decimal128(12, 2))),
tax_multiplier});
Expression discount = field_ref("L_DISCOUNT");

Expand Down
18 changes: 9 additions & 9 deletions cpp/src/arrow/acero/tpch_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -838,12 +838,12 @@ class PartAndPartSupplierGenerator {

const std::vector<std::shared_ptr<DataType>> kPartTypes = {
int32(), utf8(), fixed_size_binary(25), fixed_size_binary(10),
utf8(), int32(), fixed_size_binary(10), decimal(12, 2),
utf8(), int32(), fixed_size_binary(10), decimal128(12, 2),
utf8(),
};

const std::vector<std::shared_ptr<DataType>> kPartsuppTypes = {
int32(), int32(), int32(), decimal(12, 2), utf8(),
int32(), int32(), int32(), decimal128(12, 2), utf8(),
};

Status AllocatePartBatch(size_t thread_index, int column) {
Expand Down Expand Up @@ -1527,7 +1527,7 @@ class OrdersAndLineItemGenerator {
const std::vector<std::shared_ptr<DataType>> kOrdersTypes = {int32(),
int32(),
fixed_size_binary(1),
decimal(12, 2),
decimal128(12, 2),
date32(),
fixed_size_binary(15),
fixed_size_binary(15),
Expand All @@ -1539,10 +1539,10 @@ class OrdersAndLineItemGenerator {
int32(),
int32(),
int32(),
decimal(12, 2),
decimal(12, 2),
decimal(12, 2),
decimal(12, 2),
decimal128(12, 2),
decimal128(12, 2),
decimal128(12, 2),
decimal128(12, 2),
fixed_size_binary(1),
fixed_size_binary(1),
date32(),
Expand Down Expand Up @@ -2489,7 +2489,7 @@ class SupplierGenerator : public TpchTableGenerator {

std::vector<std::shared_ptr<DataType>> kTypes = {
int32(), fixed_size_binary(25), utf8(),
int32(), fixed_size_binary(15), decimal(12, 2),
int32(), fixed_size_binary(15), decimal128(12, 2),
utf8(),
};

Expand Down Expand Up @@ -2872,7 +2872,7 @@ class CustomerGenerator : public TpchTableGenerator {
utf8(),
int32(),
fixed_size_binary(15),
decimal(12, 2),
decimal128(12, 2),
fixed_size_binary(10),
utf8(),
};
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ struct ScalarFromArraySlotImpl {
return Finish(a.Value(index_));
}

Status Visit(const Decimal32Array& a) { return Finish(Decimal32(a.GetValue(index_))); }

Status Visit(const Decimal64Array& a) { return Finish(Decimal64(a.GetValue(index_))); }

Status Visit(const Decimal128Array& a) {
return Finish(Decimal128(a.GetValue(index_)));
}
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/arrow/array/array_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,34 @@ namespace arrow {

using internal::checked_cast;

// ----------------------------------------------------------------------
// Decimal32

Decimal32Array::Decimal32Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL32);
}

std::string Decimal32Array::FormatValue(int64_t i) const {
const auto& type_ = checked_cast<const Decimal32Type&>(*type());
const Decimal32 value(GetValue(i));
return value.ToString(type_.scale());
}

// ----------------------------------------------------------------------
// Decimal64

Decimal64Array::Decimal64Array(const std::shared_ptr<ArrayData>& data)
: FixedSizeBinaryArray(data) {
ARROW_CHECK_EQ(data->type->id(), Type::DECIMAL64);
}

std::string Decimal64Array::FormatValue(int64_t i) const {
const auto& type_ = checked_cast<const Decimal64Type&>(*type());
const Decimal64 value(GetValue(i));
return value.ToString(type_.scale());
}

// ----------------------------------------------------------------------
// Decimal128

Expand Down
32 changes: 32 additions & 0 deletions cpp/src/arrow/array/array_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,38 @@ namespace arrow {
///
/// @{

// ----------------------------------------------------------------------
// Decimal32Array

/// Concrete Array class for 32-bit decimal data
class ARROW_EXPORT Decimal32Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal32Type;

using FixedSizeBinaryArray::FixedSizeBinaryArray;

/// \brief Construct Decimal32Array from ArrayData instance
explicit Decimal32Array(const std::shared_ptr<ArrayData>& data);

std::string FormatValue(int64_t i) const;
};

// ----------------------------------------------------------------------
// Decimal64Array

/// Concrete Array class for 64-bit decimal data
class ARROW_EXPORT Decimal64Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal64Type;

using FixedSizeBinaryArray::FixedSizeBinaryArray;

/// \brief Construct Decimal64Array from ArrayData instance
explicit Decimal64Array(const std::shared_ptr<ArrayData>& data);

std::string FormatValue(int64_t i) const;
};

// ----------------------------------------------------------------------
// Decimal128Array

Expand Down
124 changes: 121 additions & 3 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ static std::vector<std::shared_ptr<DataType>> TestArrayUtilitiesAgainstTheseType
large_binary(),
binary_view(),
fixed_size_binary(3),
decimal(16, 4),
decimal128(16, 4),
utf8(),
large_utf8(),
utf8_view(),
Expand Down Expand Up @@ -667,8 +667,10 @@ static ScalarVector GetScalars() {
std::make_shared<BinaryViewScalar>(hello),
std::make_shared<FixedSizeBinaryScalar>(
hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
std::make_shared<Decimal32Scalar>(Decimal32(10), smallest_decimal(7, 4)),
std::make_shared<Decimal64Scalar>(Decimal64(10), smallest_decimal(12, 4)),
std::make_shared<Decimal128Scalar>(Decimal128(10), smallest_decimal(20, 4)),
std::make_shared<Decimal256Scalar>(Decimal256(10), smallest_decimal(76, 38)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has there been discussion about this namin already?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

smallest_decimal was the suggestion by @pitrou, no one made any objections and I doubt he's particularly tied to the name if you have a suggestion for a better one. What are your thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"narrowest_decimal"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added suggestions here #43957 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright. Can we name it something shorter? I have at least 3 suggestions:

  • best_decimal
  • fast_decimal
  • least_decimal

Some of these naming conventions exist in the C++ standard library https://en.cppreference.com/w/cpp/types/integer

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"least_decimal" is the best of these 3 suggestions ("best" and "fast" do not convey the right meaning), but "smallest" is easier to remember than "least" IMHO.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @pitrou that "smallest" is easier to remember than "least" but I'm fine with renaming it to "least" if that is preferred.

@felipecrv, @bkietz, @lidavidm thoughts?

std::make_shared<StringScalar>(hello),
std::make_shared<LargeStringScalar>(hello),
std::make_shared<StringViewScalar>(hello),
Expand Down Expand Up @@ -3092,6 +3094,98 @@ class DecimalTest : public ::testing::TestWithParam<int> {
}
};

using Decimal32Test = DecimalTest<Decimal32Type>;

TEST_P(Decimal32Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal32> draw = {Decimal32(1), Decimal32(-2), Decimal32(2389),
Decimal32(4), Decimal32(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal32Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal32> draw = {Decimal32(1), Decimal32(2), Decimal32(-1), Decimal32(4),
Decimal32(-1), Decimal32(1), Decimal32(2)};
Decimal32 big;
ASSERT_OK_AND_ASSIGN(big, Decimal32::FromString("23034.234"));
draw.push_back(big);

Decimal32 big_negative;
ASSERT_OK_AND_ASSIGN(big_negative, Decimal32::FromString("-23049.235"));
draw.push_back(big_negative);

std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal32Test, ValidateFull) {
int32_t precision = GetParam();
std::vector<Decimal32> draw;
Decimal32 val = Decimal32::GetMaxValue(precision) + 1;

draw = {Decimal32(), val};
auto arr = this->TestCreate(precision, draw, {true, false}, 0);
ASSERT_OK(arr->ValidateFull());

draw = {val, Decimal32()};
arr = this->TestCreate(precision, draw, {true, false}, 0);
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::HasSubstr("does not fit in precision of"), arr->ValidateFull());
}

INSTANTIATE_TEST_SUITE_P(Decimal32Test, Decimal32Test, ::testing::Range(1, 9));

using Decimal64Test = DecimalTest<Decimal64Type>;

TEST_P(Decimal64Test, NoNulls) {
int32_t precision = GetParam();
std::vector<Decimal64> draw = {Decimal64(1), Decimal64(-2), Decimal64(2389),
Decimal64(4), Decimal64(-12348)};
std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal64Test, WithNulls) {
int32_t precision = GetParam();
std::vector<Decimal64> draw = {Decimal64(1), Decimal64(2), Decimal64(-1), Decimal64(4),
Decimal64(-1), Decimal64(1), Decimal64(2)};
Decimal64 big;
ASSERT_OK_AND_ASSIGN(big, Decimal64::FromString("23034.234234"));
draw.push_back(big);

Decimal64 big_negative;
ASSERT_OK_AND_ASSIGN(big_negative, Decimal64::FromString("-23049.235234"));
draw.push_back(big_negative);

std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
true, true, true, true};
this->TestCreate(precision, draw, valid_bytes, 0);
this->TestCreate(precision, draw, valid_bytes, 2);
}

TEST_P(Decimal64Test, ValidateFull) {
int32_t precision = GetParam();
std::vector<Decimal64> draw;
Decimal64 val = Decimal64::GetMaxValue(precision) + 1;

draw = {Decimal64(), val};
auto arr = this->TestCreate(precision, draw, {true, false}, 0);
ASSERT_OK(arr->ValidateFull());

draw = {val, Decimal64()};
arr = this->TestCreate(precision, draw, {true, false}, 0);
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::HasSubstr("does not fit in precision of"), arr->ValidateFull());
}

INSTANTIATE_TEST_SUITE_P(Decimal64Test, Decimal64Test, ::testing::Range(1, 9));

using Decimal128Test = DecimalTest<Decimal128Type>;

TEST_P(Decimal128Test, NoNulls) {
Expand Down Expand Up @@ -3315,6 +3409,28 @@ TEST(TestSwapEndianArrayData, PrimitiveType) {
expected_data = ArrayData::Make(uint64(), 1, {null_buffer, data_int64_buffer}, 0);
AssertArrayDataEqualsWithSwapEndian(data, expected_data);

auto data_4byte_buffer = Buffer::FromString(
"\x01"
"12\x01");
data = ArrayData::Make(decimal32(9, 8), 1, {null_buffer, data_4byte_buffer});
auto data_decimal32_buffer = Buffer::FromString(
"\x01"
"21\x01");
expected_data =
ArrayData::Make(decimal32(9, 8), 1, {null_buffer, data_decimal32_buffer}, 0);
AssertArrayDataEqualsWithSwapEndian(data, expected_data);

auto data_8byte_buffer = Buffer::FromString(
"\x01"
"123456\x01");
data = ArrayData::Make(decimal64(18, 8), 1, {null_buffer, data_8byte_buffer});
auto data_decimal64_buffer = Buffer::FromString(
"\x01"
"654321\x01");
expected_data =
ArrayData::Make(decimal64(18, 8), 1, {null_buffer, data_decimal64_buffer}, 0);
AssertArrayDataEqualsWithSwapEndian(data, expected_data);

auto data_16byte_buffer = Buffer::FromString(
"\x01"
"123456789abcde\x01");
Expand Down Expand Up @@ -3647,6 +3763,8 @@ DataTypeVector SwappableTypes() {
uint16(),
uint32(),
uint64(),
decimal32(8, 1),
decimal64(16, 2),
decimal128(19, 4),
decimal256(37, 8),
timestamp(TimeUnit::MICRO, ""),
Expand Down
40 changes: 38 additions & 2 deletions cpp/src/arrow/array/array_view_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,32 @@ TEST(TestArrayView, SparseUnionAsStruct) {
CheckView(expected, arr);
}

TEST(TestArrayView, DecimalRoundTrip) {
auto ty1 = decimal(10, 4);
TEST(TestArrayView, Decimal32RoundTrip) {
auto ty1 = decimal32(9, 4);
auto arr = ArrayFromJSON(ty1, R"(["123.4567", "-78.9000", null])");

auto ty2 = fixed_size_binary(4);
ASSERT_OK_AND_ASSIGN(auto v, arr->View(ty2));
ASSERT_OK(v->ValidateFull());
ASSERT_OK_AND_ASSIGN(auto w, v->View(ty1));
ASSERT_OK(w->ValidateFull());
AssertArraysEqual(*arr, *w);
}

TEST(TestArrayView, Decimal64RoundTrip) {
auto ty1 = decimal64(10, 4);
auto arr = ArrayFromJSON(ty1, R"(["123.4567", "-78.9000", null])");

auto ty2 = fixed_size_binary(8);
ASSERT_OK_AND_ASSIGN(auto v, arr->View(ty2));
ASSERT_OK(v->ValidateFull());
ASSERT_OK_AND_ASSIGN(auto w, v->View(ty1));
ASSERT_OK(w->ValidateFull());
AssertArraysEqual(*arr, *w);
}

TEST(TestArrayView, Decimal128RoundTrip) {
auto ty1 = decimal128(20, 4);
auto arr = ArrayFromJSON(ty1, R"(["123.4567", "-78.9000", null])");

auto ty2 = fixed_size_binary(16);
Expand All @@ -397,6 +421,18 @@ TEST(TestArrayView, DecimalRoundTrip) {
AssertArraysEqual(*arr, *w);
}

TEST(TestArrayView, Decimal256RoundTrip) {
auto ty1 = decimal256(10, 4);
auto arr = ArrayFromJSON(ty1, R"(["123.4567", "-78.9000", null])");

auto ty2 = fixed_size_binary(32);
ASSERT_OK_AND_ASSIGN(auto v, arr->View(ty2));
ASSERT_OK(v->ValidateFull());
ASSERT_OK_AND_ASSIGN(auto w, v->View(ty1));
ASSERT_OK(w->ValidateFull());
AssertArraysEqual(*arr, *w);
}

TEST(TestArrayView, Dictionaries) {
// ARROW-6049
auto ty1 = dictionary(int8(), float32());
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ struct AppendScalarImpl {
}

Status Visit(const FixedSizeBinaryType& t) { return HandleFixedWidth(t); }
Status Visit(const Decimal32Type& t) { return HandleFixedWidth(t); }
Status Visit(const Decimal64Type& t) { return HandleFixedWidth(t); }
Status Visit(const Decimal128Type& t) { return HandleFixedWidth(t); }
Status Visit(const Decimal256Type& t) { return HandleFixedWidth(t); }

Expand Down
Loading