From 21ca3aa6455e23148c70462ac870b0cf7f97a30c Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Wed, 9 Jul 2025 15:11:52 +0330 Subject: [PATCH] Enhance arrow::ChunkedArray::Equals for same memory address test case --- cpp/src/arrow/chunked_array.cc | 19 ++++++--- cpp/src/arrow/chunked_array_test.cc | 62 +++++++++++++++++------------ 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index 32578ffd93f..0fa174c1759 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -100,24 +100,31 @@ DeviceAllocationTypeSet ChunkedArray::device_types() const { } namespace { -bool mayHaveNaN(const arrow::DataType& type) { - if (type.num_fields() == 0) { - return is_floating(type.id()); +// Check whether the type or any of its children is a float type. +bool ContainsFloatType(const DataType& type) { + if (is_floating(type.id())) { + return true; } else { + // Check if any nested field contains a float type. for (const auto& field : type.fields()) { - if (mayHaveNaN(*field->type())) { + if (ContainsFloatType(*field->type())) { return true; } } } + // No float types are observed return false; } } // namespace bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const { - if (this == &other && !mayHaveNaN(*type_)) { - return true; + if (this == &other) { + if (opts.nans_equal()) { + return true; + } else if (!ContainsFloatType(*type_)) { + return true; + } } if (length_ != other.length()) { return false; diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index 689ef57c59a..326eb24d083 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -153,33 +153,45 @@ TEST_F(TestChunkedArray, EqualsDifferingMetadata) { ASSERT_TRUE(left.Equals(right)); } -TEST_F(TestChunkedArray, EqualsSameAddressWithNaNs) { - auto chunk_with_nan1 = ArrayFromJSON(float64(), "[0, 1, 2, NaN]"); - auto chunk_without_nan1 = ArrayFromJSON(float64(), "[3, 4, 5]"); - ArrayVector chunks1 = {chunk_with_nan1, chunk_without_nan1}; - ASSERT_OK_AND_ASSIGN(auto chunked_array_with_nan1, ChunkedArray::Make(chunks1)); - ASSERT_FALSE(chunked_array_with_nan1->Equals(chunked_array_with_nan1)); - - auto chunk_without_nan2 = ArrayFromJSON(float64(), "[6, 7, 8, 9]"); - ArrayVector chunks2 = {chunk_without_nan1, chunk_without_nan2}; - ASSERT_OK_AND_ASSIGN(auto chunked_array_without_nan1, ChunkedArray::Make(chunks2)); - ASSERT_TRUE(chunked_array_without_nan1->Equals(chunked_array_without_nan1)); +class TestChunkedArrayEqualsSameAddress : public TestChunkedArray {}; +TEST_F(TestChunkedArrayEqualsSameAddress, NonFloatType) { auto int32_array = ArrayFromJSON(int32(), "[0, 1, 2]"); - auto float64_array_with_nan = ArrayFromJSON(float64(), "[0, 1, NaN]"); - ArrayVector arrays1 = {int32_array, float64_array_with_nan}; - std::vector fieldnames = {"Int32Type", "Float64Type"}; - ASSERT_OK_AND_ASSIGN(auto struct_with_nan, StructArray::Make(arrays1, fieldnames)); - ArrayVector chunks3 = {struct_with_nan}; - ASSERT_OK_AND_ASSIGN(auto chunked_array_with_nan2, ChunkedArray::Make(chunks3)); - ASSERT_FALSE(chunked_array_with_nan2->Equals(chunked_array_with_nan2)); - - auto float64_array_without_nan = ArrayFromJSON(float64(), "[0, 1, 2]"); - ArrayVector arrays2 = {int32_array, float64_array_without_nan}; - ASSERT_OK_AND_ASSIGN(auto struct_without_nan, StructArray::Make(arrays2, fieldnames)); - ArrayVector chunks4 = {struct_without_nan}; - ASSERT_OK_AND_ASSIGN(auto chunked_array_without_nan2, ChunkedArray::Make(chunks4)); - ASSERT_TRUE(chunked_array_without_nan2->Equals(chunked_array_without_nan2)); + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({int32_array})); + ASSERT_TRUE(chunked_array->Equals(chunked_array)); +} + +TEST_F(TestChunkedArrayEqualsSameAddress, NestedTypeWithoutFloat) { + auto int32_array = ArrayFromJSON(int32(), "[0, 1]"); + ASSERT_OK_AND_ASSIGN(auto struct_array, + StructArray::Make({int32_array}, {"Int32Type"})); + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({struct_array})); + + ASSERT_TRUE(chunked_array->Equals(chunked_array)); +} + +TEST_F(TestChunkedArrayEqualsSameAddress, FloatType) { + auto float64_array = ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, NaN]"); + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({float64_array})); + + ASSERT_FALSE(chunked_array->Equals(chunked_array)); + + // Assert when EqualOptions::nans_equal_ is set + ASSERT_TRUE( + chunked_array->Equals(chunked_array, EqualOptions::Defaults().nans_equal(true))); +} + +TEST_F(TestChunkedArrayEqualsSameAddress, NestedTypeWithFloat) { + auto float64_array = ArrayFromJSON(float64(), "[0.0, 1.0, NaN]"); + ASSERT_OK_AND_ASSIGN(auto struct_array, + StructArray::Make({float64_array}, {"Float64Type"})); + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({struct_array})); + + ASSERT_FALSE(chunked_array->Equals(chunked_array)); + + // Assert when EqualOptions::nans_equal_ is set + ASSERT_TRUE( + chunked_array->Equals(chunked_array, EqualOptions::Defaults().nans_equal(true))); } TEST_F(TestChunkedArray, ApproxEquals) {