From 4fdc794af023dc489ff6457a672bda2816ceb585 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 25 May 2021 09:55:21 -0400 Subject: [PATCH 1/2] ARROW-12859: [C++] Add ScalarFromJSON and AssertDatumsApproxEqual for testing --- cpp/src/arrow/chunked_array.cc | 27 ++++++++++++++++ cpp/src/arrow/chunked_array.h | 4 +++ cpp/src/arrow/ipc/json_simple.cc | 21 +++++++++++++ cpp/src/arrow/ipc/json_simple.h | 4 +++ cpp/src/arrow/testing/gtest_util.cc | 49 +++++++++++++++++++++++++++++ cpp/src/arrow/testing/gtest_util.h | 10 ++++++ 6 files changed, 115 insertions(+) diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index b259b05d7cf..142bd0d8c89 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -118,6 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr& other) const { return Equals(*other.get()); } +bool ChunkedArray::ApproxEquals(const ChunkedArray& other, + const EqualOptions& equal_options) const { + if (length_ != other.length()) { + return false; + } + if (null_count_ != other.null_count()) { + return false; + } + // We cannot toggle check_metadata here yet, so we don't check it + if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { + return false; + } + + // Check contents of the underlying arrays. This checks for equality of + // the underlying data independently of the chunk size. + return internal::ApplyBinaryChunked( + *this, other, + [&](const Array& left_piece, const Array& right_piece, + int64_t ARROW_ARG_UNUSED(position)) { + if (!left_piece.ApproxEquals(right_piece, equal_options)) { + return Status::Invalid("Unequal piece"); + } + return Status::OK(); + }) + .ok(); +} + std::shared_ptr ChunkedArray::Slice(int64_t offset, int64_t length) const { ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length"; bool offset_equals_length = offset == length_; diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index 5c0dda91850..2ace045c2bf 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -23,6 +23,7 @@ #include #include +#include "arrow/compare.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -136,6 +137,9 @@ class ARROW_EXPORT ChunkedArray { bool Equals(const ChunkedArray& other) const; /// \brief Determine if two chunked arrays are equal. bool Equals(const std::shared_ptr& other) const; + /// \brief Determine if two chunked arrays approximately equal + bool ApproxEquals(const ChunkedArray& other, + const EqualOptions& = EqualOptions::Defaults()) const; /// \return PrettyPrint representation suitable for debugging std::string ToString() const; diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index caf6fd06b9c..eb7a4f3a790 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -30,6 +30,7 @@ #include "arrow/array/builder_time.h" #include "arrow/array/builder_union.h" #include "arrow/ipc/json_simple.h" +#include "arrow/scalar.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" @@ -911,6 +912,26 @@ Status DictArrayFromJSON(const std::shared_ptr& type, .Value(out); } +Status ScalarFromJSON(const std::shared_ptr& type, + util::string_view json_string, std::shared_ptr* out) { + std::shared_ptr converter; + RETURN_NOT_OK(GetConverter(type, &converter)); + + rj::Document json_doc; + json_doc.Parse(json_string.data(), json_string.length()); + if (json_doc.HasParseError()) { + return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", + GetParseError_En(json_doc.GetParseError())); + } + + std::shared_ptr array; + RETURN_NOT_OK(converter->AppendValue(json_doc)); + RETURN_NOT_OK(converter->Finish(&array)); + DCHECK_EQ(array->length(), 1); + ARROW_ASSIGN_OR_RAISE(*out, array->GetScalar(0)); + return Status::OK(); +} + } // namespace json } // namespace internal } // namespace ipc diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h index 8f6b57a4608..4dd3a664aa6 100644 --- a/cpp/src/arrow/ipc/json_simple.h +++ b/cpp/src/arrow/ipc/json_simple.h @@ -51,6 +51,10 @@ ARROW_EXPORT Status DictArrayFromJSON(const std::shared_ptr&, util::string_view indices_json, util::string_view dictionary_json, std::shared_ptr* out); +ARROW_EXPORT +Status ScalarFromJSON(const std::shared_ptr&, util::string_view json, + std::shared_ptr* out); + } // namespace json } // namespace internal } // namespace ipc diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index ba4fe1e1fe7..39bd665d5b6 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -222,6 +222,20 @@ void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& a } } +void AssertChunkedApproxEquivalent(const ChunkedArray& expected, + const ChunkedArray& actual, + const EqualOptions& equal_options) { + if (!actual.ApproxEquals(expected, equal_options)) { + std::stringstream pp_expected; + std::stringstream pp_actual; + ::arrow::PrettyPrintOptions options(/*indent=*/2); + options.window = 50; + ARROW_EXPECT_OK(PrettyPrint(expected, options, &pp_expected)); + ARROW_EXPECT_OK(PrettyPrint(actual, options, &pp_actual)); + FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str(); + } +} + void AssertBufferEqual(const Buffer& buffer, const std::vector& expected) { ASSERT_EQ(static_cast(buffer.size()), expected.size()) << "Mismatching buffer size"; @@ -361,6 +375,34 @@ void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose) } } +void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool verbose, + const EqualOptions& options) { + ASSERT_EQ(expected.kind(), actual.kind()) + << "expected:" << expected.ToString() << " got:" << actual.ToString(); + + switch (expected.kind()) { + case Datum::SCALAR: + AssertScalarsApproxEqual(*expected.scalar(), *actual.scalar(), verbose, options); + break; + case Datum::ARRAY: { + auto expected_array = expected.make_array(); + auto actual_array = actual.make_array(); + AssertArraysApproxEqual(*expected_array, *actual_array, verbose, options); + break; + } + case Datum::CHUNKED_ARRAY: { + auto expected_array = expected.chunked_array(); + auto actual_array = actual.chunked_array(); + AssertChunkedApproxEquivalent(*expected_array, *actual_array, options); + break; + } + default: + // TODO: Implement better print + ASSERT_TRUE(actual.Equals(expected)); + break; + } +} + std::shared_ptr ArrayFromJSON(const std::shared_ptr& type, util::string_view json) { std::shared_ptr out; @@ -396,6 +438,13 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& return *RecordBatch::FromStructArray(struct_array); } +std::shared_ptr ScalarFromJSON(const std::shared_ptr& type, + util::string_view json) { + std::shared_ptr out; + ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out)); + return out; +} + std::shared_ptr TableFromJSON(const std::shared_ptr& schema, const std::vector& json) { std::vector> batches; diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index 757986e13ca..b8ea8e76298 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -199,6 +199,9 @@ ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual, // Like ChunkedEqual, but permits different chunk layout ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& actual); +ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent( + const ChunkedArray& expected, const ChunkedArray& actual, + const EqualOptions& equal_options = EqualOptions::Defaults()); ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer, const std::vector& expected); ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer, @@ -246,6 +249,9 @@ ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table& ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose = false); +ARROW_TESTING_EXPORT void AssertDatumsApproxEqual( + const Datum& expected, const Datum& actual, bool verbose = false, + const EqualOptions& options = EqualOptions::Defaults()); template void AssertNumericDataEqual(const C_TYPE* raw_data, @@ -301,6 +307,10 @@ ARROW_TESTING_EXPORT std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr&, const std::vector& json); +ARROW_TESTING_EXPORT +std::shared_ptr ScalarFromJSON(const std::shared_ptr&, + util::string_view json); + ARROW_TESTING_EXPORT std::shared_ptr
TableFromJSON(const std::shared_ptr&, const std::vector& json); From cf124aa97a7fc420404a2413a5ee77986d170899 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 1 Jun 2021 16:37:24 -0500 Subject: [PATCH 2/2] ARROW-12859: [C++] Add simple unit test for ScalarFromJSON --- cpp/src/arrow/ipc/json_simple_test.cc | 54 ++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index c5358ac89f1..481f38aab21 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -136,6 +136,21 @@ void AssertJSONDictArray(const std::shared_ptr& index_type, AssertArraysEqual(*expected_values, *dict_array.dictionary()); } +template +void AssertJSONScalar(const std::shared_ptr& type, const std::string& json, + const bool is_valid, const C_TYPE value) { + SCOPED_TRACE(json); + std::shared_ptr actual, expected; + + ASSERT_OK(ScalarFromJSON(type, json, &actual)); + if (is_valid) { + ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value)); + } else { + expected = MakeNullScalar(type); + } + AssertScalarsEqual(*expected, *actual, /*verbose=*/true); +} + TEST(TestHelper, JSONArray) { // Test the JSONArray helper func std::string s = @@ -329,7 +344,6 @@ TEST(TestNull, Errors) { TEST(TestBoolean, Basics) { std::shared_ptr type = boolean(); - std::shared_ptr expected, actual; AssertJSONArray(type, "[]", {}); AssertJSONArray(type, "[false, true, false]", {false, true, false}); @@ -1327,6 +1341,44 @@ TEST(TestDictArrayFromJSON, Errors) { &array)); // dict value isn't string } +TEST(TestScalarFromJSON, Basics) { + // Sanity check for common types (not exhaustive) + std::shared_ptr scalar; + AssertJSONScalar(int64(), "4", true, 4); + AssertJSONScalar(int64(), "null", false, 0); + AssertJSONScalar>(utf8(), R"("")", true, + Buffer::FromString("")); + AssertJSONScalar>(utf8(), R"("foo")", true, + Buffer::FromString("foo")); + AssertJSONScalar>(utf8(), R"(null)", false, + Buffer::FromString("")); + AssertJSONScalar(null(), "null", false, nullptr); + AssertJSONScalar(boolean(), "true", true, true); + AssertJSONScalar(boolean(), "false", true, false); + AssertJSONScalar(boolean(), "null", false, false); + AssertJSONScalar(boolean(), "0", true, false); + AssertJSONScalar(boolean(), "1", true, true); + AssertJSONScalar(float64(), "1.0", true, 1.0); + AssertJSONScalar(float64(), "-0.0", true, -0.0); + ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar)); + ASSERT_TRUE(std::isnan(checked_cast(*scalar).value)); + ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar)); + ASSERT_TRUE(std::isinf(checked_cast(*scalar).value)); +} + +TEST(TestScalarFromJSON, Errors) { + std::shared_ptr scalar; + ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[0]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[9223372036854775808]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[-9223372036854775809]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[18446744073709551616]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[-1]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "0", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "[]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "0.0", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "\"true\"", &scalar)); +} + } // namespace json } // namespace internal } // namespace ipc