From 4fdc794af023dc489ff6457a672bda2816ceb585 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 09:55:21 -0400
Subject: [PATCH 1/2] ARROW-12859: [C++] Add ScalarFromJSON and
AssertDatumsApproxEqual for testing
---
cpp/src/arrow/chunked_array.cc | 27 ++++++++++++++++
cpp/src/arrow/chunked_array.h | 4 +++
cpp/src/arrow/ipc/json_simple.cc | 21 +++++++++++++
cpp/src/arrow/ipc/json_simple.h | 4 +++
cpp/src/arrow/testing/gtest_util.cc | 49 +++++++++++++++++++++++++++++
cpp/src/arrow/testing/gtest_util.h | 10 ++++++
6 files changed, 115 insertions(+)
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index b259b05d7cf..142bd0d8c89 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -118,6 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr& other) const {
return Equals(*other.get());
}
+bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
+ const EqualOptions& equal_options) const {
+ if (length_ != other.length()) {
+ return false;
+ }
+ if (null_count_ != other.null_count()) {
+ return false;
+ }
+ // We cannot toggle check_metadata here yet, so we don't check it
+ if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
+ return false;
+ }
+
+ // Check contents of the underlying arrays. This checks for equality of
+ // the underlying data independently of the chunk size.
+ return internal::ApplyBinaryChunked(
+ *this, other,
+ [&](const Array& left_piece, const Array& right_piece,
+ int64_t ARROW_ARG_UNUSED(position)) {
+ if (!left_piece.ApproxEquals(right_piece, equal_options)) {
+ return Status::Invalid("Unequal piece");
+ }
+ return Status::OK();
+ })
+ .ok();
+}
+
std::shared_ptr ChunkedArray::Slice(int64_t offset, int64_t length) const {
ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
bool offset_equals_length = offset == length_;
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 5c0dda91850..2ace045c2bf 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -23,6 +23,7 @@
#include
#include
+#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
@@ -136,6 +137,9 @@ class ARROW_EXPORT ChunkedArray {
bool Equals(const ChunkedArray& other) const;
/// \brief Determine if two chunked arrays are equal.
bool Equals(const std::shared_ptr& other) const;
+ /// \brief Determine if two chunked arrays approximately equal
+ bool ApproxEquals(const ChunkedArray& other,
+ const EqualOptions& = EqualOptions::Defaults()) const;
/// \return PrettyPrint representation suitable for debugging
std::string ToString() const;
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index caf6fd06b9c..eb7a4f3a790 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -30,6 +30,7 @@
#include "arrow/array/builder_time.h"
#include "arrow/array/builder_union.h"
#include "arrow/ipc/json_simple.h"
+#include "arrow/scalar.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
@@ -911,6 +912,26 @@ Status DictArrayFromJSON(const std::shared_ptr& type,
.Value(out);
}
+Status ScalarFromJSON(const std::shared_ptr& type,
+ util::string_view json_string, std::shared_ptr* out) {
+ std::shared_ptr converter;
+ RETURN_NOT_OK(GetConverter(type, &converter));
+
+ rj::Document json_doc;
+ json_doc.Parse(json_string.data(), json_string.length());
+ if (json_doc.HasParseError()) {
+ return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
+ GetParseError_En(json_doc.GetParseError()));
+ }
+
+ std::shared_ptr array;
+ RETURN_NOT_OK(converter->AppendValue(json_doc));
+ RETURN_NOT_OK(converter->Finish(&array));
+ DCHECK_EQ(array->length(), 1);
+ ARROW_ASSIGN_OR_RAISE(*out, array->GetScalar(0));
+ return Status::OK();
+}
+
} // namespace json
} // namespace internal
} // namespace ipc
diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h
index 8f6b57a4608..4dd3a664aa6 100644
--- a/cpp/src/arrow/ipc/json_simple.h
+++ b/cpp/src/arrow/ipc/json_simple.h
@@ -51,6 +51,10 @@ ARROW_EXPORT
Status DictArrayFromJSON(const std::shared_ptr&, util::string_view indices_json,
util::string_view dictionary_json, std::shared_ptr* out);
+ARROW_EXPORT
+Status ScalarFromJSON(const std::shared_ptr&, util::string_view json,
+ std::shared_ptr* out);
+
} // namespace json
} // namespace internal
} // namespace ipc
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index ba4fe1e1fe7..39bd665d5b6 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -222,6 +222,20 @@ void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& a
}
}
+void AssertChunkedApproxEquivalent(const ChunkedArray& expected,
+ const ChunkedArray& actual,
+ const EqualOptions& equal_options) {
+ if (!actual.ApproxEquals(expected, equal_options)) {
+ std::stringstream pp_expected;
+ std::stringstream pp_actual;
+ ::arrow::PrettyPrintOptions options(/*indent=*/2);
+ options.window = 50;
+ ARROW_EXPECT_OK(PrettyPrint(expected, options, &pp_expected));
+ ARROW_EXPECT_OK(PrettyPrint(actual, options, &pp_actual));
+ FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str();
+ }
+}
+
void AssertBufferEqual(const Buffer& buffer, const std::vector& expected) {
ASSERT_EQ(static_cast(buffer.size()), expected.size())
<< "Mismatching buffer size";
@@ -361,6 +375,34 @@ void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose)
}
}
+void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool verbose,
+ const EqualOptions& options) {
+ ASSERT_EQ(expected.kind(), actual.kind())
+ << "expected:" << expected.ToString() << " got:" << actual.ToString();
+
+ switch (expected.kind()) {
+ case Datum::SCALAR:
+ AssertScalarsApproxEqual(*expected.scalar(), *actual.scalar(), verbose, options);
+ break;
+ case Datum::ARRAY: {
+ auto expected_array = expected.make_array();
+ auto actual_array = actual.make_array();
+ AssertArraysApproxEqual(*expected_array, *actual_array, verbose, options);
+ break;
+ }
+ case Datum::CHUNKED_ARRAY: {
+ auto expected_array = expected.chunked_array();
+ auto actual_array = actual.chunked_array();
+ AssertChunkedApproxEquivalent(*expected_array, *actual_array, options);
+ break;
+ }
+ default:
+ // TODO: Implement better print
+ ASSERT_TRUE(actual.Equals(expected));
+ break;
+ }
+}
+
std::shared_ptr ArrayFromJSON(const std::shared_ptr& type,
util::string_view json) {
std::shared_ptr out;
@@ -396,6 +438,13 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr&
return *RecordBatch::FromStructArray(struct_array);
}
+std::shared_ptr ScalarFromJSON(const std::shared_ptr& type,
+ util::string_view json) {
+ std::shared_ptr out;
+ ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out));
+ return out;
+}
+
std::shared_ptr TableFromJSON(const std::shared_ptr& schema,
const std::vector& json) {
std::vector> batches;
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 757986e13ca..b8ea8e76298 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -199,6 +199,9 @@ ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
// Like ChunkedEqual, but permits different chunk layout
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
const ChunkedArray& actual);
+ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
+ const ChunkedArray& expected, const ChunkedArray& actual,
+ const EqualOptions& equal_options = EqualOptions::Defaults());
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
const std::vector& expected);
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
@@ -246,6 +249,9 @@ ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table&
ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
bool verbose = false);
+ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
+ const Datum& expected, const Datum& actual, bool verbose = false,
+ const EqualOptions& options = EqualOptions::Defaults());
template
void AssertNumericDataEqual(const C_TYPE* raw_data,
@@ -301,6 +307,10 @@ ARROW_TESTING_EXPORT
std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr&,
const std::vector& json);
+ARROW_TESTING_EXPORT
+std::shared_ptr ScalarFromJSON(const std::shared_ptr&,
+ util::string_view json);
+
ARROW_TESTING_EXPORT
std::shared_ptr TableFromJSON(const std::shared_ptr&,
const std::vector& json);
From cf124aa97a7fc420404a2413a5ee77986d170899 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 1 Jun 2021 16:37:24 -0500
Subject: [PATCH 2/2] ARROW-12859: [C++] Add simple unit test for
ScalarFromJSON
---
cpp/src/arrow/ipc/json_simple_test.cc | 54 ++++++++++++++++++++++++++-
1 file changed, 53 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
index c5358ac89f1..481f38aab21 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -136,6 +136,21 @@ void AssertJSONDictArray(const std::shared_ptr& index_type,
AssertArraysEqual(*expected_values, *dict_array.dictionary());
}
+template
+void AssertJSONScalar(const std::shared_ptr& type, const std::string& json,
+ const bool is_valid, const C_TYPE value) {
+ SCOPED_TRACE(json);
+ std::shared_ptr actual, expected;
+
+ ASSERT_OK(ScalarFromJSON(type, json, &actual));
+ if (is_valid) {
+ ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value));
+ } else {
+ expected = MakeNullScalar(type);
+ }
+ AssertScalarsEqual(*expected, *actual, /*verbose=*/true);
+}
+
TEST(TestHelper, JSONArray) {
// Test the JSONArray helper func
std::string s =
@@ -329,7 +344,6 @@ TEST(TestNull, Errors) {
TEST(TestBoolean, Basics) {
std::shared_ptr type = boolean();
- std::shared_ptr expected, actual;
AssertJSONArray(type, "[]", {});
AssertJSONArray(type, "[false, true, false]", {false, true, false});
@@ -1327,6 +1341,44 @@ TEST(TestDictArrayFromJSON, Errors) {
&array)); // dict value isn't string
}
+TEST(TestScalarFromJSON, Basics) {
+ // Sanity check for common types (not exhaustive)
+ std::shared_ptr scalar;
+ AssertJSONScalar(int64(), "4", true, 4);
+ AssertJSONScalar(int64(), "null", false, 0);
+ AssertJSONScalar>(utf8(), R"("")", true,
+ Buffer::FromString(""));
+ AssertJSONScalar>(utf8(), R"("foo")", true,
+ Buffer::FromString("foo"));
+ AssertJSONScalar>(utf8(), R"(null)", false,
+ Buffer::FromString(""));
+ AssertJSONScalar(null(), "null", false, nullptr);
+ AssertJSONScalar(boolean(), "true", true, true);
+ AssertJSONScalar(boolean(), "false", true, false);
+ AssertJSONScalar(boolean(), "null", false, false);
+ AssertJSONScalar(boolean(), "0", true, false);
+ AssertJSONScalar(boolean(), "1", true, true);
+ AssertJSONScalar(float64(), "1.0", true, 1.0);
+ AssertJSONScalar(float64(), "-0.0", true, -0.0);
+ ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar));
+ ASSERT_TRUE(std::isnan(checked_cast(*scalar).value));
+ ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar));
+ ASSERT_TRUE(std::isinf(checked_cast(*scalar).value));
+}
+
+TEST(TestScalarFromJSON, Errors) {
+ std::shared_ptr scalar;
+ ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[0]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[9223372036854775808]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[-9223372036854775809]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[18446744073709551616]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[-1]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "0", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "[]", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "0.0", &scalar));
+ ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "\"true\"", &scalar));
+}
+
} // namespace json
} // namespace internal
} // namespace ipc