Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,33 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
return Equals(*other.get());
}

bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
const EqualOptions& equal_options) const {
if (length_ != other.length()) {
return false;
}
if (null_count_ != other.null_count()) {
return false;
}
// We cannot toggle check_metadata here yet, so we don't check it
if (!type_->Equals(*other.type_, /*check_metadata=*/false)) {
return false;
}

// Check contents of the underlying arrays. This checks for equality of
// the underlying data independently of the chunk size.
return internal::ApplyBinaryChunked(
*this, other,
[&](const Array& left_piece, const Array& right_piece,
int64_t ARROW_ARG_UNUSED(position)) {
if (!left_piece.ApproxEquals(right_piece, equal_options)) {
return Status::Invalid("Unequal piece");
}
return Status::OK();
})
.ok();
}

std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
ARROW_CHECK_LE(offset, length_) << "Slice offset greater than array length";
bool offset_equals_length = offset == length_;
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <utility>
#include <vector>

#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
Expand Down Expand Up @@ -136,6 +137,9 @@ class ARROW_EXPORT ChunkedArray {
bool Equals(const ChunkedArray& other) const;
/// \brief Determine if two chunked arrays are equal.
bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
/// \brief Determine if two chunked arrays are approximately equal
///
/// Lengths, null counts and types (ignoring metadata) must match; element
/// values are compared piecewise with Array::ApproxEquals using the given
/// options, independently of how each side is chunked.
bool ApproxEquals(const ChunkedArray& other,
const EqualOptions& = EqualOptions::Defaults()) const;

/// \return PrettyPrint representation suitable for debugging
std::string ToString() const;
Expand Down
21 changes: 21 additions & 0 deletions cpp/src/arrow/ipc/json_simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "arrow/array/builder_time.h"
#include "arrow/array/builder_union.h"
#include "arrow/ipc/json_simple.h"
#include "arrow/scalar.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
Expand Down Expand Up @@ -911,6 +912,26 @@ Status DictArrayFromJSON(const std::shared_ptr<DataType>& type,
.Value(out);
}

// Parse `json_string` as one JSON value and convert it to a Scalar of `type`.
//
// The value is appended to a converter for `type`, the converter is finished
// into an array, and the element at index 0 of that array is written to `*out`.
// Returns Invalid when the text is not well-formed JSON; converter errors are
// propagated unchanged.
Status ScalarFromJSON(const std::shared_ptr<DataType>& type,
                      util::string_view json_string, std::shared_ptr<Scalar>* out) {
  std::shared_ptr<Converter> value_converter;
  RETURN_NOT_OK(GetConverter(type, &value_converter));

  rj::Document document;
  document.Parse<kParseFlags>(json_string.data(), json_string.length());
  if (document.HasParseError()) {
    return Status::Invalid("JSON parse error at offset ", document.GetErrorOffset(), ": ",
                           GetParseError_En(document.GetParseError()));
  }

  // The whole document is treated as a single element of the target type.
  RETURN_NOT_OK(value_converter->AppendValue(document));

  std::shared_ptr<Array> single_value;
  RETURN_NOT_OK(value_converter->Finish(&single_value));
  DCHECK_EQ(single_value->length(), 1);

  return single_value->GetScalar(0).Value(out);
}

} // namespace json
} // namespace internal
} // namespace ipc
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/ipc/json_simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ ARROW_EXPORT
Status DictArrayFromJSON(const std::shared_ptr<DataType>&, util::string_view indices_json,
util::string_view dictionary_json, std::shared_ptr<Array>* out);

/// \brief Build a Scalar of the given type from a single JSON value.
///
/// The JSON text is parsed as one value (not an array of values) and the
/// resulting scalar is stored in `out`. A non-OK Status is returned when the
/// text cannot be parsed or the value cannot be converted to the type.
ARROW_EXPORT
Status ScalarFromJSON(const std::shared_ptr<DataType>&, util::string_view json,
std::shared_ptr<Scalar>* out);

} // namespace json
} // namespace internal
} // namespace ipc
Expand Down
54 changes: 53 additions & 1 deletion cpp/src/arrow/ipc/json_simple_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,21 @@ void AssertJSONDictArray(const std::shared_ptr<DataType>& index_type,
AssertArraysEqual(*expected_values, *dict_array.dictionary());
}

// Check that parsing `json` as a scalar of `type` yields the expected scalar.
//
// When `is_valid` is true the expectation is MakeScalar(type, value);
// otherwise it is the null scalar of `type` and `value` is ignored.
template <typename T, typename C_TYPE = typename T::c_type>
void AssertJSONScalar(const std::shared_ptr<DataType>& type, const std::string& json,
                      const bool is_valid, const C_TYPE value) {
  SCOPED_TRACE(json);

  std::shared_ptr<Scalar> parsed;
  ASSERT_OK(ScalarFromJSON(type, json, &parsed));

  std::shared_ptr<Scalar> reference;
  if (!is_valid) {
    reference = MakeNullScalar(type);
  } else {
    ASSERT_OK_AND_ASSIGN(reference, MakeScalar(type, value));
  }

  AssertScalarsEqual(*reference, *parsed, /*verbose=*/true);
}

TEST(TestHelper, JSONArray) {
// Test the JSONArray helper func
std::string s =
Expand Down Expand Up @@ -329,7 +344,6 @@ TEST(TestNull, Errors) {

TEST(TestBoolean, Basics) {
std::shared_ptr<DataType> type = boolean();
std::shared_ptr<Array> expected, actual;

AssertJSONArray<BooleanType, bool>(type, "[]", {});
AssertJSONArray<BooleanType, bool>(type, "[false, true, false]", {false, true, false});
Expand Down Expand Up @@ -1327,6 +1341,44 @@ TEST(TestDictArrayFromJSON, Errors) {
&array)); // dict value isn't string
}

// Round-trips a handful of representative JSON values through ScalarFromJSON
// and compares them against directly constructed scalars.
TEST(TestScalarFromJSON, Basics) {
  // Sanity check for common types (not exhaustive)
  std::shared_ptr<Scalar> scalar;
  AssertJSONScalar<Int64Type>(int64(), "4", true, 4);
  AssertJSONScalar<Int64Type>(int64(), "null", false, 0);
  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"("")", true,
                                                        Buffer::FromString(""));
  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"("foo")", true,
                                                        Buffer::FromString("foo"));
  AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"(null)", false,
                                                        Buffer::FromString(""));
  AssertJSONScalar<NullType, std::nullptr_t>(null(), "null", false, nullptr);
  AssertJSONScalar<BooleanType, bool>(boolean(), "true", true, true);
  AssertJSONScalar<BooleanType, bool>(boolean(), "false", true, false);
  AssertJSONScalar<BooleanType, bool>(boolean(), "null", false, false);
  AssertJSONScalar<BooleanType, bool>(boolean(), "0", true, false);
  AssertJSONScalar<BooleanType, bool>(boolean(), "1", true, true);
  // Let C_TYPE default to double: passing the expectation through `bool`
  // would narrow 1.0 to true and -0.0 to false before the scalar is built.
  AssertJSONScalar<DoubleType>(float64(), "1.0", true, 1.0);
  AssertJSONScalar<DoubleType>(float64(), "-0.0", true, -0.0);
  // NaN never compares equal to itself and Inf is simplest to check directly,
  // so inspect the parsed value instead of comparing scalars.
  ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar));
  ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*scalar).value));
  ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar));
  ASSERT_TRUE(std::isinf(checked_cast<DoubleScalar&>(*scalar).value));
}

// Inputs that ScalarFromJSON must reject with Status::Invalid.
TEST(TestScalarFromJSON, Errors) {
  std::shared_ptr<Scalar> scalar;
  // A JSON array is not a valid scalar for a primitive type, whatever it holds
  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[0]", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[9223372036854775808]", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[-9223372036854775809]", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[18446744073709551616]", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[-1]", &scalar));
  // The same out-of-range values as bare scalars, so that the rejection is
  // caused by the value itself rather than by the enclosing array
  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "9223372036854775808", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "-9223372036854775809", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "18446744073709551616", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "-1", &scalar));
  // Wrong JSON type for the requested Arrow type
  ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "0", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "[]", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "0.0", &scalar));
  ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "\"true\"", &scalar));
}

} // namespace json
} // namespace internal
} // namespace ipc
Expand Down
49 changes: 49 additions & 0 deletions cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,20 @@ void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& a
}
}

void AssertChunkedApproxEquivalent(const ChunkedArray& expected,
const ChunkedArray& actual,
const EqualOptions& equal_options) {
if (!actual.ApproxEquals(expected, equal_options)) {
std::stringstream pp_expected;
std::stringstream pp_actual;
::arrow::PrettyPrintOptions options(/*indent=*/2);
options.window = 50;
ARROW_EXPECT_OK(PrettyPrint(expected, options, &pp_expected));
ARROW_EXPECT_OK(PrettyPrint(actual, options, &pp_actual));
FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str();
}
}

void AssertBufferEqual(const Buffer& buffer, const std::vector<uint8_t>& expected) {
ASSERT_EQ(static_cast<size_t>(buffer.size()), expected.size())
<< "Mismatching buffer size";
Expand Down Expand Up @@ -361,6 +375,34 @@ void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose)
}
}

// Assert that two datums hold approximately equal values.
//
// Both datums must be of the same kind. Scalars, arrays and chunked arrays
// are forwarded to the matching approximate assertion helper; every other
// kind is compared with Datum::Equals.
void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool verbose,
                             const EqualOptions& options) {
  ASSERT_EQ(expected.kind(), actual.kind())
      << "expected:" << expected.ToString() << " got:" << actual.ToString();

  const auto kind = expected.kind();
  if (kind == Datum::SCALAR) {
    AssertScalarsApproxEqual(*expected.scalar(), *actual.scalar(), verbose, options);
  } else if (kind == Datum::ARRAY) {
    const auto expected_values = expected.make_array();
    const auto actual_values = actual.make_array();
    AssertArraysApproxEqual(*expected_values, *actual_values, verbose, options);
  } else if (kind == Datum::CHUNKED_ARRAY) {
    const auto expected_chunks = expected.chunked_array();
    const auto actual_chunks = actual.chunked_array();
    AssertChunkedApproxEquivalent(*expected_chunks, *actual_chunks, options);
  } else {
    // TODO: Implement better print
    ASSERT_TRUE(actual.Equals(expected));
  }
}

std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
util::string_view json) {
std::shared_ptr<Array> out;
Expand Down Expand Up @@ -396,6 +438,13 @@ std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&
return *RecordBatch::FromStructArray(struct_array);
}

// Test convenience wrapper: build a Scalar of `type` from a single JSON value
// by delegating to ipc::internal::json::ScalarFromJSON. Any failure status is
// handed to ABORT_NOT_OK instead of being returned to the caller.
std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>& type,
util::string_view json) {
std::shared_ptr<Scalar> out;
ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach might be worthwhile for consistency's sake, but we could also use Scalar::CastTo

return out;
}

std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>& schema,
const std::vector<std::string>& json) {
std::vector<std::shared_ptr<RecordBatch>> batches;
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/testing/gtest_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
// Like ChunkedEqual, but permits different chunk layout
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
const ChunkedArray& actual);
// Like AssertChunkedEquivalent, but values are compared approximately
// (ChunkedArray::ApproxEquals) using the given EqualOptions
ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& equal_options = EqualOptions::Defaults());
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
const std::vector<uint8_t>& expected);
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
Expand Down Expand Up @@ -246,6 +249,9 @@ ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table&

ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
bool verbose = false);
// Like AssertDatumsEqual, but scalar, array and chunked array datums are
// compared approximately using the given EqualOptions
ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
const Datum& expected, const Datum& actual, bool verbose = false,
const EqualOptions& options = EqualOptions::Defaults());

template <typename C_TYPE>
void AssertNumericDataEqual(const C_TYPE* raw_data,
Expand Down Expand Up @@ -301,6 +307,10 @@ ARROW_TESTING_EXPORT
std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>&,
const std::vector<std::string>& json);

// Build a Scalar of the given type from a single JSON value (e.g. "4",
// "null", "\"foo\""); the input is one value, not a JSON array of values
ARROW_TESTING_EXPORT
std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>&,
util::string_view json);

ARROW_TESTING_EXPORT
std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>&,
const std::vector<std::string>& json);
Expand Down