Skip to content

Commit 8e8d6d3

Browse files
committed
Implement simple C++ pretty printer for record batches. Debugging efforts
Change-Id: Id413c1295c85f1b6d8954e0f7bd4dc6eed958a52
1 parent 65b74b3 commit 8e8d6d3

File tree

12 files changed

+383
-22
lines changed

12 files changed

+383
-22
lines changed

cpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,7 @@ set(ARROW_SRCS
745745
src/arrow/array.cc
746746
src/arrow/builder.cc
747747
src/arrow/column.cc
748+
src/arrow/pretty_print.cc
748749
src/arrow/schema.cc
749750
src/arrow/table.cc
750751
src/arrow/type.cc

cpp/src/arrow/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ install(FILES
2121
array.h
2222
column.h
2323
builder.h
24+
pretty_print.h
2425
schema.h
2526
table.h
2627
type.h
@@ -37,6 +38,7 @@ set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
3738

3839
ADD_ARROW_TEST(array-test)
3940
ADD_ARROW_TEST(column-test)
41+
ADD_ARROW_TEST(pretty_print-test)
4042
ADD_ARROW_TEST(schema-test)
4143
ADD_ARROW_TEST(table-test)
4244

cpp/src/arrow/ipc/ipc-json-test.cc

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -96,26 +96,6 @@ void CheckPrimitive(const std::shared_ptr<DataType>& type,
9696
TestArrayRoundTrip(*array.get());
9797
}
9898

99-
template <typename TYPE, typename C_TYPE>
100-
void MakeArray(const std::shared_ptr<DataType>& type, const std::vector<bool>& is_valid,
101-
const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
102-
std::shared_ptr<Buffer> values_buffer;
103-
std::shared_ptr<Buffer> values_bitmap;
104-
105-
ASSERT_OK(test::CopyBufferFromVector(values, &values_buffer));
106-
ASSERT_OK(test::GetBitmapFromBoolVector(is_valid, &values_bitmap));
107-
108-
using ArrayType = typename TypeTraits<TYPE>::ArrayType;
109-
110-
int32_t null_count = 0;
111-
for (bool val : is_valid) {
112-
if (!val) { ++null_count; }
113-
}
114-
115-
*out = std::make_shared<ArrayType>(type, static_cast<int32_t>(values.size()),
116-
values_buffer, null_count, values_bitmap);
117-
}
118-
11999
TEST(TestJsonSchemaWriter, FlatTypes) {
120100
std::vector<std::shared_ptr<Field>> fields = {field("f0", int8()),
121101
field("f1", int16(), false), field("f2", int32()), field("f3", int64(), false),

cpp/src/arrow/ipc/json-integration-test.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "arrow/io/file.h"
3232
#include "arrow/ipc/file.h"
3333
#include "arrow/ipc/json.h"
34+
#include "arrow/pretty_print.h"
3435
#include "arrow/schema.h"
3536
#include "arrow/table.h"
3637
#include "arrow/test-util.h"
@@ -171,6 +172,12 @@ static Status ValidateArrowVsJson(
171172
if (!json_batch->Equals(*arrow_batch.get())) {
172173
std::stringstream ss;
173174
ss << "Record batch " << i << " did not match";
175+
176+
ss << "\nJSON: \n ";
177+
RETURN_NOT_OK(PrettyPrint(*json_batch.get(), &ss));
178+
179+
ss << "\nArrow: \n ";
180+
RETURN_NOT_OK(PrettyPrint(*arrow_batch.get(), &ss));
174181
return Status::Invalid(ss.str());
175182
}
176183
}

cpp/src/arrow/ipc/json-internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ class JsonSchemaWriter : public TypeVisitor {
343343

344344
class JsonArrayWriter : public ArrayVisitor {
345345
public:
346-
explicit JsonArrayWriter(const std::string& name, const Array& array, RjWriter* writer)
346+
JsonArrayWriter(const std::string& name, const Array& array, RjWriter* writer)
347347
: name_(name), array_(array), writer_(writer) {}
348348

349349
Status Write() { return VisitArray(name_, array_); }

cpp/src/arrow/pretty_print-test.cc

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <cstdint>
19+
#include <cstdlib>
20+
#include <cstdio>
21+
#include <cstring>
22+
#include <memory>
23+
#include <sstream>
24+
#include <vector>
25+
26+
#include "gtest/gtest.h"
27+
28+
#include "arrow/array.h"
29+
#include "arrow/pretty_print.h"
30+
#include "arrow/test-util.h"
31+
#include "arrow/type.h"
32+
#include "arrow/types/list.h"
33+
#include "arrow/types/primitive.h"
34+
#include "arrow/types/string.h"
35+
#include "arrow/types/struct.h"
36+
#include "arrow/type_traits.h"
37+
38+
namespace arrow {
39+
40+
class TestArrayPrinter : public ::testing::Test {
41+
public:
42+
void SetUp() {}
43+
44+
void Print(const Array& array) {}
45+
46+
private:
47+
std::ostringstream sink_;
48+
};
49+
50+
template <typename TYPE, typename C_TYPE>
51+
void CheckPrimitive(const std::vector<bool>& is_valid,
52+
const std::vector<C_TYPE>& values, const char* expected) {
53+
std::ostringstream sink;
54+
55+
MemoryPool* pool = default_memory_pool();
56+
typename TypeTraits<TYPE>::BuilderType builder(pool, std::make_shared<TYPE>());
57+
58+
for (size_t i = 0; i < values.size(); ++i) {
59+
if (is_valid[i]) {
60+
ASSERT_OK(builder.Append(values[i]));
61+
} else {
62+
ASSERT_OK(builder.AppendNull());
63+
}
64+
}
65+
66+
std::shared_ptr<Array> array;
67+
ASSERT_OK(builder.Finish(&array));
68+
69+
ASSERT_OK(PrettyPrint(*array.get(), &sink));
70+
71+
std::string result = sink.str();
72+
ASSERT_EQ(std::string(expected, strlen(expected)), result);
73+
}
74+
75+
// Prints an int32 array and a string array that share one validity mask and
// checks the exact text produced for each.
TEST_F(TestArrayPrinter, PrimitiveType) {
  // Slots 2 and 4 are null in both arrays.
  const std::vector<bool> validity = {true, true, false, true, false};

  const std::vector<int32_t> int_values = {0, 1, 2, 3, 4};
  static const char* int_expected = R"expected([0, 1, null, 3, null])expected";
  CheckPrimitive<Int32Type, int32_t>(validity, int_values, int_expected);

  const std::vector<std::string> str_values = {"foo", "bar", "", "baz", ""};
  static const char* str_expected =
      R"expected(["foo", "bar", null, "baz", null])expected";
  CheckPrimitive<StringType, std::string>(validity, str_values, str_expected);
}
86+
87+
} // namespace arrow

cpp/src/arrow/pretty_print.cc

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <ostream>
19+
20+
#include "arrow/array.h"
21+
#include "arrow/table.h"
22+
#include "arrow/type.h"
23+
#include "arrow/type_traits.h"
24+
#include "arrow/types/list.h"
25+
#include "arrow/types/string.h"
26+
#include "arrow/types/struct.h"
27+
#include "arrow/pretty_print.h"
28+
29+
namespace arrow {
30+
31+
class ArrayPrinter : public ArrayVisitor {
32+
public:
33+
ArrayPrinter(const Array& array, std::ostream* sink)
34+
: array_(array), sink_(sink) {}
35+
36+
Status Print() {
37+
return VisitArray(array_);
38+
}
39+
40+
Status VisitArray(const Array& array) {
41+
return array.Accept(this);
42+
}
43+
44+
template <typename T>
45+
typename std::enable_if<IsNumeric<T>::value, void>::type WriteDataValues(
46+
const T& array) {
47+
const auto data = array.raw_data();
48+
for (int i = 0; i < array.length(); ++i) {
49+
if (i > 0) {
50+
(*sink_) << ", ";
51+
}
52+
if (array.IsNull(i)) {
53+
(*sink_) << "null";
54+
} else {
55+
(*sink_) << data[i];
56+
}
57+
}
58+
}
59+
60+
// String (Utf8), Binary
61+
template <typename T>
62+
typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type
63+
WriteDataValues(const T& array) {
64+
int32_t length;
65+
for (int i = 0; i < array.length(); ++i) {
66+
if (i > 0) {
67+
(*sink_) << ", ";
68+
}
69+
if (array.IsNull(i)) {
70+
(*sink_) << "null";
71+
} else {
72+
const char* buf = reinterpret_cast<const char*>(array.GetValue(i, &length));
73+
(*sink_) << "\"" << std::string(buf, length) << "\"";
74+
}
75+
}
76+
}
77+
78+
template <typename T>
79+
typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type
80+
WriteDataValues(const T& array) {
81+
for (int i = 0; i < array.length(); ++i) {
82+
if (i > 0) {
83+
(*sink_) << ", ";
84+
}
85+
if (array.IsNull(i)) {
86+
(*sink_) << "null";
87+
} else {
88+
(*sink_) << (array.Value(i) ? "true" : "false");
89+
}
90+
}
91+
}
92+
93+
void OpenArray() {
94+
(*sink_) << "[";
95+
}
96+
97+
void CloseArray() {
98+
(*sink_) << "]";
99+
}
100+
101+
template <typename T>
102+
Status WritePrimitive(const T& array) {
103+
OpenArray();
104+
WriteDataValues(array);
105+
CloseArray();
106+
return Status::OK();
107+
}
108+
109+
template <typename T>
110+
Status WriteVarBytes(const T& array) {
111+
OpenArray();
112+
WriteDataValues(array);
113+
CloseArray();
114+
return Status::OK();
115+
}
116+
117+
Status Visit(const NullArray& array) override {
118+
return Status::OK();
119+
}
120+
121+
Status Visit(const BooleanArray& array) override { return WritePrimitive(array); }
122+
123+
Status Visit(const Int8Array& array) override { return WritePrimitive(array); }
124+
125+
Status Visit(const Int16Array& array) override { return WritePrimitive(array); }
126+
127+
Status Visit(const Int32Array& array) override { return WritePrimitive(array); }
128+
129+
Status Visit(const Int64Array& array) override { return WritePrimitive(array); }
130+
131+
Status Visit(const UInt8Array& array) override { return WritePrimitive(array); }
132+
133+
Status Visit(const UInt16Array& array) override { return WritePrimitive(array); }
134+
135+
Status Visit(const UInt32Array& array) override { return WritePrimitive(array); }
136+
137+
Status Visit(const UInt64Array& array) override { return WritePrimitive(array); }
138+
139+
Status Visit(const HalfFloatArray& array) override { return WritePrimitive(array); }
140+
141+
Status Visit(const FloatArray& array) override { return WritePrimitive(array); }
142+
143+
Status Visit(const DoubleArray& array) override { return WritePrimitive(array); }
144+
145+
Status Visit(const StringArray& array) override { return WriteVarBytes(array); }
146+
147+
Status Visit(const BinaryArray& array) override { return WriteVarBytes(array); }
148+
149+
Status Visit(const DateArray& array) override { return Status::NotImplemented("date"); }
150+
151+
Status Visit(const TimeArray& array) override { return Status::NotImplemented("time"); }
152+
153+
Status Visit(const TimestampArray& array) override {
154+
return Status::NotImplemented("timestamp");
155+
}
156+
157+
Status Visit(const IntervalArray& array) override {
158+
return Status::NotImplemented("interval");
159+
}
160+
161+
Status Visit(const DecimalArray& array) override {
162+
return Status::NotImplemented("decimal");
163+
}
164+
165+
Status Visit(const ListArray& array) override {
166+
// auto type = static_cast<const ListType*>(array.type().get());
167+
// for (size_t i = 0; i < fields.size(); ++i) {
168+
// RETURN_NOT_OK(VisitArray(fields[i]->name, *arrays[i].get()));
169+
// }
170+
// return WriteChildren(type->children(), {array.values()});
171+
return Status::OK();
172+
}
173+
174+
Status Visit(const StructArray& array) override {
175+
// auto type = static_cast<const StructType*>(array.type().get());
176+
// for (size_t i = 0; i < fields.size(); ++i) {
177+
// RETURN_NOT_OK(VisitArray(fields[i]->name, *arrays[i].get()));
178+
// }
179+
// return WriteChildren(type->children(), array.fields());
180+
return Status::OK();
181+
}
182+
183+
Status Visit(const UnionArray& array) override {
184+
return Status::NotImplemented("union");
185+
}
186+
187+
private:
188+
const Array& array_;
189+
std::ostream* sink_;
190+
};
191+
192+
// Writes a textual rendering of `arr` to `sink` by running an ArrayPrinter
// over it, and returns the printer's status.
Status PrettyPrint(const Array& arr, std::ostream* sink) {
  return ArrayPrinter(arr, sink).Print();
}
196+
197+
// Writes every column of `batch` to `sink`, one per line, as the column name
// followed by ": " and the printed array. Returns the first non-OK status
// produced while printing a column, or Status::OK once all columns are done.
Status PrettyPrint(const RecordBatch& batch, std::ostream* sink) {
  std::ostream& out = *sink;
  const auto num_columns = batch.num_columns();
  for (int col = 0; col < num_columns; ++col) {
    out << batch.column_name(col) << ": ";
    RETURN_NOT_OK(PrettyPrint(*batch.column(col), sink));
    out << "\n";
  }
  return Status::OK();
}
206+
207+
} // namespace arrow

0 commit comments

Comments
 (0)