diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f0eb73dc41371..5e4c204581369 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -467,6 +467,8 @@ set(LINK_LIBS set(ARROW_SRCS src/arrow/array.cc src/arrow/builder.cc + src/arrow/field.cc + src/arrow/schema.cc src/arrow/type.cc ) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index eeea2dbc517b4..04f8dd1f908cb 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -30,4 +30,4 @@ install(FILES set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS}) ADD_ARROW_TEST(array-test) -ADD_ARROW_TEST(field-test) +ADD_ARROW_TEST(schema-test) diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 3d748c1bad6f8..0632146637e59 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -62,8 +62,8 @@ class Array { int32_t length() const { return length_;} int32_t null_count() const { return null_count_;} - const TypePtr& type() const { return type_;} - TypeEnum type_enum() const { return type_->type;} + const std::shared_ptr& type() const { return type_;} + LogicalType::type logical_type() const { return type_->type;} const std::shared_ptr& nulls() const { return nulls_; diff --git a/cpp/src/arrow/field-test.cc b/cpp/src/arrow/field-test.cc deleted file mode 100644 index 2bb8bad4054c3..0000000000000 --- a/cpp/src/arrow/field-test.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include "arrow/field.h" -#include "arrow/type.h" -#include "arrow/types/integer.h" - -using std::string; - -namespace arrow { - -TEST(TestField, Basics) { - TypePtr ftype = TypePtr(new Int32Type()); - Field f0("f0", ftype); - - ASSERT_EQ(f0.name, "f0"); - ASSERT_EQ(f0.type->ToString(), ftype->ToString()); -} - -} // namespace arrow diff --git a/cpp/src/arrow/field.h b/cpp/src/arrow/field.h index 664cae61a777a..89a450c66f256 100644 --- a/cpp/src/arrow/field.h +++ b/cpp/src/arrow/field.h @@ -35,12 +35,27 @@ struct Field { TypePtr type; Field(const std::string& name, const TypePtr& type) : - name(name), type(type) {} + name(name), + type(type) {} + + bool operator==(const Field& other) const { + return this->Equals(other); + } + + bool operator!=(const Field& other) const { + return !this->Equals(other); + } bool Equals(const Field& other) const { return (this == &other) || (this->name == other.name && this->type->Equals(other.type.get())); } + + bool nullable() const { + return this->type->nullable; + } + + std::string ToString() const; }; } // namespace arrow diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 12f19604c688d..04cdb52b535db 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -52,96 +52,98 @@ struct LayoutType { explicit LayoutType(LayoutEnum type) : type(type) {} }; - // Data types in this library are all *logical*. They can be expressed as // either a primitive physical type (bytes or bits of some fixed size), a // nested type consisting of other data types, or another data type (e.g. a // timestamp encoded as an int64) +struct LogicalType { + enum type { + // A degenerate NULL type represented as 0 bytes/bits + NA = 0, -enum class TypeEnum: char { - // A degenerate NULL type represented as 0 bytes/bits - NA = 0, - - // Little-endian integer types - UINT8 = 1, - INT8 = 2, - UINT16 = 3, - INT16 = 4, - UINT32 = 5, - INT32 = 6, - UINT64 = 7, - INT64 = 8, + // Little-endian integer types + UINT8 = 1, + INT8 = 2, + UINT16 = 3, + INT16 = 4, + UINT32 = 5, + INT32 = 6, + UINT64 = 7, + INT64 = 8, - // A boolean value represented as 1 byte - BOOL = 9, + // A boolean value represented as 1 byte + BOOL = 9, - // A boolean value represented as 1 bit - BIT = 10, + // A boolean value represented as 1 bit + BIT = 10, - // 4-byte floating point value - FLOAT = 11, + // 4-byte floating point value + FLOAT = 11, - // 8-byte floating point value - DOUBLE = 12, + // 8-byte floating point value + DOUBLE = 12, - // CHAR(N): fixed-length UTF8 string with length N - CHAR = 13, + // CHAR(N): fixed-length UTF8 string with length N + CHAR = 13, - // UTF8 variable-length string as List - STRING = 14, + // UTF8 variable-length string as List + STRING = 14, - // VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1) - VARCHAR = 15, + // VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1) + VARCHAR = 15, - // Variable-length bytes (no guarantee of UTF8-ness) - BINARY = 16, + // Variable-length bytes (no guarantee of UTF8-ness) + BINARY = 16, - // By default, int32 days since the UNIX epoch - DATE = 17, + // By default, int32 days since the UNIX epoch + DATE = 17, - // Exact timestamp encoded with int64 since UNIX epoch - // Default unit millisecond - TIMESTAMP = 18, + // Exact timestamp encoded with int64 since UNIX epoch + // Default unit millisecond + TIMESTAMP = 18, - // Timestamp as double seconds since the UNIX epoch - TIMESTAMP_DOUBLE = 19, + // Timestamp as double seconds since the UNIX epoch + TIMESTAMP_DOUBLE = 19, - // Exact time encoded with int64, default unit millisecond - TIME = 20, + // Exact time encoded with int64, default unit millisecond + TIME = 20, - // Precision- and scale-based decimal type. Storage type depends on the - // parameters. - DECIMAL = 21, + // Precision- and scale-based decimal type. Storage type depends on the + // parameters. + DECIMAL = 21, - // Decimal value encoded as a text string - DECIMAL_TEXT = 22, + // Decimal value encoded as a text string + DECIMAL_TEXT = 22, - // A list of some logical data type - LIST = 30, + // A list of some logical data type + LIST = 30, - // Struct of logical types - STRUCT = 31, + // Struct of logical types + STRUCT = 31, - // Unions of logical types - DENSE_UNION = 32, - SPARSE_UNION = 33, + // Unions of logical types + DENSE_UNION = 32, + SPARSE_UNION = 33, - // Union - JSON_SCALAR = 50, + // Union + JSON_SCALAR = 50, - // User-defined type - USER = 60 + // User-defined type + USER = 60 + }; }; - struct DataType { - TypeEnum type; + LogicalType::type type; + bool nullable; - explicit DataType(TypeEnum type) - : type(type) {} + explicit DataType(LogicalType::type type, bool nullable = true) : + type(type), + nullable(nullable) {} virtual bool Equals(const DataType* other) { - return this == other || this->type == other->type; + return this == other || (this->type == other->type && + this->nullable == other->nullable); } virtual std::string ToString() const = 0; @@ -171,6 +173,77 @@ struct ListLayoutType : public LayoutType { value_type(value_type) {} }; +template +struct PrimitiveType : public DataType { + explicit PrimitiveType(bool nullable = true) + : DataType(Derived::type_enum, nullable) {} + + virtual std::string ToString() const { + std::string result; + if (nullable) { + result.append("?"); + } + result.append(static_cast(this)->name()); + return result; + } +}; + +#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \ + typedef C_TYPE c_type; \ + static constexpr LogicalType::type type_enum = LogicalType::ENUM; \ + static constexpr int size = SIZE; \ + \ + explicit TYPENAME(bool nullable = true) \ + : PrimitiveType(nullable) {} \ + \ + static const char* name() { \ + return NAME; \ + } + +struct BooleanType : public PrimitiveType { + PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool"); +}; + +struct UInt8Type : public PrimitiveType { + PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8"); +}; + +struct Int8Type : public PrimitiveType { + PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8"); +}; + +struct UInt16Type : public PrimitiveType { + PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16"); +}; + +struct Int16Type : public PrimitiveType { + PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16"); +}; + +struct UInt32Type : public PrimitiveType { + PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32"); +}; + +struct Int32Type : public PrimitiveType { + PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32"); +}; + +struct UInt64Type : public PrimitiveType { + PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64"); +}; + +struct Int64Type : public PrimitiveType { + PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64"); +}; + +struct FloatType : public PrimitiveType { + PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float"); +}; + +struct DoubleType : public PrimitiveType { + PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double"); +}; + } // namespace arrow #endif // ARROW_TYPE_H diff --git a/cpp/src/arrow/types/binary.h b/cpp/src/arrow/types/binary.h index a9f20046b582b..1fd675e5fdebf 100644 --- a/cpp/src/arrow/types/binary.h +++ b/cpp/src/arrow/types/binary.h @@ -25,9 +25,6 @@ namespace arrow { -struct StringType : public DataType { -}; - } // namespace arrow #endif // ARROW_TYPES_BINARY_H diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h index 31388c8152d52..8fc9cfd19c0d4 100644 --- a/cpp/src/arrow/types/boolean.h +++ b/cpp/src/arrow/types/boolean.h @@ -22,10 +22,6 @@ namespace arrow { -struct BooleanType : public PrimitiveType { - PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool"); -}; - typedef PrimitiveArrayImpl BooleanArray; // typedef PrimitiveBuilder BooleanBuilder; diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h index 094b63f28988a..42a9c926bb134 100644 --- a/cpp/src/arrow/types/collection.h +++ b/cpp/src/arrow/types/collection.h @@ -25,7 +25,7 @@ namespace arrow { -template +template struct CollectionType : public DataType { std::vector child_types_; diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc index e1bb990063c1b..05d6b270fc3fd 100644 --- a/cpp/src/arrow/types/construct.cc +++ b/cpp/src/arrow/types/construct.cc @@ -33,7 +33,7 @@ class ArrayBuilder; // difficult #define BUILDER_CASE(ENUM, BuilderType) \ - case TypeEnum::ENUM: \ + case LogicalType::ENUM: \ *out = static_cast(new BuilderType(pool, type)); \ return Status::OK(); @@ -56,7 +56,7 @@ Status make_builder(MemoryPool* pool, const TypePtr& type, BUILDER_CASE(STRING, StringBuilder); - case TypeEnum::LIST: + case LogicalType::LIST: { ListType* list_type = static_cast(type.get()); ArrayBuilder* value_builder; diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h index d90883cb01871..765fc29dd57ae 100644 --- a/cpp/src/arrow/types/datetime.h +++ b/cpp/src/arrow/types/datetime.h @@ -31,8 +31,8 @@ struct DateType : public DataType { Unit unit; - explicit DateType(Unit unit = Unit::DAY) - : DataType(TypeEnum::DATE), + explicit DateType(Unit unit = Unit::DAY, bool nullable = true) + : DataType(LogicalType::DATE, nullable), unit(unit) {} DateType(const DateType& other) @@ -58,8 +58,8 @@ struct TimestampType : public DataType { Unit unit; - explicit TimestampType(Unit unit = Unit::MILLI) - : DataType(TypeEnum::TIMESTAMP), + explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true) + : DataType(LogicalType::TIMESTAMP, nullable), unit(unit) {} TimestampType(const TimestampType& other) diff --git a/cpp/src/arrow/types/floating.h b/cpp/src/arrow/types/floating.h index 7551ce665a27b..e7522781d33e3 100644 --- a/cpp/src/arrow/types/floating.h +++ b/cpp/src/arrow/types/floating.h @@ -21,17 +21,10 @@ #include #include "arrow/types/primitive.h" +#include "arrow/type.h" namespace arrow { -struct FloatType : public PrimitiveType { - PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float"); -}; - -struct DoubleType : public PrimitiveType { - PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double"); -}; - typedef PrimitiveArrayImpl FloatArray; typedef PrimitiveArrayImpl DoubleArray; diff --git a/cpp/src/arrow/types/integer.h b/cpp/src/arrow/types/integer.h index 7e5eab55be0a9..568419124941f 100644 --- a/cpp/src/arrow/types/integer.h +++ b/cpp/src/arrow/types/integer.h @@ -22,41 +22,10 @@ #include #include "arrow/types/primitive.h" +#include "arrow/type.h" namespace arrow { -struct UInt8Type : public PrimitiveType { - PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8"); -}; - -struct Int8Type : public PrimitiveType { - PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8"); -}; - -struct UInt16Type : public PrimitiveType { - PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16"); -}; - -struct Int16Type : public PrimitiveType { - PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16"); -}; - -struct UInt32Type : public PrimitiveType { - PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32"); -}; - -struct Int32Type : public PrimitiveType { - PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32"); -}; - -struct UInt64Type : public PrimitiveType { - PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64"); -}; - -struct Int64Type : public PrimitiveType { - PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64"); -}; - // Array containers typedef PrimitiveArrayImpl UInt8Array; diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h index 6c2b097a737c7..b67fb3807aded 100644 --- a/cpp/src/arrow/types/json.h +++ b/cpp/src/arrow/types/json.h @@ -28,8 +28,8 @@ struct JSONScalar : public DataType { static TypePtr dense_type; static TypePtr sparse_type; - explicit JSONScalar(bool dense = true) - : DataType(TypeEnum::JSON_SCALAR), + explicit JSONScalar(bool dense = true, bool nullable = true) + : DataType(LogicalType::JSON_SCALAR, nullable), dense(dense) {} }; diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 1d9ddbe607a41..b4bbd2841a89d 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -44,19 +44,19 @@ TEST(TypesTest, TestListType) { std::shared_ptr vt = std::make_shared(); ListType list_type(vt); - ASSERT_EQ(list_type.type, TypeEnum::LIST); + ASSERT_EQ(list_type.type, LogicalType::LIST); ASSERT_EQ(list_type.name(), string("list")); - ASSERT_EQ(list_type.ToString(), string("list")); + ASSERT_EQ(list_type.ToString(), string("?list")); ASSERT_EQ(list_type.value_type->type, vt->type); ASSERT_EQ(list_type.value_type->type, vt->type); - std::shared_ptr st = std::make_shared(); - std::shared_ptr lt = std::make_shared(st); + std::shared_ptr st = std::make_shared(false); + std::shared_ptr lt = std::make_shared(st, false); ASSERT_EQ(lt->ToString(), string("list")); - ListType lt2(lt); + ListType lt2(lt, false); ASSERT_EQ(lt2.ToString(), string("list>")); } diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index f0ff5bf928a1a..577d71d0b2892 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -24,6 +24,9 @@ namespace arrow { std::string ListType::ToString() const { std::stringstream s; + if (this->nullable) { + s << "?"; + } s << "list<" << value_type->ToString() << ">"; return s.str(); } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 4190b53df01cd..1fc83536db8c6 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -40,8 +40,8 @@ struct ListType : public DataType { // List can contain any other logical value type TypePtr value_type; - explicit ListType(const TypePtr& value_type) - : DataType(TypeEnum::LIST), + explicit ListType(const TypePtr& value_type, bool nullable = true) + : DataType(LogicalType::LIST, nullable), value_type(value_type) {} static char const *name() { @@ -51,7 +51,6 @@ struct ListType : public DataType { virtual std::string ToString() const; }; - class ListArray : public Array { public: ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {} diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 93634432d5ccb..02eaaa7542bf0 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -54,11 +54,11 @@ TEST(TypesTest, TestBytesType) { TEST(TypesTest, TestPrimitive_##ENUM) { \ KLASS tp; \ \ - ASSERT_EQ(tp.type, TypeEnum::ENUM); \ + ASSERT_EQ(tp.type, LogicalType::ENUM); \ ASSERT_EQ(tp.name(), string(NAME)); \ \ KLASS tp_copy = tp; \ - ASSERT_EQ(tp_copy.type, TypeEnum::ENUM); \ + ASSERT_EQ(tp_copy.type, LogicalType::ENUM); \ } PRIMITIVE_TEST(Int8Type, INT8, "int8"); diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index aa8f351202a20..49040fb66268f 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -34,28 +34,6 @@ namespace arrow { class MemoryPool; -template -struct PrimitiveType : public DataType { - PrimitiveType() - : DataType(Derived::type_enum) {} - - virtual std::string ToString() const { - return std::string(static_cast(this)->name()); - } -}; - -#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \ - typedef C_TYPE c_type; \ - static constexpr TypeEnum type_enum = TypeEnum::ENUM; \ - static constexpr int size = SIZE; \ - \ - TYPENAME() \ - : PrimitiveType() {} \ - \ - static const char* name() { \ - return NAME; \ - } - // Base class for fixed-size logical types class PrimitiveArray : public Array { diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc index e1dcebe97f013..9af667295026b 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/types/string-test.cc @@ -38,14 +38,14 @@ class Buffer; TEST(TypesTest, TestCharType) { CharType t1(5); - ASSERT_EQ(t1.type, TypeEnum::CHAR); + ASSERT_EQ(t1.type, LogicalType::CHAR); ASSERT_EQ(t1.size, 5); ASSERT_EQ(t1.ToString(), std::string("char(5)")); // Test copy constructor CharType t2 = t1; - ASSERT_EQ(t2.type, TypeEnum::CHAR); + ASSERT_EQ(t2.type, LogicalType::CHAR); ASSERT_EQ(t2.size, 5); } @@ -53,7 +53,7 @@ TEST(TypesTest, TestCharType) { TEST(TypesTest, TestVarcharType) { VarcharType t1(5); - ASSERT_EQ(t1.type, TypeEnum::VARCHAR); + ASSERT_EQ(t1.type, LogicalType::VARCHAR); ASSERT_EQ(t1.size, 5); ASSERT_EQ(t1.physical_type.size, 6); @@ -61,14 +61,14 @@ TEST(TypesTest, TestVarcharType) { // Test copy constructor VarcharType t2 = t1; - ASSERT_EQ(t2.type, TypeEnum::VARCHAR); + ASSERT_EQ(t2.type, LogicalType::VARCHAR); ASSERT_EQ(t2.size, 5); ASSERT_EQ(t2.physical_type.size, 6); } TEST(TypesTest, TestStringType) { StringType str; - ASSERT_EQ(str.type, TypeEnum::STRING); + ASSERT_EQ(str.type, LogicalType::STRING); ASSERT_EQ(str.name(), std::string("string")); } @@ -128,8 +128,8 @@ TEST_F(TestStringContainer, TestArrayBasics) { TEST_F(TestStringContainer, TestType) { TypePtr type = strings_.type(); - ASSERT_EQ(TypeEnum::STRING, type->type); - ASSERT_EQ(TypeEnum::STRING, strings_.type_enum()); + ASSERT_EQ(LogicalType::STRING, type->type); + ASSERT_EQ(LogicalType::STRING, strings_.logical_type()); } diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h index 084562530a8fc..5795cfed577c5 100644 --- a/cpp/src/arrow/types/string.h +++ b/cpp/src/arrow/types/string.h @@ -40,8 +40,8 @@ struct CharType : public DataType { BytesType physical_type; - explicit CharType(int size) - : DataType(TypeEnum::CHAR), + explicit CharType(int size, bool nullable = true) + : DataType(LogicalType::CHAR, nullable), size(size), physical_type(BytesType(size)) {} @@ -58,8 +58,8 @@ struct VarcharType : public DataType { BytesType physical_type; - explicit VarcharType(int size) - : DataType(TypeEnum::VARCHAR), + explicit VarcharType(int size, bool nullable = true) + : DataType(LogicalType::VARCHAR, nullable), size(size), physical_type(BytesType(size + 1)) {} VarcharType(const VarcharType& other) @@ -73,26 +73,26 @@ static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1)); // String is a logical type consisting of a physical list of 1-byte values struct StringType : public DataType { - StringType() - : DataType(TypeEnum::STRING) {} + explicit StringType(bool nullable = true) + : DataType(LogicalType::STRING, nullable) {} StringType(const StringType& other) : StringType() {} - const LayoutPtr& physical_type() { - return physical_string; - } - static char const *name() { return "string"; } virtual std::string ToString() const { - return name(); + std::string result; + if (nullable) { + result.append("?"); + } + result.append(name()); + return result; } }; - // TODO: add a BinaryArray layer in between class StringArray : public ListArray { public: diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc index 1a9fc6be4a5ce..df6157104795e 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/types/struct-test.cc @@ -49,7 +49,7 @@ TEST(TestStructType, Basics) { ASSERT_TRUE(struct_type.field(1).Equals(f1)); ASSERT_TRUE(struct_type.field(2).Equals(f2)); - ASSERT_EQ(struct_type.ToString(), "struct"); + ASSERT_EQ(struct_type.ToString(), "?struct"); // TODO: out of bounds for field(...) } diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc index a245656b516cc..6b233bc372af1 100644 --- a/cpp/src/arrow/types/struct.cc +++ b/cpp/src/arrow/types/struct.cc @@ -26,6 +26,7 @@ namespace arrow { std::string StructType::ToString() const { std::stringstream s; + if (nullable) s << "?"; s << "struct<"; for (size_t i = 0; i < fields_.size(); ++i) { if (i > 0) s << ", "; diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h index afba19a7e4699..e575c31287cb2 100644 --- a/cpp/src/arrow/types/struct.h +++ b/cpp/src/arrow/types/struct.h @@ -29,8 +29,8 @@ namespace arrow { struct StructType : public DataType { std::vector fields_; - explicit StructType(const std::vector& fields) - : DataType(TypeEnum::STRUCT) { + explicit StructType(const std::vector& fields, bool nullable = true) + : DataType(LogicalType::STRUCT, nullable) { fields_ = fields; } diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h index 62a3d1c10355d..9aff780c6a392 100644 --- a/cpp/src/arrow/types/union.h +++ b/cpp/src/arrow/types/union.h @@ -30,8 +30,8 @@ namespace arrow { class Buffer; -struct DenseUnionType : public CollectionType { - typedef CollectionType Base; +struct DenseUnionType : public CollectionType { + typedef CollectionType Base; explicit DenseUnionType(const std::vector& child_types) : Base() { @@ -42,8 +42,8 @@ struct DenseUnionType : public CollectionType { }; -struct SparseUnionType : public CollectionType { - typedef CollectionType Base; +struct SparseUnionType : public CollectionType { + typedef CollectionType Base; explicit SparseUnionType(const std::vector& child_types) : Base() {