Skip to content

Commit

Permalink
Add simple in-memory Schema data structure. Restore nullable bit to type
Browse files Browse the repository at this point in the history
metadata only. Add "?" to nullable type formatting.
  • Loading branch information
wesm committed Mar 3, 2016
1 parent b88b69e commit d54595d
Show file tree
Hide file tree
Showing 25 changed files with 204 additions and 216 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,8 @@ set(LINK_LIBS
set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
src/arrow/field.cc
src/arrow/schema.cc
src/arrow/type.cc
)

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ install(FILES
set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS})

ADD_ARROW_TEST(array-test)
ADD_ARROW_TEST(field-test)
ADD_ARROW_TEST(schema-test)
4 changes: 2 additions & 2 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class Array {
// Accessor: returns the value of the length_ member.
int32_t length() const { return length_;}
// Accessor: returns the value of the null_count_ member.
int32_t null_count() const { return null_count_;}

const TypePtr& type() const { return type_;}
TypeEnum type_enum() const { return type_->type;}
const std::shared_ptr<DataType>& type() const { return type_;}
LogicalType::type logical_type() const { return type_->type;}

const std::shared_ptr<Buffer>& nulls() const {
return nulls_;
Expand Down
38 changes: 0 additions & 38 deletions cpp/src/arrow/field-test.cc

This file was deleted.

17 changes: 16 additions & 1 deletion cpp/src/arrow/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,27 @@ struct Field {
TypePtr type;

Field(const std::string& name, const TypePtr& type) :
name(name), type(type) {}
name(name),
type(type) {}

// Equality operator; simply forwards to Equals().
bool operator==(const Field& other) const {
  return Equals(other);
}

// Inequality operator; the logical negation of Equals().
bool operator!=(const Field& other) const {
  const bool same = Equals(other);
  return !same;
}

bool Equals(const Field& other) const {
return (this == &other) || (this->name == other.name &&
this->type->Equals(other.type.get()));
}

bool nullable() const {
return this->type->nullable;
}

std::string ToString() const;
};

} // namespace arrow
Expand Down
193 changes: 133 additions & 60 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,96 +52,98 @@ struct LayoutType {
explicit LayoutType(LayoutEnum type) : type(type) {}
};


// Data types in this library are all *logical*. They can be expressed as
// either a primitive physical type (bytes or bits of some fixed size), a
// nested type consisting of other data types, or another data type (e.g. a
// timestamp encoded as an int64)
struct LogicalType {
enum type {
// A degenerate NULL type represented as 0 bytes/bits
NA = 0,

enum class TypeEnum: char {
// A degenerate NULL type represented as 0 bytes/bits
NA = 0,

// Little-endian integer types
UINT8 = 1,
INT8 = 2,
UINT16 = 3,
INT16 = 4,
UINT32 = 5,
INT32 = 6,
UINT64 = 7,
INT64 = 8,
// Little-endian integer types
UINT8 = 1,
INT8 = 2,
UINT16 = 3,
INT16 = 4,
UINT32 = 5,
INT32 = 6,
UINT64 = 7,
INT64 = 8,

// A boolean value represented as 1 byte
BOOL = 9,
// A boolean value represented as 1 byte
BOOL = 9,

// A boolean value represented as 1 bit
BIT = 10,
// A boolean value represented as 1 bit
BIT = 10,

// 4-byte floating point value
FLOAT = 11,
// 4-byte floating point value
FLOAT = 11,

// 8-byte floating point value
DOUBLE = 12,
// 8-byte floating point value
DOUBLE = 12,

// CHAR(N): fixed-length UTF8 string with length N
CHAR = 13,
// CHAR(N): fixed-length UTF8 string with length N
CHAR = 13,

// UTF8 variable-length string as List<Char>
STRING = 14,
// UTF8 variable-length string as List<Char>
STRING = 14,

// VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
VARCHAR = 15,
// VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
VARCHAR = 15,

// Variable-length bytes (no guarantee of UTF8-ness)
BINARY = 16,
// Variable-length bytes (no guarantee of UTF8-ness)
BINARY = 16,

// By default, int32 days since the UNIX epoch
DATE = 17,
// By default, int32 days since the UNIX epoch
DATE = 17,

// Exact timestamp encoded with int64 since UNIX epoch
// Default unit millisecond
TIMESTAMP = 18,
// Exact timestamp encoded with int64 since UNIX epoch
// Default unit millisecond
TIMESTAMP = 18,

// Timestamp as double seconds since the UNIX epoch
TIMESTAMP_DOUBLE = 19,
// Timestamp as double seconds since the UNIX epoch
TIMESTAMP_DOUBLE = 19,

// Exact time encoded with int64, default unit millisecond
TIME = 20,
// Exact time encoded with int64, default unit millisecond
TIME = 20,

// Precision- and scale-based decimal type. Storage type depends on the
// parameters.
DECIMAL = 21,
// Precision- and scale-based decimal type. Storage type depends on the
// parameters.
DECIMAL = 21,

// Decimal value encoded as a text string
DECIMAL_TEXT = 22,
// Decimal value encoded as a text string
DECIMAL_TEXT = 22,

// A list of some logical data type
LIST = 30,
// A list of some logical data type
LIST = 30,

// Struct of logical types
STRUCT = 31,
// Struct of logical types
STRUCT = 31,

// Unions of logical types
DENSE_UNION = 32,
SPARSE_UNION = 33,
// Unions of logical types
DENSE_UNION = 32,
SPARSE_UNION = 33,

// Union<Null, Int32, Double, String, Bool>
JSON_SCALAR = 50,
// Union<Null, Int32, Double, String, Bool>
JSON_SCALAR = 50,

// User-defined type
USER = 60
// User-defined type
USER = 60
};
};


struct DataType {
TypeEnum type;
LogicalType::type type;
bool nullable;

explicit DataType(TypeEnum type)
: type(type) {}
explicit DataType(LogicalType::type type, bool nullable = true) :
type(type),
nullable(nullable) {}

virtual bool Equals(const DataType* other) {
return this == other || this->type == other->type;
return this == other || (this->type == other->type &&
this->nullable == other->nullable);
}

virtual std::string ToString() const = 0;
Expand Down Expand Up @@ -171,6 +173,77 @@ struct ListLayoutType : public LayoutType {
value_type(value_type) {}
};

// CRTP base for the concrete primitive types declared below. Derived must
// expose a static `type_enum` (passed to the DataType constructor) and a
// static `name()` (used by ToString); the PRIMITIVE_DECL macro provides both.
template <typename Derived>
struct PrimitiveType : public DataType {
  explicit PrimitiveType(bool nullable = true)
      : DataType(Derived::type_enum, nullable) {}

  // Returns Derived::name(), prefixed with "?" when the type is nullable.
  virtual std::string ToString() const {
    const std::string type_name(static_cast<const Derived*>(this)->name());
    return nullable ? "?" + type_name : type_name;
  }
};

// Boilerplate to be expanded inside the body of `struct TYPENAME`, which must
// derive from PrimitiveType<TYPENAME>. It declares:
//   c_type     - alias for C_TYPE
//   type_enum  - LogicalType::ENUM
//   size       - SIZE (presumably the value width in bytes; confirm with users)
//   TYPENAME() - explicit constructor forwarding `nullable` to the base
//   name()     - static accessor returning NAME
#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)            \
  using c_type = C_TYPE;                                              \
  static constexpr LogicalType::type type_enum = LogicalType::ENUM;   \
  static constexpr int size = SIZE;                                   \
                                                                      \
  explicit TYPENAME(bool nullable = true)                             \
      : PrimitiveType<TYPENAME>(nullable) {}                          \
                                                                      \
  static const char* name() { return NAME; }

// Primitive type tagged LogicalType::BOOL: c_type uint8_t, size 1, name "bool".
struct BooleanType : public PrimitiveType<BooleanType> {
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
};

// Primitive type tagged LogicalType::UINT8: c_type uint8_t, size 1, name "uint8".
struct UInt8Type : public PrimitiveType<UInt8Type> {
PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8");
};

// Primitive type tagged LogicalType::INT8: c_type int8_t, size 1, name "int8".
struct Int8Type : public PrimitiveType<Int8Type> {
PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8");
};

// Primitive type tagged LogicalType::UINT16: c_type uint16_t, size 2, name "uint16".
struct UInt16Type : public PrimitiveType<UInt16Type> {
PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16");
};

// Primitive type tagged LogicalType::INT16: c_type int16_t, size 2, name "int16".
struct Int16Type : public PrimitiveType<Int16Type> {
PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16");
};

// Primitive type tagged LogicalType::UINT32: c_type uint32_t, size 4, name "uint32".
struct UInt32Type : public PrimitiveType<UInt32Type> {
PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32");
};

// Primitive type tagged LogicalType::INT32: c_type int32_t, size 4, name "int32".
struct Int32Type : public PrimitiveType<Int32Type> {
PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32");
};

// Primitive type tagged LogicalType::UINT64: c_type uint64_t, size 8, name "uint64".
struct UInt64Type : public PrimitiveType<UInt64Type> {
PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64");
};

// Primitive type tagged LogicalType::INT64: c_type int64_t, size 8, name "int64".
struct Int64Type : public PrimitiveType<Int64Type> {
PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64");
};

// Primitive type tagged LogicalType::FLOAT: c_type float, size 4, name "float".
struct FloatType : public PrimitiveType<FloatType> {
PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float");
};

// Primitive type tagged LogicalType::DOUBLE: c_type double, size 8, name "double".
struct DoubleType : public PrimitiveType<DoubleType> {
PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
};

} // namespace arrow

#endif // ARROW_TYPE_H
3 changes: 0 additions & 3 deletions cpp/src/arrow/types/binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@

namespace arrow {

struct StringType : public DataType {
};

} // namespace arrow

#endif // ARROW_TYPES_BINARY_H
4 changes: 0 additions & 4 deletions cpp/src/arrow/types/boolean.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@

namespace arrow {

struct BooleanType : public PrimitiveType<BooleanType> {
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
};

typedef PrimitiveArrayImpl<BooleanType> BooleanArray;

// typedef PrimitiveBuilder<BooleanType, BooleanArray> BooleanBuilder;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

namespace arrow {

template <TypeEnum T>
template <LogicalType::type T>
struct CollectionType : public DataType {
std::vector<TypePtr> child_types_;

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/types/construct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ArrayBuilder;
// difficult

#define BUILDER_CASE(ENUM, BuilderType) \
case TypeEnum::ENUM: \
case LogicalType::ENUM: \
*out = static_cast<ArrayBuilder*>(new BuilderType(pool, type)); \
return Status::OK();

Expand All @@ -56,7 +56,7 @@ Status make_builder(MemoryPool* pool, const TypePtr& type,

BUILDER_CASE(STRING, StringBuilder);

case TypeEnum::LIST:
case LogicalType::LIST:
{
ListType* list_type = static_cast<ListType*>(type.get());
ArrayBuilder* value_builder;
Expand Down
Loading

0 comments on commit d54595d

Please sign in to comment.