Skip to content

Commit

Permalink
Merge pull request #22 from samansmink/add-missing-types
Browse files Browse the repository at this point in the history
add missing nested types
  • Loading branch information
samansmink authored May 30, 2024
2 parents b0acfc0 + 510fece commit 0106ae1
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 5 deletions.
27 changes: 22 additions & 5 deletions src/include/delta_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class SchemaVisitor {
visitor.data = &state;
visitor.make_field_list = (uintptr_t (*)(void*, uintptr_t)) &MakeFieldList;
visitor.visit_struct = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, uintptr_t)) &VisitStruct;
visitor.visit_array = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) &VisitArray;
visitor.visit_map = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) &VisitMap;
visitor.visit_decimal = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, uint8_t , uint8_t)) &VisitDecimal;
visitor.visit_string = VisitSimpleType<LogicalType::VARCHAR>();
visitor.visit_long = VisitSimpleType<LogicalType::BIGINT>();
Expand All @@ -29,10 +31,10 @@ class SchemaVisitor {
visitor.visit_float = VisitSimpleType<LogicalType::FLOAT>();
visitor.visit_double = VisitSimpleType<LogicalType::DOUBLE>();
visitor.visit_boolean = VisitSimpleType<LogicalType::BOOLEAN>();
visitor.visit_binary = VisitSimpleType<LogicalType::VARCHAR>(); // TODO: check
visitor.visit_date = VisitSimpleType<LogicalType::DATE>(); // TODO: check
visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP>(); // TODO: check
visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP_TZ>(); // TODO: check
visitor.visit_binary = VisitSimpleType<LogicalType::VARCHAR>();
visitor.visit_date = VisitSimpleType<LogicalType::DATE>();
visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP>();
visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP_TZ>();

uintptr_t result = visit_schema(snapshot, &visitor);
return state.TakeFieldList(result);
Expand Down Expand Up @@ -66,6 +68,20 @@ class SchemaVisitor {
state->AppendToList(sibling_list_id, name, LogicalType::STRUCT(std::move(*children)));
}

// Schema-visitor callback for an array field.
//
// Takes ownership of the field list identified by `child_list_id`, which is
// expected to hold exactly one entry (asserted below), and appends a LIST type
// built from that entry's type to the list identified by `sibling_list_id`
// under `name`.
//
// `contains_null` is accepted to match the visitor callback signature but is
// not used here.
static void VisitArray(SchemaVisitor* state, uintptr_t sibling_list_id, ffi::KernelStringSlice name, bool contains_null, uintptr_t child_list_id) {
    auto element_fields = state->TakeFieldList(child_list_id);
    D_ASSERT(element_fields->size() == 1);

    // Only the type of the single child entry is used; its name is dropped.
    const auto &element_type = element_fields->front().second;
    state->AppendToList(sibling_list_id, name, LogicalType::LIST(element_type));
}

// Schema-visitor callback for a map field.
//
// Takes ownership of the field list identified by `child_list_id`, which is
// expected to hold exactly two entries (asserted below; presumably the key and
// value fields -- confirm against the kernel's visitor contract). The entries
// are wrapped in a STRUCT, which becomes the child type of the MAP appended to
// the list identified by `sibling_list_id` under `name`.
//
// `contains_null` is accepted to match the visitor callback signature but is
// not used here.
static void VisitMap(SchemaVisitor* state, uintptr_t sibling_list_id, ffi::KernelStringSlice name, bool contains_null, uintptr_t child_list_id) {
    auto entry_fields = state->TakeFieldList(child_list_id);
    D_ASSERT(entry_fields->size() == 2);

    // The taken list is consumed here: its contents are moved into the STRUCT.
    auto entry_struct = LogicalType::STRUCT(std::move(*entry_fields));
    state->AppendToList(sibling_list_id, name, LogicalType::MAP(std::move(entry_struct)));
}

uintptr_t MakeFieldListImpl(uintptr_t capacity_hint) {
uintptr_t id = next_id++;
auto list = make_uniq<FieldList>();
Expand All @@ -80,6 +96,7 @@ class SchemaVisitor {
auto it = inflight_lists.find(id);
if (it == inflight_lists.end()) {
// No in-flight list is registered under this id.
throw InternalException("WEIRD SHIT");
} else {
it->second->emplace_back(std::make_pair(string(name.ptr, name.len), std::move(child)));
}
Expand All @@ -89,7 +106,7 @@ class SchemaVisitor {
auto it = inflight_lists.find(id);
if (it == inflight_lists.end()) {
// No in-flight list is registered under this id.
return {}; // not present
throw InternalException("WEIRD SHIT 2");
}
auto rval = std::move(it->second);
inflight_lists.erase(it);
Expand Down
29 changes: 29 additions & 0 deletions test/sql/dat/nested_types.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# name: test/sql/dat/nested_types.test
# description: DAT test suite: nested types
# group: [delta]

require parquet

require delta

require-env DAT_PATH

query IIII
SELECT *
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/delta')
----
0 {'float64': 0.0, 'bool': true} [0] {}
1 {'float64': 1.0, 'bool': false} [0, 1] {0=0}
2 {'float64': 2.0, 'bool': true} [0, 1, 2] {0=0, 1=1}
3 {'float64': 3.0, 'bool': false} [0, 1, 2, 3] {0=0, 1=1, 2=2}
4 {'float64': 4.0, 'bool': true} [0, 1, 2, 3, 4] {0=0, 1=1, 2=2, 3=3}

query IIII
SELECT *
FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/expected/**/*.parquet')
----
0 {'float64': 0.0, 'bool': true} [0] {}
1 {'float64': 1.0, 'bool': false} [0, 1] {0=0}
2 {'float64': 2.0, 'bool': true} [0, 1, 2] {0=0, 1=1}
3 {'float64': 3.0, 'bool': false} [0, 1, 2, 3] {0=0, 1=1, 2=2}
4 {'float64': 4.0, 'bool': true} [0, 1, 2, 3, 4] {0=0, 1=1, 2=2, 3=3}
11 changes: 11 additions & 0 deletions test/sql/dat/primitive_types.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,14 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delt
2 2 2 2 2 2.0 2.0 true \x00\x00 12.000 1970-01-03 1970-01-01 02:00:00
3 3 3 3 3 3.0 3.0 false \x00\x00\x00 13.000 1970-01-04 1970-01-01 03:00:00
4 4 4 4 4 4.0 4.0 true \x00\x00\x00\x00 14.000 1970-01-05 1970-01-01 04:00:00

query IIIIIIIIIIII
SELECT *
FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/expected/**/*.parquet')
----
0 0 0 0 0 0.0 0.0 true (empty) 10.000 1970-01-01 1970-01-01 00:00:00
1 1 1 1 1 1.0 1.0 false \x00 11.000 1970-01-02 1970-01-01 01:00:00
2 2 2 2 2 2.0 2.0 true \x00\x00 12.000 1970-01-03 1970-01-01 02:00:00
3 3 3 3 3 3.0 3.0 false \x00\x00\x00 13.000 1970-01-04 1970-01-01 03:00:00
4 4 4 4 4 4.0 4.0 true \x00\x00\x00\x00 14.000 1970-01-05 1970-01-01 04:00:00

0 comments on commit 0106ae1

Please sign in to comment.