Skip to content

Commit

Permalink
GH-43994: [C++][Parquet] Fix schema conversion from two-level encoding nested list
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Sep 6, 2024
1 parent 12dddfc commit 7e6cb6d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
20 changes: 20 additions & 0 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,26 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
arrow_fields.push_back(::arrow::field("name", arrow_list, false));
}

// Two-level encoding List<List<Integer>>:
// optional group nested_list (LIST) {
// repeated group array (LIST) {
// repeated int32 array;
// }
// }
{
auto inner_element =
PrimitiveNode::Make("array", Repetition::REPEATED, ParquetType::INT32);
auto outer_element = GroupNode::Make("array", Repetition::REPEATED, {inner_element},
ConvertedType::LIST);
parquet_fields.push_back(GroupNode::Make("nested_list", Repetition::OPTIONAL,
{outer_element}, ConvertedType::LIST));
auto arrow_inner_element = ::arrow::field("array", INT32, /*nullable=*/false);
auto arrow_outer_element =
::arrow::field("array", ::arrow::list(arrow_inner_element), /*nullable=*/false);
auto arrow_list = ::arrow::list(arrow_outer_element);
arrow_fields.push_back(::arrow::field("nested_list", arrow_list, true));
}

auto arrow_schema = ::arrow::schema(arrow_fields);
ASSERT_OK(ConvertSchema(parquet_fields));

Expand Down
4 changes: 4 additions & 0 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,10 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
// List of primitive type
RETURN_NOT_OK(
NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
} else if (list_group.field_count() == 1 && list_group.field(0)->is_repeated()) {
// Special case for nested list in two-level list encoding
RETURN_NOT_OK(
NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
} else {
RETURN_NOT_OK(GroupToStruct(list_group, current_levels, ctx, out, child_field));
}
Expand Down

0 comments on commit 7e6cb6d

Please sign in to comment.