diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 9f60cd31d3541..b0a90632f544f 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -598,6 +598,26 @@ TEST_F(TestConvertParquetSchema, ParquetLists) { arrow_fields.push_back(::arrow::field("name", arrow_list, false)); } + // Two-level encoding List<List<Integer>>: + // optional group nested_list (LIST) { + // repeated group array (LIST) { + // repeated int32 array; + // } + // } + { + auto inner_element = + PrimitiveNode::Make("array", Repetition::REPEATED, ParquetType::INT32); + auto outer_element = GroupNode::Make("array", Repetition::REPEATED, {inner_element}, + ConvertedType::LIST); + parquet_fields.push_back(GroupNode::Make("nested_list", Repetition::OPTIONAL, + {outer_element}, ConvertedType::LIST)); + auto arrow_inner_element = ::arrow::field("array", INT32, /*nullable=*/false); + auto arrow_outer_element = + ::arrow::field("array", ::arrow::list(arrow_inner_element), /*nullable=*/false); + auto arrow_list = ::arrow::list(arrow_outer_element); + arrow_fields.push_back(::arrow::field("nested_list", arrow_list, true)); + } + auto arrow_schema = ::arrow::schema(arrow_fields); ASSERT_OK(ConvertSchema(parquet_fields)); diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index ec3890a41f442..af29d69d94d5c 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -674,6 +674,10 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, // List of primitive type RETURN_NOT_OK( NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field)); + } else if (list_group.field_count() == 1 && list_group.field(0)->is_repeated()) { + // Special case for nested list in two-level list encoding + RETURN_NOT_OK( + NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field)); } else { RETURN_NOT_OK(GroupToStruct(list_group, 
current_levels, ctx, out, child_field)); }