Skip to content

Commit

Permalink
Fix parsing of array-of-struct columns when repetition_type is 'REPEATED'
Browse files Browse the repository at this point in the history
  • Loading branch information
yma11 committed Aug 15, 2024
1 parent 13899de commit f35e6e3
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
17 changes: 17 additions & 0 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,23 @@ std::unique_ptr<ParquetTypeWithId> ReaderBase::getParquetColumnInfo(
maxDefine,
isOptional,
isRepeated);
} else {
// Row type
auto type =
createRowType(children, isFileColumnNamesReadAsLowerCase());
return std::make_unique<ParquetTypeWithId>(
std::move(type),
std::move(children),
curSchemaIdx,
maxSchemaElementIdx,
ParquetTypeWithId::kNonLeaf, // columnIdx,
std::move(name),
std::nullopt,
std::nullopt,
maxRepeat,
maxDefine,
isOptional,
isRepeated);
}
} else {
// Row type
Expand Down
Binary file not shown.
24 changes: 24 additions & 0 deletions velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,30 @@ TEST_F(ParquetReaderTest, parseMapKeyValueAsMap) {
assertReadWithReaderAndExpected(fileSchema, *rowReader, expected, *leafPool_);
}

// Opens proto-struct-with-array.parquet and checks the schema exposed by the
// reader: the file has one row and six top-level columns, the sixth column is
// reported with kind ROW, and that column's first child has kind INTEGER.
TEST_F(ParquetReaderTest, parseRowArrayTest) {
  // Logical schema of the sample file:
  //   optionalPrimitive: int
  //   requiredPrimitive: int
  //   repeatedPrimitive: array<int>
  //   optionalMessage:   struct<someId:int>
  //   requiredMessage:   struct<someId:int>
  //   repeatedMessage:   array<struct<someId:int>>
  // NOTE(review): repeatedMessage is described as array<struct<...>> above,
  // but the assertions below expect the reader to report it as ROW - confirm
  // this is the intended mapping.
  const std::string sample(
      getExampleFilePath("proto-struct-with-array.parquet"));

  dwio::common::ReaderOptions readerOptions{leafPool_.get()};
  auto reader = createReader(sample, readerOptions);
  EXPECT_EQ(reader->numberOfRows(), 1ULL);

  auto rootType = reader->typeWithId();
  // Fatal assertion: the code below indexes child 5, so stop here with a clear
  // failure if the schema does not have the expected six columns.
  ASSERT_EQ(rootType->size(), 6ULL);

  auto repeatedMessageType = rootType->childAt(5);
  EXPECT_EQ(repeatedMessageType->type()->kind(), TypeKind::ROW);

  // Fatal assertion: child 0 is accessed next, so it must exist.
  ASSERT_GE(repeatedMessageType->size(), 1ULL);
  auto someIdType = repeatedMessageType->childAt(0);
  EXPECT_EQ(someIdType->type()->kind(), TypeKind::INTEGER);
}

TEST_F(ParquetReaderTest, readSampleBigintRangeFilter) {
// Read sample.parquet with the int filter "a BETWEEN 16 AND 20".
FilterMap filters;
Expand Down

0 comments on commit f35e6e3

Please sign in to comment.