Skip to content

Commit

Permalink
[OPPRO-170] Filter validation for Parquet reader at runtime (facebook…
Browse files Browse the repository at this point in the history
…incubator#27)

* Filter validation for Parquet reader at runtime

* Style

* Style

* Format
  • Loading branch information
zhztheplayer authored Jun 28, 2022
1 parent 595b614 commit 89741e5
Showing 1 changed file with 59 additions and 2 deletions.
61 changes: 59 additions & 2 deletions velox/substrait/SubstraitToVeloxPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,53 @@ std::shared_ptr<const core::PlanNode> SubstraitVeloxPlanConverter::toVeloxPlan(
childNode);
}

// Reports whether every filter in 'subfieldFilters' may be pushed down to the
// reader of the given file format.
//
// Only Parquet imposes a restriction here: a Parquet scan accepts push-down
// solely for the filter kinds listed in the switch below (the list mirrors
// the kinds handled by the Parquet reader, see ParquetReader.cpp). Any other
// file format is reported as supporting all filters.
//
// Returns false as soon as one unsupported filter is found, true otherwise
// (including when 'subfieldFilters' is empty).
bool isPushDownSupportedByFormat(
    const dwio::common::FileFormat& format,
    connector::hive::SubfieldFilters& subfieldFilters) {
  // ORC, RC*, TEXT, JSON, ALPHA, UNKNOWN and any other non-Parquet format are
  // not validated by this function.
  if (format != dwio::common::FileFormat::PARQUET) {
    return true;
  }

  for (const auto& entry : subfieldFilters) {
    switch (entry.second->kind()) {
      // Kinds the Parquet reader can evaluate: move on to the next filter.
      case common::FilterKind::kBigintRange:
      case common::FilterKind::kDoubleRange:
      case common::FilterKind::kBytesValues:
      case common::FilterKind::kBytesRange:
      case common::FilterKind::kBigintValuesUsingBitmask:
      case common::FilterKind::kBigintValuesUsingHashTable:
        continue;

      // Everything else is rejected, e.g. kAlwaysFalse, kAlwaysTrue, kIsNull,
      // kIsNotNull, kBoolValue, kFloatRange, kBigintMultiRange, kMultiRange.
      default:
        return false;
    }
  }
  return true;
}

std::shared_ptr<const core::PlanNode> SubstraitVeloxPlanConverter::toVeloxPlan(
const ::substrait::ReadRel& sRead) {
// Check if the ReadRel specifies an input of stream. If yes, the pre-built
Expand Down Expand Up @@ -494,8 +541,18 @@ std::shared_ptr<const core::PlanNode> SubstraitVeloxPlanConverter::toVeloxPlan(
toSubfieldFilters(colNameList, veloxTypeList, subfieldFunctions);

// Connect the remaining filters with 'and'.
std::shared_ptr<const core::ITypedExpr> remainingFilter =
connectWithAnd(colNameList, veloxTypeList, remainingFunctions);
std::shared_ptr<const core::ITypedExpr> remainingFilter;

if (!isPushDownSupportedByFormat(splitInfo->format, subfieldFilters)) {
// At least one subfield filter is not supported by the format,
// so mark all filters as remaining filters.
subfieldFilters.clear();
remainingFilter =
connectWithAnd(colNameList, veloxTypeList, scalarFunctions);
} else {
remainingFilter =
connectWithAnd(colNameList, veloxTypeList, remainingFunctions);
}

tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
"hive_table",
Expand Down

0 comments on commit 89741e5

Please sign in to comment.