Skip to content

Commit

Permalink
Support more Substrait-to-Velox conversions and added validations (fa…
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo authored and zhejiangxiaomai committed Sep 1, 2022
1 parent 5f47eea commit 4c930c3
Show file tree
Hide file tree
Showing 12 changed files with 3,070 additions and 573 deletions.
12 changes: 2 additions & 10 deletions velox/substrait/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,8 @@ add_custom_command(
add_custom_target(substrait_proto ALL DEPENDS ${PROTO_OUTPUT_FILES})
add_dependencies(substrait_proto protobuf::libprotobuf)

set(SRCS
${PROTO_SRCS}
SubstraitParser.cpp
SubstraitToVeloxExpr.cpp
SubstraitToVeloxPlan.cpp
TypeUtils.cpp
VeloxToSubstraitExpr.cpp
VeloxToSubstraitPlan.cpp
VeloxToSubstraitType.cpp)

set(SRCS ${PROTO_SRCS} SubstraitUtils.cpp SubstraitToVeloxPlanValidator.cpp
SubstraitToVeloxExpr.cpp SubstraitToVeloxPlan.cpp TypeUtils.cpp)
add_library(velox_substrait_plan_converter ${SRCS})
target_include_directories(velox_substrait_plan_converter
PUBLIC ${PROTO_OUTPUT_DIR})
Expand Down
32 changes: 28 additions & 4 deletions velox/substrait/SubstraitParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ std::shared_ptr<SubstraitParser::SubstraitType> SubstraitParser::parseType(
switch (substraitType.kind_case()) {
case ::substrait::Type::KindCase::kBool: {
typeName = "BOOLEAN";
nullability = substraitType.bool_().nullability();
nullability = sType.bool_().nullability();
break;
}
case ::substrait::Type::KindCase::kI8: {
Expand Down Expand Up @@ -202,7 +202,7 @@ int SubstraitParser::getIdxFromNodeName(const std::string& nodeName) {
}
}

const std::string& SubstraitParser::findFunctionSpec(
std::string SubstraitParser::findSubstraitFuncSpec(
const std::unordered_map<uint64_t, std::string>& functionMap,
uint64_t id) const {
if (functionMap.find(id) == functionMap.end()) {
Expand Down Expand Up @@ -248,6 +248,30 @@ void SubstraitParser::getFunctionTypes(
types.emplace_back(funcTypes);
}

/// Splits the argument-type portion of a Substrait function specification
/// into its individual type tokens and appends them to 'types'.
///
/// The type portion is everything after the first ':' in 'subFuncSpec'. When
/// the specification contains no ':', the whole string is used as the type
/// portion. When ':' is the last character, nothing is appended. Tokens are
/// separated by '_'; empty tokens are kept. 'types' is not cleared first.
void SubstraitParser::getSubFunctionTypes(
    const std::string& subFuncSpec,
    std::vector<std::string>& types) const {
  const std::size_t colonPos = subFuncSpec.find(":");
  const bool hasColon = colonPos != std::string::npos;

  // A trailing ':' means the specification carries no types at all.
  if (hasColon && colonPos + 1 == subFuncSpec.size()) {
    return;
  }

  // Scan the type portion in place rather than copying and trimming it.
  std::size_t tokenBegin = hasColon ? colonPos + 1 : 0;
  const std::string separator = "_";
  for (std::size_t sepPos = subFuncSpec.find(separator, tokenBegin);
       sepPos != std::string::npos;
       sepPos = subFuncSpec.find(separator, tokenBegin)) {
    types.emplace_back(subFuncSpec.substr(tokenBegin, sepPos - tokenBegin));
    tokenBegin = sepPos + separator.length();
  }

  // The remainder after the last separator (possibly empty) is the final type.
  types.emplace_back(subFuncSpec.substr(tokenBegin));
}

std::string SubstraitParser::findVeloxFunction(
const std::unordered_map<uint64_t, std::string>& functionMap,
uint64_t id) const {
Expand All @@ -257,8 +281,8 @@ std::string SubstraitParser::findVeloxFunction(
}

std::string SubstraitParser::mapToVeloxFunction(
const std::string& substraitFunction) const {
auto it = substraitVeloxFunctionMap_.find(substraitFunction);
const std::string& subFunc) const {
auto it = substraitVeloxFunctionMap_.find(subFunc);
if (it != substraitVeloxFunctionMap_.end()) {
return it->second;
}
Expand Down
12 changes: 8 additions & 4 deletions velox/substrait/SubstraitParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,10 @@ class SubstraitParser {
/// Make node name in the format of n{nodeId}_{colIdx}.
std::string makeNodeName(int nodeId, int colIdx);

/// Get the column index from a node name in the format of
/// n{nodeId}_{colIdx}.
/// Used to get the column index from node name.
int getIdxFromNodeName(const std::string& nodeName);

/// Find the Substrait function name according to the function id
/// Used to find the Substrait function name according to the function id
/// from a pre-constructed function map. The function specification can be
/// a simple name or a compound name. The compound name format is:
/// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>.
Expand All @@ -79,7 +78,12 @@ class SubstraitParser {
const std::string& functionSpec,
std::vector<std::string>& types) const;

/// Find the Velox function name according to the function id
/// This function is used to get the types from the compound name.
void getSubFunctionTypes(
const std::string& subFuncSpec,
std::vector<std::string>& types) const;

/// Used to find the Velox function name according to the function id
/// from a pre-constructed function map.
std::string findVeloxFunction(
const std::unordered_map<uint64_t, std::string>& functionMap,
Expand Down
32 changes: 18 additions & 14 deletions velox/substrait/SubstraitToVeloxExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ SubstraitVeloxExprConverter::toVeloxExpr(
switch (typeCase) {
case ::substrait::Expression::FieldReference::ReferenceTypeCase::
kDirectReference: {
const auto& directRef = substraitField.direct_reference();
int32_t colIdx = substraitParser_.parseReferenceSegment(directRef);
const auto& dRef = sField.direct_reference();
int32_t colIdx = subParser_->parseReferenceSegment(dRef);

const auto& inputTypes = inputType->children();
const auto& inputNames = inputType->names();
Expand Down Expand Up @@ -59,10 +59,18 @@ SubstraitVeloxExprConverter::toVeloxExpr(
for (const auto& sArg : substraitFunc.args()) {
params.emplace_back(toVeloxExpr(sArg, inputType));
}
const auto& veloxFunction = substraitParser_.findVeloxFunction(
functionMap_, substraitFunc.function_reference());
const auto& veloxType = toVeloxType(
substraitParser_.parseType(substraitFunc.output_type())->type);
const auto& veloxFunction =
subParser_->findVeloxFunction(functionMap_, sFunc.function_reference());
const auto& veloxType =
toVeloxType(subParser_->parseType(sFunc.output_type())->type);

// Omit alias because name change is not needed.
if (veloxFunction == "alias") {
if (params.size() != 1) {
VELOX_FAIL("Alias expects one parameter.");
}
return params[0];
}

return std::make_shared<const core::CallTypedExpr>(
veloxType, std::move(params), veloxFunction);
Expand All @@ -74,17 +82,13 @@ SubstraitVeloxExprConverter::toVeloxExpr(
auto typeCase = substraitLit.literal_type_case();
switch (typeCase) {
case ::substrait::Expression_Literal::LiteralTypeCase::kBoolean:
return std::make_shared<core::ConstantTypedExpr>(
variant(substraitLit.boolean()));
return std::make_shared<core::ConstantTypedExpr>(variant(sLit.boolean()));
case ::substrait::Expression_Literal::LiteralTypeCase::kI32:
return std::make_shared<core::ConstantTypedExpr>(
variant(substraitLit.i32()));
return std::make_shared<core::ConstantTypedExpr>(variant(sLit.i32()));
case ::substrait::Expression_Literal::LiteralTypeCase::kI64:
return std::make_shared<core::ConstantTypedExpr>(
variant(substraitLit.i64()));
return std::make_shared<core::ConstantTypedExpr>(variant(sLit.i64()));
case ::substrait::Expression_Literal::LiteralTypeCase::kFp64:
return std::make_shared<core::ConstantTypedExpr>(
variant(substraitLit.fp64()));
return std::make_shared<core::ConstantTypedExpr>(variant(sLit.fp64()));
case ::substrait::Expression_Literal::LiteralTypeCase::kNull: {
auto veloxType =
toVeloxType(substraitParser_.parseType(substraitLit.null())->type);
Expand Down
5 changes: 3 additions & 2 deletions velox/substrait/SubstraitToVeloxExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class SubstraitVeloxExprConverter {
/// subParser: A Substrait parser used to convert Substrait representations
/// into recognizable representations. functionMap: A pre-constructed map
/// storing the relations between the function id and the function name.
explicit SubstraitVeloxExprConverter(
SubstraitVeloxExprConverter(
const std::unordered_map<uint64_t, std::string>& functionMap)
: functionMap_(functionMap) {}

Expand Down Expand Up @@ -59,7 +59,8 @@ class SubstraitVeloxExprConverter {
private:
/// The Substrait parser used to convert Substrait representations into
/// recognizable representations.
SubstraitParser substraitParser_;
std::shared_ptr<SubstraitParser> subParser_ =
std::make_shared<SubstraitParser>();

/// The map storing the relations between the function id and the function
/// name.
Expand Down
Loading

0 comments on commit 4c930c3

Please sign in to comment.