Skip to content

Commit

Permalink
Support more Substrait-to-Velox conversions and added validations (fa…
Browse files Browse the repository at this point in the history
…cebookincubator#7)

Always compile Substrait (facebookincubator#8)
  • Loading branch information
rui-mo authored and zhejiangxiaomai committed Apr 20, 2023
1 parent 53b3da3 commit d1771cf
Show file tree
Hide file tree
Showing 13 changed files with 2,625 additions and 103 deletions.
4 changes: 2 additions & 2 deletions velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@ if(${VELOX_CODEGEN_SUPPORT})
endif()

# substrait converter
if(${VELOX_ENABLE_SUBSTRAIT})
# if(${VELOX_ENABLE_SUBSTRAIT})
add_subdirectory(substrait)
endif()
# endif()
3 changes: 2 additions & 1 deletion velox/substrait/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ set(SRCS
VeloxToSubstraitPlan.cpp
VeloxToSubstraitType.cpp
VeloxSubstraitSignature.cpp
VariantToVectorConverter.cpp)
VariantToVectorConverter.cpp
SubstraitToVeloxPlanValidator.cpp)

add_library(velox_substrait_plan_converter ${SRCS})
target_include_directories(velox_substrait_plan_converter
Expand Down
67 changes: 63 additions & 4 deletions velox/substrait/SubstraitParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ std::shared_ptr<SubstraitParser::SubstraitType> SubstraitParser::parseType(
switch (substraitType.kind_case()) {
case ::substrait::Type::KindCase::kBool: {
typeName = "BOOLEAN";
nullability = substraitType.bool_().nullability();
nullability = sType.bool_().nullability();
break;
}
case ::substrait::Type::KindCase::kI8: {
Expand Down Expand Up @@ -208,7 +208,7 @@ int SubstraitParser::getIdxFromNodeName(const std::string& nodeName) {
}
}

const std::string& SubstraitParser::findFunctionSpec(
std::string SubstraitParser::findSubstraitFuncSpec(
const std::unordered_map<uint64_t, std::string>& functionMap,
uint64_t id) const {
if (functionMap.find(id) == functionMap.end()) {
Expand All @@ -219,6 +219,65 @@ const std::string& SubstraitParser::findFunctionSpec(
return map[id];
}

std::string SubstraitParser::getFunctionName(
    const std::string& functionSpec) const {
  // In a compound spec "<name>:<types>" the name is everything before the
  // first ':'. When there is no ':', find() yields npos and substr() then
  // returns the whole spec unchanged.
  const std::size_t colonPos = functionSpec.find(':');
  return functionSpec.substr(0, colonPos);
}

void SubstraitParser::getFunctionTypes(
    const std::string& functionSpec,
    std::vector<std::string>& types) const {
  // The output always starts empty, whatever the caller passed in.
  types.clear();

  // Nothing to extract when there is no ':' or when ':' is the last character.
  const std::size_t colonPos = functionSpec.find(':');
  if (colonPos == std::string::npos || colonPos + 1 == functionSpec.size()) {
    return;
  }

  // Walk the text after ':' and cut it at every '_'.
  std::size_t tokenStart = colonPos + 1;
  for (std::size_t sep = functionSpec.find('_', tokenStart);
       sep != std::string::npos;
       sep = functionSpec.find('_', tokenStart)) {
    types.emplace_back(functionSpec.substr(tokenStart, sep - tokenStart));
    tokenStart = sep + 1;
  }
  // The remainder after the last '_' (or the whole list if there was none).
  types.emplace_back(functionSpec.substr(tokenStart));
}

void SubstraitParser::getSubFunctionTypes(
    const std::string& subFuncSpec,
    std::vector<std::string>& types) const {
  const std::size_t colonPos = subFuncSpec.find(':');
  const bool hasColon = colonPos != std::string::npos;

  // A spec that ends right after ':' carries no types; leave 'types' as is.
  if (hasColon && colonPos + 1 == subFuncSpec.size()) {
    return;
  }

  // Without a ':' the whole spec is treated as the type list.
  std::size_t tokenStart = hasColon ? colonPos + 1 : 0;

  // Cut the type list at every '_' and append each piece to 'types'.
  for (std::size_t sep = subFuncSpec.find('_', tokenStart);
       sep != std::string::npos;
       sep = subFuncSpec.find('_', tokenStart)) {
    types.emplace_back(subFuncSpec.substr(tokenStart, sep - tokenStart));
    tokenStart = sep + 1;
  }
  // The remainder after the last '_' (or the whole list if there was none).
  types.emplace_back(subFuncSpec.substr(tokenStart));
}

std::string SubstraitParser::findVeloxFunction(
const std::unordered_map<uint64_t, std::string>& functionMap,
uint64_t id) const {
Expand All @@ -228,8 +287,8 @@ std::string SubstraitParser::findVeloxFunction(
}

std::string SubstraitParser::mapToVeloxFunction(
const std::string& substraitFunction) const {
auto it = substraitVeloxFunctionMap_.find(substraitFunction);
const std::string& subFunc) const {
auto it = substraitVeloxFunctionMap_.find(subFunc);
if (it != substraitVeloxFunctionMap_.end()) {
return it->second;
}
Expand Down
21 changes: 17 additions & 4 deletions velox/substrait/SubstraitParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,10 @@ class SubstraitParser {
/// Make node name in the format of n{nodeId}_{colIdx}.
std::string makeNodeName(int nodeId, int colIdx);

/// Get the column index from a node name in the format of
/// n{nodeId}_{colIdx}.
/// Used to get the column index from node name.
int getIdxFromNodeName(const std::string& nodeName);

/// Find the Substrait function name according to the function id
/// Used to find the Substrait function name according to the function id
/// from a pre-constructed function map. The function specification can be
/// a simple name or a compound name. The compound name format is:
/// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>.
Expand All @@ -70,7 +69,21 @@ class SubstraitParser {
const std::unordered_map<uint64_t, std::string>& functionMap,
uint64_t id) const;

/// Find the Velox function name according to the function id
/// Extracts the function name for a function from specified compound name.
/// When the input is a simple name, it will be returned.
std::string getFunctionName(const std::string& functionSpec) const;

/// Extracts argument types for a function from specified compound name.
/// 'types' is cleared first. It stays empty when the name contains no ':'
/// or when nothing follows the ':'. Otherwise the text after the first ':'
/// is split on '_' and each piece is stored in 'types'.
void getFunctionTypes(
const std::string& functionSpec,
std::vector<std::string>& types) const;

/// Used to get the types from the compound name. The pieces are appended to
/// 'types', which is not cleared first. Unlike getFunctionTypes, a name
/// without ':' is treated entirely as a '_'-separated type list. Nothing is
/// appended when the name ends with ':'.
void getSubFunctionTypes(
const std::string& subFuncSpec,
std::vector<std::string>& types) const;

/// Used to find the Velox function name according to the function id
/// from a pre-constructed function map.
std::string findVeloxFunction(
const std::unordered_map<uint64_t, std::string>& functionMap,
Expand Down
21 changes: 15 additions & 6 deletions velox/substrait/SubstraitToVeloxExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ SubstraitVeloxExprConverter::toVeloxExpr(
switch (typeCase) {
case ::substrait::Expression::FieldReference::ReferenceTypeCase::
kDirectReference: {
const auto& directRef = substraitField.direct_reference();
int32_t colIdx = substraitParser_.parseReferenceSegment(directRef);
const auto& dRef = sField.direct_reference();
int32_t colIdx = subParser_->parseReferenceSegment(dRef);
const auto& inputNames = inputType->names();
const int64_t inputSize = inputNames.size();
if (colIdx <= inputSize) {
Expand Down Expand Up @@ -196,10 +196,19 @@ SubstraitVeloxExprConverter::toVeloxExpr(
for (const auto& sArg : substraitFunc.arguments()) {
params.emplace_back(toVeloxExpr(sArg.value(), inputType));
}
const auto& veloxFunction = substraitParser_.findVeloxFunction(
functionMap_, substraitFunc.function_reference());
std::string typeName =
substraitParser_.parseType(substraitFunc.output_type())->type;
const auto& veloxFunction =
subParser_->findVeloxFunction(functionMap_, sFunc.function_reference());
const auto& veloxType =
toVeloxType(subParser_->parseType(sFunc.output_type())->type);

// Omit alias because name change is not needed.
if (veloxFunction == "alias") {
if (params.size() != 1) {
VELOX_FAIL("Alias expects one parameter.");
}
return params[0];
}

return std::make_shared<const core::CallTypedExpr>(
toVeloxType(typeName), std::move(params), veloxFunction);
}
Expand Down
3 changes: 2 additions & 1 deletion velox/substrait/SubstraitToVeloxExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ class SubstraitVeloxExprConverter {

/// The Substrait parser used to convert Substrait representations into
/// recognizable representations.
SubstraitParser substraitParser_;
std::shared_ptr<SubstraitParser> subParser_ =
std::make_shared<SubstraitParser>();

/// The map storing the relations between the function id and the function
/// name.
Expand Down
Loading

0 comments on commit d1771cf

Please sign in to comment.