From 8a81a3f8d0128887f9878979d349dc6029e1fa64 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Fri, 21 Oct 2022 18:45:57 +0800 Subject: [PATCH 1/3] Merge code from upstream velox --- velox/CMakeLists.txt | 4 +- velox/substrait/CMakeLists.txt | 6 +- velox/substrait/ExprUtils.cpp | 34 - velox/substrait/JoinUtils.cpp | 4 +- velox/substrait/SubstraitExtension.cpp | 367 ----- velox/substrait/SubstraitExtension.h | 75 - velox/substrait/SubstraitFunction.cpp | 54 - velox/substrait/SubstraitFunction.h | 204 --- .../substrait/SubstraitFunctionCollector.cpp | 149 -- velox/substrait/SubstraitFunctionCollector.h | 86 - velox/substrait/SubstraitFunctionLookup.cpp | 231 --- velox/substrait/SubstraitFunctionLookup.h | 167 -- velox/substrait/SubstraitFunctionMappings.h | 52 - velox/substrait/SubstraitParser.h | 3 +- velox/substrait/SubstraitSignature.cpp | 74 - velox/substrait/SubstraitSignature.h | 72 - velox/substrait/SubstraitToVeloxExpr.cpp | 12 +- velox/substrait/SubstraitToVeloxPlan.cpp | 73 +- velox/substrait/SubstraitToVeloxPlan.h | 3 +- velox/substrait/SubstraitType.cpp | 357 ----- velox/substrait/SubstraitType.h | 480 ------ velox/substrait/SubstraitTypeLookup.cpp | 41 - velox/substrait/SubstraitTypeLookup.h | 39 - velox/substrait/TypeUtils.cpp | 220 --- velox/substrait/TypeUtils.h | 3 - velox/substrait/VariantToVectorConverter.cpp | 74 + ...ExprUtils.h => VariantToVectorConverter.h} | 14 +- .../VeloxToSubstraitCallConverter.cpp | 79 - .../substrait/VeloxToSubstraitCallConverter.h | 77 - velox/substrait/VeloxToSubstraitExpr.cpp | 40 +- velox/substrait/VeloxToSubstraitExpr.h | 2 +- velox/substrait/VeloxToSubstraitMappings.h | 57 - velox/substrait/VeloxToSubstraitPlan.cpp | 50 - velox/substrait/VeloxToSubstraitPlan.h | 19 +- velox/substrait/VeloxToSubstraitType.cpp | 28 +- velox/substrait/VeloxToSubstraitType.h | 10 - .../substrait/extensions/extension_types.yaml | 10 - .../functions_aggregate_approx.yaml | 17 - .../functions_aggregate_generic.yaml | 24 - .../extensions/functions_arithmetic.yaml | 1413 ----------------- .../functions_arithmetic_decimal.yaml | 151 -- .../extensions/functions_boolean.yaml | 138 -- .../extensions/functions_comparison.yaml | 216 --- .../extensions/functions_datetime.yaml | 351 ---- .../extensions/functions_logarithmic.yaml | 132 -- .../extensions/functions_rounding.yaml | 29 - velox/substrait/extensions/functions_set.yaml | 35 - .../extensions/functions_string.yaml | 1312 --------------- .../substrait/extensions/type_variations.yaml | 23 - velox/substrait/extensions/unknown.yaml | 66 - velox/substrait/tests/CMakeLists.txt | 7 +- velox/substrait/tests/FunctionTest.cpp | 88 +- .../substrait/tests/JsonToProtoConverter.cpp | 17 - velox/substrait/tests/JsonToProtoConverter.h | 3 - ...ubstrait2VeloxValuesNodeConversionTest.cpp | 6 +- .../tests/SubstraitExtensionTest.cpp | 59 - .../tests/SubstraitFunctionLookupTest.cpp | 237 --- .../tests/SubstraitSignatureTest.cpp | 52 - .../tests/SubstraitTypeLookupTest.cpp | 41 - velox/substrait/tests/SubstraitTypeTest.cpp | 144 -- ...pp => VeloxSubstraitJoinRoundTripTest.cpp} | 39 +- ...st.cpp => VeloxSubstraitRoundTripTest.cpp} | 120 +- .../tests/VeloxToSubstraitTypeTest.cpp | 62 - 63 files changed, 284 insertions(+), 7768 deletions(-) delete mode 100644 velox/substrait/ExprUtils.cpp delete mode 100644 velox/substrait/SubstraitExtension.cpp delete mode 100644 velox/substrait/SubstraitExtension.h delete mode 100644 velox/substrait/SubstraitFunction.cpp delete mode 100644 velox/substrait/SubstraitFunction.h delete mode 100644 velox/substrait/SubstraitFunctionCollector.cpp delete mode 100644 velox/substrait/SubstraitFunctionCollector.h delete mode 100644 velox/substrait/SubstraitFunctionLookup.cpp delete mode 100644 velox/substrait/SubstraitFunctionLookup.h delete mode 100644 velox/substrait/SubstraitFunctionMappings.h delete mode 100644 velox/substrait/SubstraitSignature.cpp delete mode 100644 velox/substrait/SubstraitSignature.h delete mode 100644 velox/substrait/SubstraitType.cpp delete mode 100644 velox/substrait/SubstraitType.h delete mode 100644 velox/substrait/SubstraitTypeLookup.cpp delete mode 100644 velox/substrait/SubstraitTypeLookup.h create mode 100644 velox/substrait/VariantToVectorConverter.cpp rename velox/substrait/{ExprUtils.h => VariantToVectorConverter.h} (71%) delete mode 100644 velox/substrait/VeloxToSubstraitCallConverter.cpp delete mode 100644 velox/substrait/VeloxToSubstraitCallConverter.h delete mode 100644 velox/substrait/VeloxToSubstraitMappings.h delete mode 100644 velox/substrait/extensions/extension_types.yaml delete mode 100644 velox/substrait/extensions/functions_aggregate_approx.yaml delete mode 100644 velox/substrait/extensions/functions_aggregate_generic.yaml delete mode 100644 velox/substrait/extensions/functions_arithmetic.yaml delete mode 100644 velox/substrait/extensions/functions_arithmetic_decimal.yaml delete mode 100644 velox/substrait/extensions/functions_boolean.yaml delete mode 100644 velox/substrait/extensions/functions_comparison.yaml delete mode 100644 velox/substrait/extensions/functions_datetime.yaml delete mode 100644 velox/substrait/extensions/functions_logarithmic.yaml delete mode 100644 velox/substrait/extensions/functions_rounding.yaml delete mode 100644 velox/substrait/extensions/functions_set.yaml delete mode 100644 velox/substrait/extensions/functions_string.yaml delete mode 100644 velox/substrait/extensions/type_variations.yaml delete mode 100644 velox/substrait/extensions/unknown.yaml delete mode 100644 velox/substrait/tests/SubstraitExtensionTest.cpp delete mode 100644 velox/substrait/tests/SubstraitFunctionLookupTest.cpp delete mode 100644 velox/substrait/tests/SubstraitSignatureTest.cpp delete mode 100644 velox/substrait/tests/SubstraitTypeLookupTest.cpp delete mode 100644 velox/substrait/tests/SubstraitTypeTest.cpp rename velox/substrait/tests/{VeloxSustraitHashJoinRoundTripConverterTest.cpp => VeloxSubstraitJoinRoundTripTest.cpp} (89%) rename velox/substrait/tests/{VeloxSubstraitRoundTripPlanConverterTest.cpp => VeloxSubstraitRoundTripTest.cpp} (74%) diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index 3f24ef512fad..7e7df744223d 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -71,6 +71,6 @@ if(${VELOX_CODEGEN_SUPPORT}) endif() # substrait converter -if(${VELOX_ENABLE_SUBSTRAIT}) +#if(${VELOX_ENABLE_SUBSTRAIT}) add_subdirectory(substrait) -endif() +#endif() diff --git a/velox/substrait/CMakeLists.txt b/velox/substrait/CMakeLists.txt index 71b52cbfd37d..ae9fafcf45e6 100644 --- a/velox/substrait/CMakeLists.txt +++ b/velox/substrait/CMakeLists.txt @@ -52,14 +52,14 @@ set(SRCS VeloxToSubstraitPlan.cpp VeloxToSubstraitType.cpp VeloxSubstraitSignature.cpp - JoinUtils.cpp) - + JoinUtils.cpp + VariantToVectorConverter.cpp) add_library(velox_substrait_plan_converter ${SRCS}) target_include_directories(velox_substrait_plan_converter PUBLIC ${PROTO_OUTPUT_DIR}) target_link_libraries(velox_substrait_plan_converter velox_connector - velox_dwio_dwrf_common yaml-cpp) + velox_dwio_dwrf_common) if(${VELOX_BUILD_TESTING}) add_subdirectory(tests) diff --git a/velox/substrait/ExprUtils.cpp b/velox/substrait/ExprUtils.cpp deleted file mode 100644 index 88ac33bc92fb..000000000000 --- a/velox/substrait/ExprUtils.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/ExprUtils.h" -#include "velox/substrait/SubstraitType.h" - -namespace facebook::velox::substrait { - -SubstraitSignaturePtr toSubstraitSignature( - const core::CallTypedExprPtr& callTypedExpr) { - std::vector types; - types.reserve(callTypedExpr->inputs().size()); - for (const auto& input : callTypedExpr->inputs()) { - types.emplace_back(fromVelox(input->type())); - } - - return SubstraitFunctionSignature::of( - callTypedExpr->name(), types, fromVelox(callTypedExpr->type())); -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/JoinUtils.cpp b/velox/substrait/JoinUtils.cpp index 96eb795f8dd0..0576db9c2ba6 100644 --- a/velox/substrait/JoinUtils.cpp +++ b/velox/substrait/JoinUtils.cpp @@ -32,7 +32,7 @@ ::substrait::JoinRel_JoinType toProto(core::JoinType joinType) { return ::substrait::JoinRel_JoinType_JOIN_TYPE_OUTER; case core::JoinType::kLeftSemi: return ::substrait::JoinRel_JoinType_JOIN_TYPE_SEMI; - case core::JoinType::kAnti: + case core::JoinType::kNullAwareAnti: return ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI; default: VELOX_UNSUPPORTED( @@ -53,7 +53,7 @@ core::JoinType fromProto(::substrait::JoinRel_JoinType joinType) { case ::substrait::JoinRel_JoinType_JOIN_TYPE_SEMI: return core::JoinType::kLeftSemi; case ::substrait::JoinRel_JoinType_JOIN_TYPE_ANTI: - return core::JoinType::kAnti; + return core::JoinType::kNullAwareAnti; default: VELOX_UNSUPPORTED("Unsupported substrait join type, {}", joinType); } diff --git a/velox/substrait/SubstraitExtension.cpp b/velox/substrait/SubstraitExtension.cpp deleted file mode 100644 index 8cf6ec06d407..000000000000 --- a/velox/substrait/SubstraitExtension.cpp +++ /dev/null @@ -1,367 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include "velox/substrait/SubstraitExtension.h" -#include "velox/substrait/SubstraitSignature.h" - -namespace YAML { - -using namespace facebook::velox::substrait; - -static bool decodeFunctionVariant( - const Node& node, - SubstraitFunctionVariant& function) { - auto& returnType = node["return"]; - if (returnType && returnType.IsScalar()) { - /// return type can be an expression - const auto& returnExpr = returnType.as(); - std::stringstream ss(returnExpr); - std::string lastReturnType; - while (std::getline(ss, lastReturnType, '\n')) { - } - function.returnType = SubstraitType::decode(lastReturnType); - } - auto& args = node["args"]; - if (args && args.IsSequence()) { - for (auto& arg : args) { - if (arg["options"]) { // enum argument - auto enumArgument = std::make_shared( - arg.as()); - function.arguments.emplace_back(enumArgument); - } else if (arg["value"]) { // value argument - auto valueArgument = std::make_shared( - arg.as()); - function.arguments.emplace_back(valueArgument); - } else { // type argument - auto typeArgument = std::make_shared( - arg.as()); - function.arguments.emplace_back(typeArgument); - } - } - } - - auto& variadic = node["variadic"]; - if (variadic) { - auto& min = variadic["min"]; - auto& max = variadic["max"]; - if (min) { - function.variadic = std::make_optional( - {min.as(), - max ? std::make_optional(max.as()) : std::nullopt}); - } else { - function.variadic = std::nullopt; - } - } else { - function.variadic = std::nullopt; - } - - return true; -} - -template <> -struct convert { - static bool decode(const Node& node, SubstraitEnumArgument& argument) { - // 'options' is required property - auto& options = node["options"]; - if (options && options.IsSequence()) { - auto& required = node["required"]; - argument.required = required && required.as(); - return true; - } else { - return false; - } - } -}; - -template <> -struct convert { - static bool decode(const Node& node, SubstraitValueArgument& argument) { - auto& value = node["value"]; - if (value && value.IsScalar()) { - auto valueType = value.as(); - argument.type = SubstraitType::decode(valueType); - return true; - } - return false; - } -}; - -template <> -struct convert { - static bool decode(const Node& node, SubstraitTypeArgument& argument) { - // no properties need to populate for type argument, just return true if - // 'type' element exists. - return node["type"]; - } -}; - -template <> -struct convert { - static bool decode( - const Node& node, - SubstraitScalarFunctionVariant& function) { - return decodeFunctionVariant(node, function); - }; -}; - -template <> -struct convert { - static bool decode( - const Node& node, - SubstraitAggregateFunctionVariant& function) { - const auto& res = decodeFunctionVariant(node, function); - if (res) { - const auto& intermediate = node["intermediate"]; - if (intermediate) { - function.intermediate = - SubstraitType::decode(intermediate.as()); - } - } - return res; - } -}; - -template <> -struct convert { - static bool decode(const Node& node, SubstraitScalarFunction& function) { - auto& name = node["name"]; - if (name && name.IsScalar()) { - function.name = name.as(); - auto& impls = node["impls"]; - if (impls && impls.IsSequence() && impls.size() > 0) { - for (auto& impl : impls) { - auto scalarFunctionVariant = - impl.as(); - scalarFunctionVariant.name = function.name; - function.impls.emplace_back( - std::make_shared( - scalarFunctionVariant)); - } - } - return true; - } - return false; - } -}; - -template <> -struct convert { - static bool decode(const Node& node, SubstraitAggregateFunction& function) { - auto& name = node["name"]; - if (name && name.IsScalar()) { - function.name = name.as(); - auto& impls = node["impls"]; - if (impls && impls.IsSequence() && impls.size() > 0) { - for (auto& impl : impls) { - auto aggregateFunctionVariant = - impl.as(); - aggregateFunctionVariant.name = function.name; - function.impls.emplace_back( - std::make_shared( - aggregateFunctionVariant)); - } - } - return true; - } - return false; - } -}; - -template <> -struct convert { - static bool decode( - const Node& node, - facebook::velox::substrait::SubstraitTypeAnchor& typeAnchor) { - auto& name = node["name"]; - if (name && name.IsScalar()) { - typeAnchor.name = name.as(); - return true; - } - return false; - } -}; - -template <> -struct convert { - static bool decode( - const Node& node, - facebook::velox::substrait::SubstraitExtension& extension) { - auto& scalarFunctions = node["scalar_functions"]; - auto& aggregateFunctions = node["aggregate_functions"]; - const bool scalarFunctionsExists = - scalarFunctions && scalarFunctions.IsSequence(); - const bool aggregateFunctionsExists = - aggregateFunctions && aggregateFunctions.IsSequence(); - if (!scalarFunctionsExists && !aggregateFunctionsExists) { - return false; - } - - if (scalarFunctionsExists) { - for (auto& scalarFunctionNode : scalarFunctions) { - const auto& scalarFunction = - scalarFunctionNode.as(); - for (auto& scalaFunctionVariant : scalarFunction.impls) { - extension.scalarFunctionVariants.emplace_back(scalaFunctionVariant); - } - } - } - - if (aggregateFunctionsExists) { - for (auto& aggregateFunctionNode : aggregateFunctions) { - const auto& aggregateFunction = - aggregateFunctionNode.as(); - for (auto& aggregateFunctionVariant : aggregateFunction.impls) { - extension.aggregateFunctionVariants.emplace_back( - aggregateFunctionVariant); - } - } - } - - auto& types = node["types"]; - if (types && types.IsSequence()) { - for (auto& type : types) { - auto typeAnchor = type.as(); - extension.types.emplace_back( - std::make_shared(typeAnchor)); - } - } - - return true; - } -}; - -} // namespace YAML - -namespace facebook::velox::substrait { - -namespace { - -std::string getSubstraitExtensionAbsolutePath() { - const std::string absolute_path = __FILE__; - auto const pos = absolute_path.find_last_of('/'); - return absolute_path.substr(0, pos) + "/extensions/"; -} - -} // namespace - -std::shared_ptr SubstraitExtension::loadExtension() { - static const auto& extension = loadDefault(); - return extension; -} - -std::shared_ptr SubstraitExtension::loadDefault() { - static const std::vector extensionFiles = { - "functions_aggregate_approx.yaml", - "functions_aggregate_generic.yaml", - "functions_arithmetic.yaml", - "functions_arithmetic_decimal.yaml", - "functions_boolean.yaml", - "functions_comparison.yaml", - "functions_datetime.yaml", - "functions_logarithmic.yaml", - "functions_rounding.yaml", - "functions_string.yaml", - "functions_set.yaml", - "unknown.yaml", - }; - const auto& extensionRootPath = getSubstraitExtensionAbsolutePath(); - return loadExtension(extensionRootPath, extensionFiles); -} - -std::shared_ptr SubstraitExtension::loadExtension( - const std::string& basePath, - const std::vector& extensionFiles) { - std::vector yamlExtensionFiles; - yamlExtensionFiles.reserve(extensionFiles.size()); - for (auto& extensionFile : extensionFiles) { - auto const pos = basePath.find_last_of('/'); - const auto& extensionUri = basePath.substr(0, pos) + "/" + extensionFile; - yamlExtensionFiles.emplace_back(extensionUri); - } - return loadExtension(yamlExtensionFiles); -} - -std::shared_ptr SubstraitExtension::loadExtension( - const std::vector& yamlExtensionFiles) { - SubstraitExtension mergedExtension; - for (const auto& extensionUri : yamlExtensionFiles) { - const auto& substraitExtension = - YAML::LoadFile(extensionUri).as(); - - for (auto& scalarFunctionVariant : - substraitExtension.scalarFunctionVariants) { - scalarFunctionVariant->uri = extensionUri; - mergedExtension.scalarFunctionVariants.emplace_back( - scalarFunctionVariant); - } - - for (auto& aggregateFunctionVariant : - substraitExtension.aggregateFunctionVariants) { - aggregateFunctionVariant->uri = extensionUri; - mergedExtension.aggregateFunctionVariants.emplace_back( - aggregateFunctionVariant); - } - - for (auto& type : substraitExtension.types) { - type->uri = extensionUri; - mergedExtension.types.emplace_back(type); - } - } - return std::make_shared(mergedExtension); -} - -std::optional -SubstraitExtension::lookupScalarFunction(const std::string& signature) const { - for (const auto& scalarFunctionVariant : scalarFunctionVariants) { - if (scalarFunctionVariant->signature() == signature) { - return std::make_optional(scalarFunctionVariant); - } - } - return std::nullopt; -} - -std::optional -SubstraitExtension::lookupAggregateFunction( - const std::string& signature) const { - for (const auto& aggregateFunctionVariant : aggregateFunctionVariants) { - if (aggregateFunctionVariant->signature() == signature) { - return std::make_optional(aggregateFunctionVariant); - } - } - return std::nullopt; -} - -std::optional SubstraitExtension::lookupFunction( - const std::string& signature) const { - const auto& function = this->lookupScalarFunction(signature); - if (!function.has_value()) { - return this->lookupAggregateFunction(signature); - } - return function; -} - -std::optional SubstraitExtension::lookupFunction( - const SubstraitFunctionMappingsPtr& functionMappings, - const std::string& signature) const { - const auto& functionSignature = - SubstraitFunctionSignature::signature(signature, functionMappings); - return this->lookupFunction(functionSignature); -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitExtension.h b/velox/substrait/SubstraitExtension.h deleted file mode 100644 index ee7567194eb9..000000000000 --- a/velox/substrait/SubstraitExtension.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/substrait/SubstraitFunction.h" -#include "velox/substrait/SubstraitFunctionMappings.h" -#include "velox/substrait/SubstraitType.h" - -namespace facebook::velox::substrait { - -/// class used to deserialize substrait YAML extension files. -class SubstraitExtension { - public: - /// deserialize default substrait extension. - static std::shared_ptr loadExtension(); - - /// deserialize substrait extension by given basePath and extensionFiles. - static std::shared_ptr loadExtension( - const std::string& basePath, - const std::vector& extensionFiles); - - /// deserialize substrait extension by given extensionFiles. - static std::shared_ptr loadExtension( - const std::vector& extensionFiles); - - /// lookup scalar function by given scalar function signature. - std::optional lookupScalarFunction( - const std::string& signature) const; - - /// lookup aggregate function by given aggregate function signature. - std::optional lookupAggregateFunction( - const std::string& signature) const; - - /// lookup scalar or aggregate function by given function signature. - std::optional lookupFunction( - const std::string& signature) const; - - /// lookup scalar or aggregate function by given function signature and - /// function mappings. - std::optional lookupFunction( - const SubstraitFunctionMappingsPtr& functionMappings, - const std::string& signature) const; - - /// a collection of scalar function variants loaded from Substrait extension - /// yaml. - std::vector scalarFunctionVariants; - /// a collection of aggregate function variants loaded from Substrait - /// extension yaml. - std::vector aggregateFunctionVariants; - - /// substrait user defined types loaded from Substrait extension yaml. - std::vector types; - - private: - /// deserialize default substrait extension. - static std::shared_ptr loadDefault(); -}; - -using SubstraitExtensionPtr = std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunction.cpp b/velox/substrait/SubstraitFunction.cpp deleted file mode 100644 index 59d1be156142..000000000000 --- a/velox/substrait/SubstraitFunction.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitFunction.h" -#include -#include "velox/substrait/SubstraitType.h" - -namespace facebook::velox::substrait { - -std::string SubstraitFunctionVariant::signature( - const std::string& name, - const std::vector& arguments) { - std::stringstream ss; - ss << name; - if (!arguments.empty()) { - ss << ":"; - for (auto it = arguments.begin(); it != arguments.end(); ++it) { - const auto& typeSign = (*it)->toTypeString(); - if (it == arguments.end() - 1) { - ss << typeSign; - } else { - ss << typeSign << "_"; - } - } - } - - return ss.str(); -} - -std::vector -SubstraitFunctionVariant::requiredArguments() const { - std::vector res; - for (auto& arg : arguments) { - if (arg->isRequired()) { - res.push_back(arg); - } - } - return res; -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunction.h b/velox/substrait/SubstraitFunction.h deleted file mode 100644 index 05902d60fc55..000000000000 --- a/velox/substrait/SubstraitFunction.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/substrait/SubstraitType.h" - -namespace facebook::velox::substrait { - -struct SubstraitFunctionArgument { - /// whether the argument is required or not. - virtual const bool isRequired() const = 0; - /// convert argument type to short type string based on - /// https://substrait.io/extensions/#function-signature-compound-names - virtual const std::string toTypeString() const = 0; - - virtual const bool isWildcardType() const { - return false; - }; - - virtual const bool isValueArgument() const { - return false; - } -}; - -using SubstraitFunctionArgumentPtr = std::shared_ptr; - -struct SubstraitEnumArgument : public SubstraitFunctionArgument { - bool required; - bool const isRequired() const override { - return required; - } - - const std::string toTypeString() const override { - return required ? "req" : "opt"; - } -}; - -struct SubstraitTypeArgument : public SubstraitFunctionArgument { - const std::string toTypeString() const override { - return "type"; - } - const bool isRequired() const override { - return true; - } -}; - -struct SubstraitValueArgument : public SubstraitFunctionArgument { - SubstraitTypePtr type; - - const std::string toTypeString() const override { - return type->signature(); - } - - const bool isRequired() const override { - return true; - } - - const bool isWildcardType() const override { - return type->isWildcard(); - } - - const bool isValueArgument() const override { - return true; - } -}; - -using SubstraitValueArgumentPtr = std::shared_ptr; - -struct SubstraitFunctionAnchor { - /// uri of function anchor corresponding the file - std::string uri; - - /// function signature which is combination of function name and type of - /// arguments. - std::string key; - - bool operator==(const SubstraitFunctionAnchor& other) const { - return (uri == other.uri && key == other.key); - } -}; - -struct SubstraitFunctionVariadic { - int min; - std::optional max; -}; - -struct SubstraitFunctionVariant { - /// scalar function name. - std::string name; - /// scalar function uri. - std::string uri; - /// function arguments. - std::vector arguments; - /// return type of scalar function. - SubstraitTypePtr returnType; - /// function variadic - std::optional variadic; - - /// create function signature by given function name and arguments. - static std::string signature( - const std::string& name, - const std::vector& arguments); - - /// create function signature by function name and arguments. - const std::string signature() const { - return signature(name, arguments); - } - - const SubstraitFunctionAnchor anchor() const { - return {uri, signature()}; - } - - const bool isWildcard() const { - for (auto& arg : arguments) { - if (arg->isWildcardType()) { - return true; - } - } - return false; - } - - const bool isVariadic() const { - return variadic.has_value(); - } - - virtual const bool isAggregateFunction() { - return false; - } - - virtual const bool isScalarFunction() { - return true; - } - - /// A collection of required arguments - std::vector requiredArguments() const; -}; - -using SubstraitFunctionVariantPtr = std::shared_ptr; - -struct SubstraitScalarFunctionVariant : public SubstraitFunctionVariant {}; - -struct SubstraitAggregateFunctionVariant : public SubstraitFunctionVariant { - SubstraitTypePtr intermediate; - const bool isAggregateFunction() override { - return true; - } - const bool isScalarFunction() override { - return false; - } - - /// return intermediate function signature by function name and intermediate. - const std::string intermediateSignature() const { - if (intermediate) { - return name + ":" + intermediate->signature(); - } - return name; - } -}; - -using SubstraitAggregateFunctionVariantPtr = - std::shared_ptr; - -struct SubstraitScalarFunction { - /// scalar function name. - std::string name; - /// A collection of scalar function variants. - std::vector> impls; -}; - -struct SubstraitAggregateFunction { - /// aggregate function name. - std::string name; - /// A collection of aggregate function variants. - std::vector> impls; -}; - -} // namespace facebook::velox::substrait - -namespace std { - -/// hash function of facebook::velox::substrait::SubstraitFunctionAnchor -template <> -struct hash { - size_t operator()( - const facebook::velox::substrait::SubstraitFunctionAnchor& k) const { - return hash()(k.key) ^ hash()(k.uri); - } -}; - -}; // namespace std diff --git a/velox/substrait/SubstraitFunctionCollector.cpp b/velox/substrait/SubstraitFunctionCollector.cpp deleted file mode 100644 index bf66c2b9605c..000000000000 --- a/velox/substrait/SubstraitFunctionCollector.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitFunctionCollector.h" -#include "velox/substrait/proto/substrait/extensions/extensions.pb.h" - -namespace facebook::velox::substrait { - -SubstraitFunctionCollector::SubstraitFunctionCollector() { - functions_ = std::make_shared>(); - types_ = std::make_shared>(); -} - -void SubstraitFunctionCollector::addFunctionToPlan( - ::substrait::Plan* substraitPlan) const { - using SimpleExtensionURI = ::substrait::extensions::SimpleExtensionURI; - int uriPos = 1; - std::unordered_map uris; - for (auto& [referenceNum, function] : functions_->forwardMap_) { - SimpleExtensionURI* extensionUri; - const auto uri = uris.find(function.uri); - if (uri == uris.end()) { - extensionUri = substraitPlan->add_extension_uris(); - extensionUri->set_extension_uri_anchor(++uriPos); - extensionUri->set_uri(function.uri); - uris[function.uri] = extensionUri; - } else { - extensionUri = uri->second; - } - - auto extensionFunction = - substraitPlan->add_extensions()->mutable_extension_function(); - extensionFunction->set_extension_uri_reference( - extensionUri->extension_uri_anchor()); - extensionFunction->set_function_anchor(referenceNum); - extensionFunction->set_name(function.key); - } -} - -int SubstraitFunctionCollector::getFunctionReference( - const SubstraitFunctionVariantPtr& function) { - const auto& anchorReference = - functions_->reverseMap_.find(function->anchor()); - if (anchorReference != functions_->reverseMap_.end()) { - return anchorReference->second; - } - ++functionReference_; - functions_->put(functionReference_, function->anchor()); - return functionReference_; -} - -template -void SubstraitFunctionCollector::BiDirectionHashMap::put( - const int& key, - const T& value) { - forwardMap_[key] = value; - reverseMap_[value] = key; -} - -void SubstraitFunctionCollector::addExtensionToPlan( - ::substrait::Plan* substraitPlan) const { - addFunctionToPlan(substraitPlan); - addTypeToPlan(substraitPlan); -} - -void SubstraitFunctionCollector::addTypeToPlan( - ::substrait::Plan* substraitPlan) const { - using SimpleExtensionURI = ::substrait::extensions::SimpleExtensionURI; - int uriPos = 1; - std::unordered_map uris; - for (auto& [referenceNum, typeAnchor] : types_->forwardMap_) { - SimpleExtensionURI* extensionUri; - if (uris.find(typeAnchor.uri) == uris.end()) { - extensionUri = substraitPlan->add_extension_uris(); - extensionUri->set_extension_uri_anchor(++uriPos); - extensionUri->set_uri(typeAnchor.uri); - uris[typeAnchor.uri] = extensionUri; - } else { - extensionUri = uris.at(typeAnchor.uri); - } - - auto extensionType = - substraitPlan->add_extensions()->mutable_extension_type(); - extensionType->set_extension_uri_reference( - extensionUri->extension_uri_anchor()); - extensionType->set_type_anchor(referenceNum); - extensionType->set_name(typeAnchor.name); - } -} - -int SubstraitFunctionCollector::getTypeReference( - const SubstraitTypeAnchorPtr& typeAnchor) { - const auto& anchorReference = types_->reverseMap_.find(*typeAnchor); - if (anchorReference != types_->reverseMap_.end()) { - return anchorReference->second; - } - ++typeReference_; - types_->put(functionReference_, *typeAnchor); - return typeReference_; -} - -SubstraitFunctionVariantPtr -SubstraitFunctionCollector::getScalarFunctionVariant( - const int& referernce, - const SubstraitExtension& extension) { - const auto& functionAnchor = functions_->forwardMap_.find(referernce); - if (functionAnchor != functions_->forwardMap_.end()) { - for (const auto& scalarFunctionVariant : extension.scalarFunctionVariants) { - if (scalarFunctionVariant->anchor() == functionAnchor->second) { - return scalarFunctionVariant; - } - } - } - VELOX_NYI( - "Unknown scalar function id. Make sure that the function id provided was shared in the extensions section of the plan."); -} - -SubstraitFunctionVariantPtr -SubstraitFunctionCollector::getAggregateFunctionVariant( - const int& referernce, - const SubstraitExtension& extension) { - const auto& functionAnchor = functions_->forwardMap_.find(referernce); - if (functionAnchor != functions_->forwardMap_.end()) { - for (const auto& aggregateFunctionVaraint : - extension.aggregateFunctionVariants) { - if (aggregateFunctionVaraint->anchor() == functionAnchor->second) { - return aggregateFunctionVaraint; - } - } - } - - VELOX_NYI( - "Unknown aggregate function id. Make sure that the function id provided was shared in the extensions section of the plan."); -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunctionCollector.h b/velox/substrait/SubstraitFunctionCollector.h deleted file mode 100644 index e16b2138ff6a..000000000000 --- a/velox/substrait/SubstraitFunctionCollector.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include "velox/core/Expressions.h" -#include "velox/substrait/SubstraitExtension.h" -#include "velox/substrait/proto/substrait/algebra.pb.h" -#include "velox/substrait/proto/substrait/plan.pb.h" - -namespace facebook::velox::substrait { - -/// Maintains a mapping for function and function reference -class SubstraitFunctionCollector { - public: - SubstraitFunctionCollector(); - - /// get function reference by given Substrait function. - /// @param function substrait extension function - /// @return reference number of a Substrait extension function - int getFunctionReference(const SubstraitFunctionVariantPtr& function); - - /// get type reference by given Substrait type anchor. - /// @param typeAnchor substrait extension type - /// @return reference number of a Substrait extension type - int getTypeReference(const SubstraitTypeAnchorPtr& typeAnchor); - - /// add extension functions and types to Substrait plan. - void addExtensionToPlan(::substrait::Plan* plan) const; - - /// find substrait scalar function by given function reference and the - /// extension which could be useful for resolve function varaint from a - /// substrait plan. - SubstraitFunctionVariantPtr getScalarFunctionVariant( - const int& referernce, - const SubstraitExtension& extension); - - /// find substrait aggregate function by given function reference and the - /// extension which could be useful for resolve function varaint from a - // substrait plan. - SubstraitFunctionVariantPtr getAggregateFunctionVariant( - const int& referernce, - const SubstraitExtension& extension); - - private: - /// A bi-direction hash map to keep the relation between reference number and - /// either function or type. - /// @T either SubstraitFunctionAnchor or std::string - template - class BiDirectionHashMap { - public: - void put(const int& key, const T& value); - std::unordered_map forwardMap_; - std::unordered_map reverseMap_; - }; - - /// add extension functions to Substrait plan. - void addFunctionToPlan(::substrait::Plan* plan) const; - - /// add extension functions to Substrait plan. - void addTypeToPlan(::substrait::Plan* plan) const; - - int functionReference_ = -1; - int typeReference_ = -1; - std::shared_ptr> functions_; - std::shared_ptr> types_; -}; - -using SubstraitFunctionCollectorPtr = - std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunctionLookup.cpp b/velox/substrait/SubstraitFunctionLookup.cpp deleted file mode 100644 index bf12b5529576..000000000000 --- a/velox/substrait/SubstraitFunctionLookup.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitFunctionLookup.h" -#include -#include "velox/substrait/SubstraitSignature.h" - -namespace facebook::velox::substrait { - -namespace { - -/// create a new function variant with existing function variant and substrait -/// signature. -SubstraitFunctionVariantPtr cloneFunctionVariantWithSignature( - SubstraitFunctionVariantPtr substraitFunctionVariant, - SubstraitSignaturePtr substraitSignature) { - auto functionVariant = *substraitFunctionVariant.get(); - auto& functionArguments = functionVariant.arguments; - functionArguments.clear(); - functionArguments.reserve(substraitSignature->getArguments().size()); - for (const auto& argument : substraitSignature->getArguments()) { - const auto& valueArgument = std::make_shared(); - valueArgument->type = argument; - functionArguments.emplace_back(valueArgument); - } - return std::make_shared(functionVariant); -} - -} // namespace - -SubstraitFunctionLookup::SubstraitFunctionLookup( - const std::vector& functions, - const SubstraitFunctionMappingsPtr& functionMappings) - : functionMappings_(functionMappings) { - std::unordered_map> - signatures; - - for (const auto& function : functions) { - const auto& functionSignature = signatures.find(function->name); - if (functionSignature == signatures.end()) { - std::vector nameFunctions; - nameFunctions.emplace_back(function); - signatures.insert({function->name, nameFunctions}); - } else { - auto& nameFunctions = functionSignature->second; - nameFunctions.emplace_back(function); - } - } - - for (const auto& [name, signature] : signatures) { - auto functionFinder = - std::make_shared(name, signature); - functionFinders_.insert({name, functionFinder}); - } -} - -const std::optional -SubstraitFunctionLookup::lookupFunction( - const SubstraitSignaturePtr& functionSignature) const { - const auto& functionMappings = getFunctionMappings(); - const auto& functionName = functionSignature->getName(); - const auto& substraitFunctionName = - functionMappings.find(functionName) != functionMappings.end() - ? functionMappings.at(functionName) - : functionName; - - if (functionFinders_.find(substraitFunctionName) == functionFinders_.end()) { - return std::nullopt; - } - const auto& newFunctionSignature = SubstraitFunctionSignature::of( - substraitFunctionName, - functionSignature->getArguments(), - functionSignature->getReturnType()); - const auto& functionFinder = functionFinders_.at(substraitFunctionName); - return functionFinder->lookupFunction(newFunctionSignature); -} - -SubstraitFunctionLookup::SubstraitFunctionFinder::SubstraitFunctionFinder( - const std::string& name, - const std::vector& functions) - : name_(name) { - for (const auto& function : functions) { - if (function->isVariadic()) { - functionVariantMatchers_.emplace_back( - std::make_shared(function)); - } else if (function->isWildcard()) { - functionVariantMatchers_.emplace_back( - std::make_shared(function)); - } else { - directMap_.insert({function->signature(), function}); - if (function->requiredArguments().size() != function->arguments.size()) { - const std::string& functionKey = SubstraitFunctionVariant::signature( - function->name, function->requiredArguments()); - directMap_.insert({functionKey, function}); - } - if (function->isAggregateFunction()) { - const auto& aggregateFunc = - std::dynamic_pointer_cast( - function); - directMap_.insert({aggregateFunc->intermediateSignature(), function}); - } - } - } -} - -const std::optional -SubstraitFunctionLookup::SubstraitFunctionFinder::lookupFunction( - const SubstraitSignaturePtr& functionSignature) const { - const auto& types = functionSignature->getArguments(); - const auto& signature = functionSignature->signature(); - /// try to do a direct match - const auto& directFunctionVariant = directMap_.find(signature); - if (directFunctionVariant != directMap_.end()) { - const auto& functionVariant = directFunctionVariant->second; - const auto& returnType = functionSignature->getReturnType(); - if (returnType && functionSignature->getReturnType() && - returnType->isSameAs(functionSignature->getReturnType())) { - return std::make_optional(functionVariant); - } - return std::nullopt; - } - - // return empty if no arguments - if (functionSignature->getArguments().empty()) { - return std::nullopt; - } - - // try to match with wildcard or variadic function variants. - for (const auto& functionVariantMatcher : functionVariantMatchers_) { - const auto& matched = functionVariantMatcher->tryMatch(functionSignature); - if (matched.has_value()) { - return matched; - } - } - return std::nullopt; -} - -std::optional -SubstraitFunctionLookup::VariadicFunctionVariantMatcher ::tryMatch( - const SubstraitSignaturePtr& signature) const { - const auto& arguments = signature->getArguments(); - const auto& maxArgumentNum = underlying_->variadic->max; - if ((arguments.size() < underlying_->variadic->min) || - (maxArgumentNum.has_value() && - arguments.size() > maxArgumentNum.value())) { - return std::nullopt; - } - - const auto& variadicArgument = underlying_->arguments[0]; - - for (auto& type : signature->getArguments()) { - if (variadicArgument->isValueArgument()) { - const auto& variadicValueArgument = - std::dynamic_pointer_cast( - variadicArgument); - if (!variadicValueArgument->type->isSameAs(type)) { - return std::nullopt; - } - } - } - return std::make_optional(underlying_); -} - -SubstraitFunctionLookup::WildcardFunctionVariantMatcher:: - WildcardFunctionVariantMatcher( - const SubstraitFunctionVariantPtr& functionVariant) - : underlying_(functionVariant) { - std::unordered_map typeToRef; - int typeRef = 0; - int pos = 0; - for (auto& arg : underlying_->arguments) { - if (arg->isValueArgument()) { - const auto& typeString = arg->toTypeString(); - if (typeToRef.find(typeString) == typeToRef.end()) { - typeToRef.insert({typeString, typeRef++}); - } - typeTraits.insert({pos++, typeToRef[typeString]}); - } - } -} - -std::optional -SubstraitFunctionLookup::WildcardFunctionVariantMatcher ::tryMatch( - const SubstraitSignaturePtr& signature) const { - if (isSameTypeTraits(signature)) { - return cloneFunctionVariantWithSignature(underlying_, signature); - } - return std::nullopt; -} - -bool SubstraitFunctionLookup::WildcardFunctionVariantMatcher::isSameTypeTraits( - const SubstraitSignaturePtr& signature) const { - std::unordered_map typeToRef; - std::unordered_map signatureTraits; - int ref = 0; - int pos = 0; - for (auto& arg : signature->getArguments()) { - const auto& typeString = arg->signature(); - if (typeToRef.find(typeString) == typeToRef.end()) { - typeToRef.insert({typeString, ref++}); - } - signatureTraits.insert({pos++, typeToRef[typeString]}); - } - - bool sameSize = typeTraits.size() == signatureTraits.size(); - if (sameSize) { - for (const auto& [typePos, typeRef] : typeTraits) { - if (signatureTraits.at(typePos) != typeRef) { - return false; - } - } - return true; - } else { - return false; - } -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunctionLookup.h b/velox/substrait/SubstraitFunctionLookup.h deleted file mode 100644 index 6fcc2f0d7dbd..000000000000 --- a/velox/substrait/SubstraitFunctionLookup.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/common/base/Exceptions.h" -#include "velox/core/Expressions.h" -#include "velox/substrait/SubstraitExtension.h" -#include "velox/substrait/SubstraitFunctionMappings.h" -#include "velox/substrait/SubstraitParser.h" -#include "velox/substrait/SubstraitSignature.h" -#include "velox/substrait/VeloxToSubstraitType.h" -#include "velox/type/Type.h" - -namespace facebook::velox::substrait { - -class SubstraitFunctionLookup { - protected: - SubstraitFunctionLookup( - const std::vector& functionVariants, - const SubstraitFunctionMappingsPtr& functionMappings); - - public: - /// lookup function variant by given substrait function Signature. - const std::optional lookupFunction( - const SubstraitSignaturePtr& functionSignature) const; - - protected: - /// get the map which store the function names in difference between velox - /// and substrait. - virtual const FunctionMappings getFunctionMappings() const = 0; - - const SubstraitFunctionMappingsPtr functionMappings_; - - private: - /// An interface for lookup function variant with substrait signature. - class FunctionVariantMatcher { - public: - /// lookup function variant by given substrait function signature. - ///@return substrait function variant if matched, or null option if not - /// matched. - virtual std::optional tryMatch( - const SubstraitSignaturePtr& signature) const = 0; - }; - - using FunctionVariantMatcherPtr = - std::shared_ptr; - - /// An implementation of FunctionVariantMatcher which match signature with - /// wildcard type. - class WildcardFunctionVariantMatcher : public FunctionVariantMatcher { - public: - WildcardFunctionVariantMatcher( - const SubstraitFunctionVariantPtr& functionVaraint); - - /// return function varaint if current wildcard function variant match the - /// given signature and. - std::optional tryMatch( - const SubstraitSignaturePtr& signature) const override; - - private: - /// test current wildcard function variant match the given signature. - bool isSameTypeTraits(const SubstraitSignaturePtr& signature) const; - - /// A map store type position and its type reference. - std::unordered_map typeTraits; - - /// the underlying function variant; - const SubstraitFunctionVariantPtr underlying_; - }; - - /// An implementation of FunctionVariantMatcher which match signature with - /// variadic arguments. - class VariadicFunctionVariantMatcher : public FunctionVariantMatcher { - public: - VariadicFunctionVariantMatcher( - const SubstraitFunctionVariantPtr& functionVaraint) - : underlying_(functionVaraint) {} - - std::optional tryMatch( - const SubstraitSignaturePtr& signature) const override; - - private: - /// the underlying function variant; - const SubstraitFunctionVariantPtr underlying_; - }; - - class SubstraitFunctionFinder { - public: - /// construct FunctionFinder with function name and it's function variants - SubstraitFunctionFinder( - const std::string& name, - const std::vector& functionVariants); - - /// lookup function variant by given substrait function signature. - const std::optional lookupFunction( - const SubstraitSignaturePtr& signature) const; - - private: - /// function name - const std::string name_; - /// A map store the function signature and corresponding function variant - std::unordered_map directMap_; - /// A collection of function variant matcher - std::vector functionVariantMatchers_; - }; - - using SubstraitFunctionFinderPtr = - std::shared_ptr; - - std::unordered_map functionFinders_; -}; - -class SubstraitScalarFunctionLookup : public SubstraitFunctionLookup { - public: - SubstraitScalarFunctionLookup( - const SubstraitExtensionPtr& extension, - const SubstraitFunctionMappingsPtr& functionMappings) - : SubstraitFunctionLookup( - extension->scalarFunctionVariants, - functionMappings) {} - - protected: - /// A map store the difference of scalar function names between velox - /// and substrait. - const FunctionMappings getFunctionMappings() const override { - return functionMappings_->scalarMappings(); - } -}; - -using SubstraitScalarFunctionLookupPtr = - std::shared_ptr; - -class SubstraitAggregateFunctionLookup : public SubstraitFunctionLookup { - public: - SubstraitAggregateFunctionLookup( - const SubstraitExtensionPtr& extension, - const SubstraitFunctionMappingsPtr& functionMappings) - : SubstraitFunctionLookup( - extension->aggregateFunctionVariants, - functionMappings) {} - - protected: - /// A map store the difference of aggregate function names between velox - /// and substrait. - const FunctionMappings getFunctionMappings() const override { - return functionMappings_->aggregateMappings(); - } -}; - -using SubstraitAggregateFunctionLookupPtr = - std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitFunctionMappings.h b/velox/substrait/SubstraitFunctionMappings.h deleted file mode 100644 index bc9e400589a1..000000000000 --- a/velox/substrait/SubstraitFunctionMappings.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "memory" -#include "unordered_map" -#include "vector" - -namespace facebook::velox::substrait { - -using FunctionMappings = std::unordered_map; - -/// An interface describe the function names in difference between velox engine -/// own and Substrait system. -class SubstraitFunctionMappings { - public: - /// scalar function names in difference between engine own and Substrait. - virtual const FunctionMappings scalarMappings() const { - static const FunctionMappings scalarMappings{}; - return scalarMappings; - }; - - /// aggregate function names in difference between engine own and Substrait. - virtual const FunctionMappings aggregateMappings() const { - static const FunctionMappings aggregateMappings{}; - return aggregateMappings; - }; - - /// window function names in difference between engine own and Substrait. - virtual const FunctionMappings windowMappings() const { - static const FunctionMappings windowMappings{}; - return windowMappings; - }; -}; - -using SubstraitFunctionMappingsPtr = - std::shared_ptr; -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitParser.h b/velox/substrait/SubstraitParser.h index 6cc44158df7d..a76db95efc1d 100644 --- a/velox/substrait/SubstraitParser.h +++ b/velox/substrait/SubstraitParser.h @@ -89,8 +89,7 @@ class SubstraitParser { {"subtract", "minus"}, {"modulus", "mod"}, {"not_equal", "neq"}, - {"equal", "eq"}, - {"substring", "substr"}}; + {"equal", "eq"}}; }; } // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitSignature.cpp b/velox/substrait/SubstraitSignature.cpp deleted file mode 100644 index 7757a716d26a..000000000000 --- a/velox/substrait/SubstraitSignature.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitSignature.h" -#include -#include - -using namespace folly; - -namespace facebook::velox::substrait { - -const std::string SubstraitFunctionSignature::signature() const { - std::stringstream signature; - signature << name_; - if (!arguments_.empty()) { - signature << ":"; - for (auto it = arguments_.begin(); it != arguments_.end(); ++it) { - const auto& typeSign = (*it)->signature(); - if (it == arguments_.end() - 1) { - signature << typeSign; - } else { - signature << typeSign << "_"; - } - } - } - - return signature.str(); -} - -const std::string SubstraitFunctionSignature::signature( - const std::string& functionSignature, - const SubstraitFunctionMappingsPtr& functionMappings) { - // try to replace function name with function mappings - if (functionMappings) { - std::vector functionAndSignatures; - folly::split(":", functionSignature, functionAndSignatures); - const auto& scalarMappings = functionMappings->scalarMappings(); - const auto& aggregateMappings = functionMappings->aggregateMappings(); - if (functionAndSignatures.size() == 2) { - const auto& functionName = functionAndSignatures.at(0); - const auto& signatures = functionAndSignatures.at(1); - if (scalarMappings.find(functionName) != scalarMappings.end()) { - return scalarMappings.at(functionName) + ":" + signatures; - } else if ( - aggregateMappings.find(functionName) != aggregateMappings.end()) { - return aggregateMappings.at(functionName) + ":" + signatures; - } - } else if (functionAndSignatures.size() == 1) { - if (scalarMappings.find(functionSignature) != scalarMappings.end()) { - return scalarMappings.at(functionSignature); - } else if ( - aggregateMappings.find(functionSignature) != - aggregateMappings.end()) { - return aggregateMappings.at(functionSignature); - } - } - } - return functionSignature; -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitSignature.h b/velox/substrait/SubstraitSignature.h deleted file mode 100644 index f1e84d733558..000000000000 --- a/velox/substrait/SubstraitSignature.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "SubstraitType.h" -#include "velox/substrait/SubstraitFunctionMappings.h" - -namespace facebook::velox::substrait { - -class SubstraitFunctionSignature { - public: - /// construct the substrait function signature with function name, return type - /// and arguments. - SubstraitFunctionSignature( - const std::string& name, - const std::vector& arguments, - const SubstraitTypePtr& returnType) - : name_(name), arguments_(arguments), returnType_(returnType) {} - - /// A shortcut method to create shared_ptr of SubstraitFunctionSignature. - static std::shared_ptr of( - const std::string& name, - const std::vector& arguments = {}, - const SubstraitTypePtr& returnType = nullptr) { - return std::make_shared( - name, arguments, returnType); - } - - /// Return function signature according to the given function name and - /// substrait types. - const std::string signature() const; - - const std::string getName() const { - return name_; - } - - const std::vector getArguments() const { - return arguments_; - } - - const SubstraitTypePtr getReturnType() const { - return returnType_; - } - - /// return an new function signature with function mappings - static const std::string signature( - const std::string& functionSignature, - const SubstraitFunctionMappingsPtr& functionMappings); - - private: - const std::string name_; - const std::vector arguments_; - const SubstraitTypePtr returnType_; -}; - -using SubstraitSignaturePtr = std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitToVeloxExpr.cpp b/velox/substrait/SubstraitToVeloxExpr.cpp index 696905dd86fa..cfe56d42776b 100644 --- a/velox/substrait/SubstraitToVeloxExpr.cpp +++ b/velox/substrait/SubstraitToVeloxExpr.cpp @@ -75,20 +75,22 @@ SubstraitVeloxExprConverter::toVeloxExpr( return std::make_shared( variant(substraitLit.boolean())); case ::substrait::Expression_Literal::LiteralTypeCase::kI8: + // SubstraitLit.i8() will return int32, so we need this type conversion. return std::make_shared( - variant(substraitLit.i8())); + variant(static_cast(substraitLit.i8()))); case ::substrait::Expression_Literal::LiteralTypeCase::kI16: + // SubstraitLit.i16() will return int32, so we need this type conversion. return std::make_shared( - variant(substraitLit.i16())); + variant(static_cast(substraitLit.i16()))); case ::substrait::Expression_Literal::LiteralTypeCase::kI32: return std::make_shared( variant(substraitLit.i32())); - case ::substrait::Expression_Literal::LiteralTypeCase::kI64: - return std::make_shared( - variant(substraitLit.i64())); case ::substrait::Expression_Literal::LiteralTypeCase::kFp32: return std::make_shared( variant(substraitLit.fp32())); + case ::substrait::Expression_Literal::LiteralTypeCase::kI64: + return std::make_shared( + variant(substraitLit.i64())); case ::substrait::Expression_Literal::LiteralTypeCase::kFp64: return std::make_shared( variant(substraitLit.fp64())); diff --git a/velox/substrait/SubstraitToVeloxPlan.cpp b/velox/substrait/SubstraitToVeloxPlan.cpp index 99c0bf587bac..701a80a5bb90 100644 --- a/velox/substrait/SubstraitToVeloxPlan.cpp +++ b/velox/substrait/SubstraitToVeloxPlan.cpp @@ -15,11 +15,9 @@ */ #include "velox/substrait/SubstraitToVeloxPlan.h" -#include "velox/core/Expressions.h" #include "velox/substrait/TypeUtils.h" +#include "velox/substrait/VariantToVectorConverter.h" #include "velox/type/Type.h" -#include "velox/vector/ComplexVector.h" -#include "velox/vector/FlatVector.h" namespace facebook::velox::substrait { namespace { @@ -47,58 +45,6 @@ core::AggregationNode::Step toAggregationStep( } } } // namespace -namespace { -template -VectorPtr setVectorFromVariantsByKind( - const std::vector& value, - memory::MemoryPool* pool) { - using T = typename TypeTraits::NativeType; - - auto flatVector = std::dynamic_pointer_cast>( - BaseVector::create(CppToType::create(), value.size(), pool)); - - for (vector_size_t i = 0; i < value.size(); i++) { - if (value[i].isNull()) { - flatVector->setNull(i, true); - } else { - flatVector->set(i, value[i].value()); - } - } - return flatVector; -} - -template <> -VectorPtr setVectorFromVariantsByKind( - const std::vector& value, - memory::MemoryPool* pool) { - throw std::invalid_argument("Return of VARBINARY data is not supported"); -} - -template <> -VectorPtr setVectorFromVariantsByKind( - const std::vector& value, - memory::MemoryPool* pool) { - auto flatVector = std::dynamic_pointer_cast>( - BaseVector::create(VARCHAR(), value.size(), pool)); - - for (vector_size_t i = 0; i < value.size(); i++) { - if (value[i].isNull()) { - flatVector->setNull(i, true); - } else { - flatVector->set(i, StringView(value[i].value())); - } - } - return flatVector; -} - -VectorPtr setVectorFromVariants( - const TypePtr& type, - const std::vector& value, - velox::memory::MemoryPool* pool) { - return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( - setVectorFromVariantsByKind, type->kind(), value, pool); -} -} // namespace core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( const ::substrait::AggregateRel& aggRel) { @@ -408,7 +354,7 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( return planNode; } if (rel.has_join()) { - return toVeloxPlan(rel.join(), pool); + return toVeloxPlan(rel.join()); } VELOX_NYI("Substrait conversion not supported for Rel."); @@ -533,7 +479,6 @@ connector::hive::SubfieldFilters SubstraitVeloxPlanConverter::toVeloxFilter( switch (typeCase) { case ::substrait::Expression::RexTypeCase::kSelection: { auto sel = argExpr.selection(); - // TODO: Only direct reference is considered here. auto dRef = sel.direct_reference(); colIdx = substraitParser_->parseReferenceSegment(dRef); @@ -615,7 +560,6 @@ void SubstraitVeloxPlanConverter::flattenConditions( if (getNameBeforeDelimiter(filterNameSpec, ":") == "and") { for (const auto& sCondition : sFunc.arguments()) { flattenConditions(sCondition.value(), scalarFunctions); - } } else { scalarFunctions.emplace_back(sFunc); @@ -673,15 +617,13 @@ void SubstraitVeloxPlanConverter::extractJoinKeys( if (visited->rex_type_case() == ::substrait::Expression::RexTypeCase::kScalarFunction) { auto sFunc = visited->scalar_function(); - auto filterNameSpec = substraitParser_->findFunctionSpec( + auto funcName = substraitParser_->findFunctionSpec( functionMap_, sFunc.function_reference()); - const auto& funcName = substraitParser_->getFunctionName(filterNameSpec); const auto& args = visited->scalar_function().arguments(); if (funcName == "and") { expressions.push_back(&args[0].value()); expressions.push_back(&args[1].value()); - } else if ( - funcName == "eq" || funcName == "equalto" || funcName == "equal") { + } else if (funcName == "eq") { VELOX_CHECK(std::all_of( args.cbegin(), args.cend(), @@ -702,8 +644,7 @@ void SubstraitVeloxPlanConverter::extractJoinKeys( } core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( - const ::substrait::JoinRel& sJoin, - memory::MemoryPool* pool) { + const ::substrait::JoinRel& sJoin) { if (!sJoin.has_left()) { VELOX_FAIL("Left Rel is expected in JoinRel."); } @@ -711,8 +652,8 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( VELOX_FAIL("Right Rel is expected in JoinRel."); } - auto leftNode = toVeloxPlan(sJoin.left(), pool); - auto rightNode = toVeloxPlan(sJoin.right(), pool); + auto leftNode = toVeloxPlan(sJoin.left()); + auto rightNode = toVeloxPlan(sJoin.right()); auto outputRowType = leftNode->outputType()->unionWith(rightNode->outputType()); diff --git a/velox/substrait/SubstraitToVeloxPlan.h b/velox/substrait/SubstraitToVeloxPlan.h index 5e83772b2e9c..c0ad9452f02d 100644 --- a/velox/substrait/SubstraitToVeloxPlan.h +++ b/velox/substrait/SubstraitToVeloxPlan.h @@ -75,8 +75,7 @@ class SubstraitVeloxPlanConverter { /// Convert Substrait JoinRel into Velox PlanNode. core::PlanNodePtr toVeloxPlan( - const ::substrait::JoinRel& sJoin, - memory::MemoryPool* pool); + const ::substrait::JoinRel& sJoin); /// Convert Substrait Plan into Velox PlanNode. core::PlanNodePtr toVeloxPlan(const ::substrait::Plan& substraitPlan); diff --git a/velox/substrait/SubstraitType.cpp b/velox/substrait/SubstraitType.cpp deleted file mode 100644 index 71b3a7450109..000000000000 --- a/velox/substrait/SubstraitType.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitType.h" -#include "velox/type/Type.h" - -namespace facebook::velox::substrait { - -namespace { - -size_t findNextComma(const std::string& str, size_t start) { - int cnt = 0; - for (auto i = start; i < str.size(); i++) { - if (str[i] == '<') { - cnt++; - } else if (str[i] == '>') { - cnt--; - } else if (cnt == 0 && str[i] == ',') { - return i; - } - } - - return std::string::npos; -} - -} // namespace - -SubstraitTypePtr SubstraitType::decode(const std::string& rawType) { - std::string matchingType = rawType; - const auto& questionMaskPos = rawType.find_last_of('?'); - // deal with type and with a question mask like "i32?". - if (questionMaskPos != std::string::npos) { - matchingType = rawType.substr(0, questionMaskPos); - } - std::transform( - matchingType.begin(), - matchingType.end(), - matchingType.begin(), - [](unsigned char c) { return std::tolower(c); }); - - const auto& leftAngleBracketPos = rawType.find('<'); - if (leftAngleBracketPos == std::string::npos) { - const auto& scalarType = scalarTypeMapping().find(matchingType); - if (scalarType != scalarTypeMapping().end()) { - return scalarType->second; - } else if (matchingType.rfind("unknown", 0) == 0) { - return std::make_shared(rawType); - } else { - return std::make_shared(rawType); - } - } - const auto& rightAngleBracketPos = rawType.rfind('>'); - VELOX_CHECK( - rightAngleBracketPos != std::string::npos, - "Couldn't find the closing angle bracket."); - - auto baseType = matchingType.substr(0, leftAngleBracketPos); - - std::vector nestedTypes; - nestedTypes.reserve(8); - auto prevPos = leftAngleBracketPos + 1; - auto commaPos = findNextComma(rawType, prevPos); - while (commaPos != std::string::npos) { - auto token = rawType.substr(prevPos, commaPos - prevPos); - nestedTypes.emplace_back(decode(token)); - prevPos = commaPos + 1; - commaPos = findNextComma(rawType, prevPos); - } - auto token = rawType.substr(prevPos, rightAngleBracketPos - prevPos); - nestedTypes.emplace_back(decode(token)); - - if (baseType == "list") { - VELOX_CHECK( - nestedTypes.size() == 1, - "list type can only have one parameterized type"); - return std::make_shared(nestedTypes[0]); - } else if (baseType == "map") { - VELOX_CHECK( - nestedTypes.size() == 2, - "map type must have a parameterized type for key and a parameterized type for value"); - return std::make_shared(nestedTypes[0], nestedTypes[1]); - } else if (baseType == "decimal") { - VELOX_CHECK( - nestedTypes.size() == 2, - "decimal type must have a parameterized type for precision and a parameterized type for scale"); - auto precision = - std::dynamic_pointer_cast( - nestedTypes[0]); - auto scale = std::dynamic_pointer_cast( - nestedTypes[1]); - return std::make_shared(precision, scale); - } else if (baseType == "varchar") { - VELOX_CHECK( - nestedTypes.size() == 1, - "varchar type must have a parameterized type length"); - auto length = std::dynamic_pointer_cast( - nestedTypes[0]); - return std::make_shared(length); - } else if (baseType == "fixedchar") { - VELOX_CHECK( - nestedTypes.size() == 1, - "fixedchar type must have a parameterized type length"); - auto length = std::dynamic_pointer_cast( - nestedTypes[0]); - return std::make_shared(length); - } else if (baseType == "fixedbinary") { - VELOX_CHECK( - nestedTypes.size() == 1, - "fixedbinary type must have a parameterized type length"); - auto length = std::dynamic_pointer_cast( - nestedTypes[0]); - return std::make_shared(length); - } else if (baseType == "struct") { - VELOX_CHECK( - !nestedTypes.empty(), - "struct type must have at least one parameterized type"); - return std::make_shared(nestedTypes); - } else { - VELOX_NYI("Unsupported typed {}", rawType); - } -} - -#define SUBSTRAIT_SCALAR_TYPE_MAPPING(typeKind) \ - { \ - SubstraitTypeTraits::typeString, \ - std::make_shared>( \ - SubstraitTypeBase()) \ - } - -const std::unordered_map& -SubstraitType::scalarTypeMapping() { - static const std::unordered_map scalarTypeMap{ - SUBSTRAIT_SCALAR_TYPE_MAPPING(kBool), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kI8), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kI16), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kI32), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kI64), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kFp32), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kFp64), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kString), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kBinary), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kTimestamp), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kTimestampTz), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kDate), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kTime), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kIntervalDay), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kIntervalYear), - SUBSTRAIT_SCALAR_TYPE_MAPPING(kUuid), - }; - return scalarTypeMap; -} - -const std::string SubstraitFixedBinaryType::signature() const { - std::stringstream sign; - sign << SubstraitTypeBase::signature(); - sign << "<"; - sign << length_->value(); - sign << ">"; - return sign.str(); -} - -bool SubstraitFixedBinaryType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return true; - } - return false; -} - -const std::string SubstraitDecimalType::signature() const { - std::stringstream signature; - signature << SubstraitTypeBase::signature(); - signature << "<"; - signature << precision_->value() << "," << scale_->value(); - signature << ">"; - return signature.str(); -} - -bool SubstraitDecimalType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return true; - } - return false; -} - -const std::string SubstraitFixedCharType::signature() const { - std::ostringstream sign; - sign << SubstraitTypeBase::signature(); - sign << "<"; - sign << length_->value(); - sign << ">"; - return sign.str(); -} - -bool SubstraitFixedCharType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return true; - } - return false; -} - -const std::string SubstraitVarcharType::signature() const { - std::ostringstream sign; - sign << SubstraitTypeBase::signature(); - sign << "<"; - sign << length_->value(); - sign << ">"; - return sign.str(); -} - -bool SubstraitVarcharType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return true; - } - return false; -} - -const std::string SubstraitStructType::signature() const { - std::ostringstream signature; - signature << SubstraitTypeBase::signature(); - signature << "<"; - for (auto it = children_.begin(); it != children_.end(); ++it) { - const auto& typeSign = (*it)->signature(); - if (it == children_.end() - 1) { - signature << typeSign; - } else { - signature << typeSign << ","; - } - } - signature << ">"; - return signature.str(); -} - -bool SubstraitStructType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - bool sameSize = type->children_.size() == children_.size(); - if (sameSize) { - for (int i = 0; i < children_.size(); i++) { - if (!children_[i]->isSameAs(type->children_[i])) { - return false; - } - } - return true; - } - } - return false; -} - -const std::string SubstraitMapType::signature() const { - std::ostringstream signature; - signature << SubstraitTypeBase::signature(); - signature << "<"; - signature << keyType_->signature(); - signature << ","; - signature << valueType_->signature(); - signature << ">"; - return signature.str(); -} - -bool SubstraitMapType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return keyType_->isSameAs(type->keyType_) && - valueType_->isSameAs(type->valueType_); - } - return false; -} - -const std::string SubstraitListType::signature() const { - std::ostringstream signature; - signature << SubstraitTypeBase::signature(); - signature << "<"; - signature << type_->signature(); - signature << ">"; - return signature.str(); -} - -bool SubstraitListType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return type_->isSameAs(type->type_); - } - return false; -} - -bool SubstraitUsedDefinedType::isSameAs( - const std::shared_ptr& other) const { - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return type->value_ == value_; - } - return false; -} - -bool SubstraitStringLiteralType::isSameAs( - const std::shared_ptr& other) const { - if (isWildcard()) { - return true; - } - if (const auto& type = - std::dynamic_pointer_cast(other)) { - return type->value_ == value_; - } - return false; -} - -#define DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(typeKind) \ - std::shared_ptr> \ - typeKind() { \ - return std::make_shared< \ - const SubstraitScalarType>(); \ - } - -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kBool); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kI8); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kI16); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kI32); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kI64); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kFp32); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kFp64); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kString); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kBinary); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kTimestamp); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kDate); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kTime); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kIntervalYear); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kIntervalDay); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kTimestampTz); -DEFINE_SUBSTRAIT_SCALAR_ACCESSOR(kUuid); - -#undef DEFINE_SUBSTRAIT_SCALAR_ACCESSOR - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitType.h b/velox/substrait/SubstraitType.h deleted file mode 100644 index 7159a19c9a92..000000000000 --- a/velox/substrait/SubstraitType.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include "velox/substrait/proto/substrait/algebra.pb.h" - -namespace facebook::velox::substrait { - -using SubstraitTypeKind = ::substrait::Type::KindCase; - -template -struct SubstraitTypeTraits {}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "bool"; - static constexpr const char* typeString = "boolean"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "i8"; - static constexpr const char* typeString = "i8"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "i16"; - static constexpr const char* typeString = "i16"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "i32"; - static constexpr const char* typeString = "i32"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "i64"; - static constexpr const char* typeString = "i64"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "fp32"; - static constexpr const char* typeString = "fp32"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "fp64"; - static constexpr const char* typeString = "fp64"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "str"; - static constexpr const char* typeString = "string"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "vbin"; - static constexpr const char* typeString = "binary"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "ts"; - static constexpr const char* typeString = "timestamp"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "tstz"; - static constexpr const char* typeString = "timestamp_tz"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "date"; - static constexpr const char* typeString = "date"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "time"; - static constexpr const char* typeString = "time"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "iyear"; - static constexpr const char* typeString = "interval_year"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "iday"; - static constexpr const char* typeString = "interval_day"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "uuid"; - static constexpr const char* typeString = "uuid"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "fchar"; - static constexpr const char* typeString = "fixedchar"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "vchar"; - static constexpr const char* typeString = "varchar"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "fbin"; - static constexpr const char* typeString = "fixedbinary"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "dec"; - static constexpr const char* typeString = "decimal"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "struct"; - static constexpr const char* typeString = "struct"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "list"; - static constexpr const char* typeString = "list"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "map"; - static constexpr const char* typeString = "map"; -}; - -template <> -struct SubstraitTypeTraits { - static constexpr const char* signature = "u!name"; - static constexpr const char* typeString = "user defined type"; -}; - -class SubstraitType { - public: - /// deserialize substrait raw type string into Substrait extension type. - /// @param rawType - substrait extension raw string type - static std::shared_ptr decode( - const std::string& rawType); - - /// signature name of substrait type. - virtual const std::string signature() const = 0; - - /// test type is a Wildcard type or not. - virtual const bool isWildcard() const { - return false; - } - - /// unknown type,see @SubstraitUnknownType - virtual const bool isUnknown() const { - return false; - } - - /// a known substrait type kind - virtual const SubstraitTypeKind kind() const = 0; - - virtual const std::string typeString() const = 0; - - /// whether two types are same as each other - virtual bool isSameAs( - const std::shared_ptr& other) const { - return kind() == other->kind(); - } - - private: - /// A map store the raw type string and corresponding Substrait Type - static const std:: - unordered_map>& - scalarTypeMapping(); -}; - -using SubstraitTypePtr = std::shared_ptr; - -/// Types used in function argument declarations. -template -class SubstraitTypeBase : public SubstraitType { - public: - const std::string signature() const override { - return SubstraitTypeTraits::signature; - } - - virtual const SubstraitTypeKind kind() const override { - return Kind; - } - - const std::string typeString() const override { - return SubstraitTypeTraits::typeString; - } -}; - -template -class SubstraitScalarType : public SubstraitTypeBase {}; - -/// A string literal type can present the 'any1' -class SubstraitStringLiteralType : public SubstraitType { - public: - SubstraitStringLiteralType(const std::string& value) : value_(value) {} - - const std::string& value() const { - return value_; - } - - const std::string signature() const override { - return value_; - } - - const std::string typeString() const override { - return value_; - } - const bool isWildcard() const override { - return value_.find("any") == 0 || value_ == "T"; - } - - bool isSameAs( - const std::shared_ptr& other) const override; - - const SubstraitTypeKind kind() const override { - return SubstraitTypeKind ::KIND_NOT_SET; - } - - private: - /// raw string of wildcard type. - const std::string value_; -}; - -using SubstraitStringLiteralTypePtr = - std::shared_ptr; - -class SubstraitDecimalType - : public SubstraitTypeBase { - public: - SubstraitDecimalType( - const SubstraitStringLiteralTypePtr& precision, - const SubstraitStringLiteralTypePtr& scale) - : precision_(precision), scale_(scale) {} - - SubstraitDecimalType(const std::string& precision, const std::string& scale) - : precision_(std::make_shared(precision)), - scale_(std::make_shared(scale)) {} - - bool isSameAs( - const std::shared_ptr& other) const override; - - const std::string signature() const override; - - const std::string precision() const { - return precision_->value(); - } - - const std::string scale() const { - return scale_->value(); - } - - private: - SubstraitStringLiteralTypePtr precision_; - SubstraitStringLiteralTypePtr scale_; -}; - -class SubstraitFixedBinaryType - : public SubstraitTypeBase { - public: - SubstraitFixedBinaryType(const SubstraitStringLiteralTypePtr& length) - : length_(length) {} - - bool isSameAs( - const std::shared_ptr& other) const override; - - const SubstraitStringLiteralTypePtr& length() const { - return length_; - } - - const std::string signature() const override; - - protected: - SubstraitStringLiteralTypePtr length_; -}; - -class SubstraitFixedCharType - : public SubstraitTypeBase { - public: - SubstraitFixedCharType(const SubstraitStringLiteralTypePtr& length) - : length_(length) {} - - bool isSameAs( - const std::shared_ptr& other) const override; - - const SubstraitStringLiteralTypePtr& length() const { - return length_; - } - - const std::string signature() const override; - - protected: - SubstraitStringLiteralTypePtr length_; -}; - -class SubstraitVarcharType - : public SubstraitTypeBase { - public: - SubstraitVarcharType(const SubstraitStringLiteralTypePtr& length) - : length_(length) {} - - bool isSameAs( - const std::shared_ptr& other) const override; - - const SubstraitStringLiteralTypePtr& length() const { - return length_; - } - - const std::string signature() const override; - - protected: - SubstraitStringLiteralTypePtr length_; -}; - -class SubstraitListType : public SubstraitTypeBase { - public: - SubstraitListType(const SubstraitTypePtr& child) : type_(child){}; - - const SubstraitTypePtr type() const { - return type_; - } - - bool isSameAs( - const std::shared_ptr& other) const override; - - const std::string signature() const override; - - private: - SubstraitTypePtr type_; -}; - -class SubstraitStructType - : public SubstraitTypeBase { - public: - SubstraitStructType(const std::vector& types) - : children_(types) {} - - bool isSameAs( - const std::shared_ptr& other) const override; - - const std::string signature() const override; - - const std::vector& children() const { - return children_; - } - - private: - std::vector children_; -}; - -class SubstraitMapType : public SubstraitTypeBase { - public: - SubstraitMapType( - const SubstraitTypePtr& keyType, - const SubstraitTypePtr& valueType) - : keyType_(keyType), valueType_(valueType) {} - - const SubstraitTypePtr keyType() const { - return keyType_; - } - - const SubstraitTypePtr valueType() const { - return valueType_; - } - - bool isSameAs( - const std::shared_ptr& other) const override; - - const std::string signature() const override; - - private: - SubstraitTypePtr keyType_; - SubstraitTypePtr valueType_; -}; - -class SubstraitUsedDefinedType - : public SubstraitTypeBase { - public: - SubstraitUsedDefinedType(const std::string& value) : value_(value) {} - - const std::string& value() const { - return value_; - } - - bool isSameAs( - const std::shared_ptr& other) const override; - - const bool isUnknown() const override { - return "unknown" == value_; - } - - private: - /// raw string of wildcard type. - const std::string value_; -}; - -struct SubstraitTypeAnchor { - std::string uri; - std::string name; - - bool operator==(const SubstraitTypeAnchor& other) const { - return (uri == other.uri && name == other.name); - } -}; - -using SubstraitTypeAnchorPtr = std::shared_ptr; - -#define SUBSTRAIT_SCALAR_ACCESSOR(KIND) \ - std::shared_ptr> KIND() - -SUBSTRAIT_SCALAR_ACCESSOR(kBool); -SUBSTRAIT_SCALAR_ACCESSOR(kI8); -SUBSTRAIT_SCALAR_ACCESSOR(kI16); -SUBSTRAIT_SCALAR_ACCESSOR(kI32); -SUBSTRAIT_SCALAR_ACCESSOR(kI64); -SUBSTRAIT_SCALAR_ACCESSOR(kFp32); -SUBSTRAIT_SCALAR_ACCESSOR(kFp64); -SUBSTRAIT_SCALAR_ACCESSOR(kString); -SUBSTRAIT_SCALAR_ACCESSOR(kBinary); -SUBSTRAIT_SCALAR_ACCESSOR(kTimestamp); -SUBSTRAIT_SCALAR_ACCESSOR(kDate); -SUBSTRAIT_SCALAR_ACCESSOR(kTime); -SUBSTRAIT_SCALAR_ACCESSOR(kIntervalYear); -SUBSTRAIT_SCALAR_ACCESSOR(kIntervalDay); -SUBSTRAIT_SCALAR_ACCESSOR(kTimestampTz); -SUBSTRAIT_SCALAR_ACCESSOR(kUuid); - -} // namespace facebook::velox::substrait - -namespace std { -/// hash function of facebook::velox::substrait::SubstraitTypeAnchor -template <> -struct hash { - size_t operator()( - const facebook::velox::substrait::SubstraitTypeAnchor& k) const { - return hash()(k.name) ^ hash()(k.uri); - } -}; - -}; // namespace std \ No newline at end of file diff --git a/velox/substrait/SubstraitTypeLookup.cpp b/velox/substrait/SubstraitTypeLookup.cpp deleted file mode 100644 index 3c00bc6a357d..000000000000 --- a/velox/substrait/SubstraitTypeLookup.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitTypeLookup.h" - -namespace facebook::velox::substrait { - -SubstraitTypeLookup::SubstraitTypeLookup( - const std::vector& types) { - for (auto& typeAnchor : types) { - signatures_.insert({typeAnchor->name, typeAnchor}); - } -} - -std::optional SubstraitTypeLookup::lookupType( - const std::string& typeName) const { - if (signatures_.find(typeName) != signatures_.end()) { - return std::make_optional(signatures_.at(typeName)); - } - return std::nullopt; -} - -std::optional SubstraitTypeLookup::lookupUnknownType() - const { - return lookupType("unknown"); -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/SubstraitTypeLookup.h b/velox/substrait/SubstraitTypeLookup.h deleted file mode 100644 index 51659bd81a68..000000000000 --- a/velox/substrait/SubstraitTypeLookup.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/core/PlanNode.h" -#include "velox/substrait/SubstraitType.h" - -namespace facebook::velox::substrait { -class SubstraitTypeLookup { - public: - SubstraitTypeLookup(const std::vector& types); - /// lookup substrait type anchor by given type name - std::optional lookupType( - const std::string& typeName) const; - - std::optional lookupUnknownType() const; - - private: - // type signatures , key is type name, value is the type anchor - std::unordered_map signatures_; -}; - -using SubstraitTypeLookupPtr = std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/TypeUtils.cpp b/velox/substrait/TypeUtils.cpp index 073cba71560e..a7517140037e 100644 --- a/velox/substrait/TypeUtils.cpp +++ b/velox/substrait/TypeUtils.cpp @@ -124,224 +124,4 @@ TypePtr toVeloxType(const std::string& typeName) { } } -template -class SubstraitTypeCreator; - -/// template method for create SubstraitType by velox TypeKind. -template -std::shared_ptr fromVeloxType(const TypePtr& type) { - return SubstraitTypeCreator::create(type); -} - -SubstraitTypePtr fromVelox(const TypePtr& type) { - if (type) { - return VELOX_DYNAMIC_TYPE_DISPATCH(fromVeloxType, type->kind(), type); - } - return nullptr; -} - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kBool(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kI8(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kI16(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kI32(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kI64(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kFp32(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kFp64(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kString(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kBinary(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kTimestamp(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kDate(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = kIntervalDay(); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static std::shared_ptr create(const TypePtr& iType) { - const auto& decimalType = - std::dynamic_pointer_cast>( - iType); - - return std::make_shared( - std::to_string(decimalType->precision()), - std::to_string(decimalType->scale())); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static std::shared_ptr create(const TypePtr& iType) { - const auto& decimalType = - std::dynamic_pointer_cast>( - iType); - return std::make_shared( - std::to_string(decimalType->precision()), - std::to_string(decimalType->scale())); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static std::shared_ptr create(const TypePtr& iType) { - const auto& arrayType = std::dynamic_pointer_cast(iType); - return std::make_shared( - fromVelox(arrayType->elementType())); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static std::shared_ptr create(const TypePtr& iType) { - const auto& mapType = std::dynamic_pointer_cast(iType); - return std::make_shared( - fromVelox(mapType->keyType()), fromVelox(mapType->valueType())); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static std::shared_ptr create(const TypePtr& iType) { - const auto& rowType = std::dynamic_pointer_cast(iType); - - std::vector types; - for (const auto& type : rowType->children()) { - const auto& substraitType = fromVelox(type); - types.emplace_back(substraitType); - } - return std::make_shared(types); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - static const auto type = - std::make_shared("unknown"); - return type; - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - VELOX_NYI("FUNCTION type not supported."); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(TypePtr& iType) { - VELOX_NYI("OPAQUE type not supported."); - } -}; - -template <> -class SubstraitTypeCreator { - public: - static SubstraitTypePtr create(const TypePtr& iType) { - VELOX_NYI("Invalid type not supported."); - } -}; - } // namespace facebook::velox::substrait diff --git a/velox/substrait/TypeUtils.h b/velox/substrait/TypeUtils.h index 19d107f8e828..3a649eef674c 100644 --- a/velox/substrait/TypeUtils.h +++ b/velox/substrait/TypeUtils.h @@ -14,8 +14,6 @@ * limitations under the License. */ -#include -#include "velox/substrait/SubstraitType.h" #include "velox/type/Type.h" namespace facebook::velox::substrait { @@ -26,5 +24,4 @@ TypePtr toVeloxType(const std::string& typeName); std::string_view getNameBeforeDelimiter( const std::string& compoundName, const std::string& delimiter); - } // namespace facebook::velox::substrait diff --git a/velox/substrait/VariantToVectorConverter.cpp b/velox/substrait/VariantToVectorConverter.cpp new file mode 100644 index 000000000000..a38ad0678a95 --- /dev/null +++ b/velox/substrait/VariantToVectorConverter.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/substrait/VariantToVectorConverter.h" +#include "velox/vector/FlatVector.h" + +namespace facebook::velox::substrait { + +namespace { +template +VectorPtr setVectorFromVariantsByKind( + const std::vector& values, + memory::MemoryPool* pool) { + using T = typename TypeTraits::NativeType; + + auto flatVector = std::dynamic_pointer_cast>( + BaseVector::create(CppToType::create(), values.size(), pool)); + + for (vector_size_t i = 0; i < values.size(); i++) { + if (values[i].isNull()) { + flatVector->setNull(i, true); + } else { + flatVector->set(i, values[i].value()); + } + } + return flatVector; +} + +template <> +VectorPtr setVectorFromVariantsByKind( + const std::vector& /* values */, + memory::MemoryPool* /* pool */) { + throw std::invalid_argument("Return of VARBINARY data is not supported"); +} + +template <> +VectorPtr setVectorFromVariantsByKind( + const std::vector& values, + memory::MemoryPool* pool) { + auto flatVector = std::dynamic_pointer_cast>( + BaseVector::create(VARCHAR(), values.size(), pool)); + + for (vector_size_t i = 0; i < values.size(); i++) { + if (values[i].isNull()) { + flatVector->setNull(i, true); + } else { + flatVector->set(i, StringView(values[i].value())); + } + } + return flatVector; +} +} // namespace + +VectorPtr setVectorFromVariants( + const TypePtr& type, + const std::vector& values, + memory::MemoryPool* pool) { + return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( + setVectorFromVariantsByKind, type->kind(), values, pool); +} +} // namespace facebook::velox::substrait diff --git a/velox/substrait/ExprUtils.h b/velox/substrait/VariantToVectorConverter.h similarity index 71% rename from velox/substrait/ExprUtils.h rename to velox/substrait/VariantToVectorConverter.h index 80d540d6deb4..a7146f76088f 100644 --- a/velox/substrait/ExprUtils.h +++ b/velox/substrait/VariantToVectorConverter.h @@ -13,17 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once -#include "velox/expression/Expr.h" -#include "velox/substrait/SubstraitSignature.h" -#include "velox/substrait/TypeUtils.h" +#include "velox/vector/BaseVector.h" namespace facebook::velox::substrait { -/// convert velox callTyped expression to substrait function signature. -SubstraitSignaturePtr toSubstraitSignature( - const core::CallTypedExprPtr& callTypedExpr); +/// Create Base Vector from velox variants. +/// Only scalar types are supported except VARBINARY. +VectorPtr setVectorFromVariants( + const TypePtr& type, + const std::vector& values, + velox::memory::MemoryPool* pool); } // namespace facebook::velox::substrait diff --git a/velox/substrait/VeloxToSubstraitCallConverter.cpp b/velox/substrait/VeloxToSubstraitCallConverter.cpp deleted file mode 100644 index dc860c3c01ea..000000000000 --- a/velox/substrait/VeloxToSubstraitCallConverter.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/VeloxToSubstraitCallConverter.h" -#include "velox/substrait/ExprUtils.h" - -namespace facebook::velox::substrait { - -const std::optional<::substrait::Expression*> -VeloxToSubstraitIfThenConverter::convert( - const core::CallTypedExprPtr& callTypeExpr, - google::protobuf::Arena& arena, - SubstraitExprConverter& topLevelConverter) const { - if (callTypeExpr->name() != "if" && callTypeExpr->name() != "switch") { - return std::nullopt; - } - if (callTypeExpr->inputs().size() % 2 != 1) { - VELOX_NYI( - "Number of arguments are always going to be odd for if/then or switch expression"); - } - - auto* substraitExpr = - google::protobuf::Arena::CreateMessage<::substrait::Expression>(&arena); - auto ifThenExpr = substraitExpr->mutable_if_then(); - auto last = callTypeExpr->inputs().size() - 1; - for (int i = 0; i < last; i += 2) { - auto ifClauseExpr = ifThenExpr->add_ifs(); - ifClauseExpr->mutable_if_()->MergeFrom( - topLevelConverter(callTypeExpr->inputs().at(i))); - ifClauseExpr->mutable_then()->MergeFrom( - topLevelConverter(callTypeExpr->inputs().at(i + 1))); - } - ifThenExpr->mutable_else_()->MergeFrom( - topLevelConverter(callTypeExpr->inputs().at(last))); - return std::make_optional(substraitExpr); -} - -const std::optional<::substrait::Expression*> -VeloxToSubstraitScalarFunctionConverter::convert( - const core::CallTypedExprPtr& callTypeExpr, - google::protobuf::Arena& arena, - SubstraitExprConverter& topLevelConverter) const { - const auto& scalarFunctionOption = - functionLookup_->lookupFunction(toSubstraitSignature(callTypeExpr)); - - if (!scalarFunctionOption.has_value()) { - return std::nullopt; - } - - auto* substraitExpr = - google::protobuf::Arena::CreateMessage<::substrait::Expression>(&arena); - auto scalarExpr = substraitExpr->mutable_scalar_function(); - scalarExpr->set_function_reference( - extensionCollector_->getFunctionReference(scalarFunctionOption.value())); - - for (auto& arg : callTypeExpr->inputs()) { - const auto& message = topLevelConverter(arg); - scalarExpr->add_arguments()->mutable_value()->MergeFrom(message); - } - scalarExpr->mutable_output_type()->MergeFrom( - typeConvertor_->toSubstraitType(arena, callTypeExpr->type())); - - return std::make_optional(substraitExpr); -} - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/VeloxToSubstraitCallConverter.h b/velox/substrait/VeloxToSubstraitCallConverter.h deleted file mode 100644 index 721f63a156ac..000000000000 --- a/velox/substrait/VeloxToSubstraitCallConverter.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "optional" -#include "velox/expression/Expr.h" -#include "velox/substrait/VeloxToSubstraitType.h" -#include "velox/substrait/proto/substrait/algebra.pb.h" -#include "velox/substrait/SubstraitFunctionCollector.h" -#include "velox/substrait/SubstraitFunctionLookup.h" - -namespace facebook::velox::substrait { - -using SubstraitExprConverter = - std::function<::substrait::Expression(const core::TypedExprPtr&)>; - -// This class is used to convert the velox CallTypedExpr into substrait scalar -// function expression. -class VeloxToSubstraitCallConverter { - public: - /// convert callTypedExpr to substrait Expression. - virtual const std::optional<::substrait::Expression*> convert( - const core::CallTypedExprPtr& callTypeExpr, - google::protobuf::Arena& arena, - SubstraitExprConverter& topLevelConverter) const = 0; -}; - -using VeloxToSubstraitCallConverterPtr = - std::shared_ptr; - -/// convert 'if/switch' CallTypedExpr to substrait ifThen expression. -class VeloxToSubstraitIfThenConverter : public VeloxToSubstraitCallConverter { - public: - const std::optional<::substrait::Expression*> convert( - const core::CallTypedExprPtr& callTypeExpr, - google::protobuf::Arena& arena, - SubstraitExprConverter& topLevelConverter) const override; -}; - -/// convert callTypedExpr to substrait expression except 'if/switch' -class VeloxToSubstraitScalarFunctionConverter - : public VeloxToSubstraitCallConverter { - public: - VeloxToSubstraitScalarFunctionConverter( - const SubstraitScalarFunctionLookupPtr& functionLookup, - const SubstraitFunctionCollectorPtr& extensionCollector, - const VeloxToSubstraitTypeConvertorPtr typeConvertor) - : functionLookup_(functionLookup), - extensionCollector_(extensionCollector), - typeConvertor_(typeConvertor) {} - - const std::optional<::substrait::Expression*> convert( - const core::CallTypedExprPtr& callTypeExpr, - google::protobuf::Arena& arena, - SubstraitExprConverter& topLevelConverter) const override; - - private: - SubstraitScalarFunctionLookupPtr functionLookup_; - SubstraitFunctionCollectorPtr extensionCollector_; - VeloxToSubstraitTypeConvertorPtr typeConvertor_; -}; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/VeloxToSubstraitExpr.cpp b/velox/substrait/VeloxToSubstraitExpr.cpp index 6b548228b6b3..603135826565 100644 --- a/velox/substrait/VeloxToSubstraitExpr.cpp +++ b/velox/substrait/VeloxToSubstraitExpr.cpp @@ -177,7 +177,7 @@ const ::substrait::Expression& VeloxToSubstraitExprConvertor::toSubstraitExpr( } } - VELOX_NYI("Unsupported function name '{}'", callTypeExpr->name()); + return *substraitExpr; } const ::substrait::Expression_Literal& @@ -308,41 +308,41 @@ VeloxToSubstraitExprConvertor::toSubstraitNotNullLiteral( google::protobuf::Arena::CreateMessage<::substrait::Expression_Literal>( &arena); switch (variantValue.kind()) { - case velox::TypeKind::DOUBLE: { - literalExpr->set_fp64(variantValue.value()); + case velox::TypeKind::BOOLEAN: { + literalExpr->set_boolean(variantValue.value()); break; } - case velox::TypeKind::VARCHAR: { - auto vCharValue = variantValue.value(); - ::substrait::Expression_Literal::VarChar* sVarChar = - new ::substrait::Expression_Literal::VarChar(); - sVarChar->set_value(vCharValue.data()); - sVarChar->set_length(vCharValue.size()); - literalExpr->set_allocated_var_char(sVarChar); + case velox::TypeKind::TINYINT: { + literalExpr->set_i8(variantValue.value()); break; } - case velox::TypeKind::BIGINT: { - literalExpr->set_i64(variantValue.value()); + case velox::TypeKind::SMALLINT: { + literalExpr->set_i16(variantValue.value()); break; } case velox::TypeKind::INTEGER: { literalExpr->set_i32(variantValue.value()); break; } - case velox::TypeKind::SMALLINT: { - literalExpr->set_i16(variantValue.value()); + case velox::TypeKind::BIGINT: { + literalExpr->set_i64(variantValue.value()); break; } - case velox::TypeKind::TINYINT: { - literalExpr->set_i8(variantValue.value()); + case velox::TypeKind::REAL: { + literalExpr->set_fp32(variantValue.value()); break; } - case velox::TypeKind::BOOLEAN: { - literalExpr->set_boolean(variantValue.value()); + case velox::TypeKind::DOUBLE: { + literalExpr->set_fp64(variantValue.value()); break; } - case velox::TypeKind::REAL: { - literalExpr->set_fp32(variantValue.value()); + case velox::TypeKind::VARCHAR: { + auto vCharValue = variantValue.value(); + ::substrait::Expression_Literal::VarChar* sVarChar = + new ::substrait::Expression_Literal::VarChar(); + sVarChar->set_value(vCharValue.data()); + sVarChar->set_length(vCharValue.size()); + literalExpr->set_allocated_var_char(sVarChar); break; } case velox::TypeKind::TIMESTAMP: { diff --git a/velox/substrait/VeloxToSubstraitExpr.h b/velox/substrait/VeloxToSubstraitExpr.h index a9db54f763e3..8ad3b0a12515 100644 --- a/velox/substrait/VeloxToSubstraitExpr.h +++ b/velox/substrait/VeloxToSubstraitExpr.h @@ -17,8 +17,8 @@ #pragma once #include "velox/core/PlanNode.h" -#include "velox/substrait/SubstraitExtensionCollector.h" +#include "velox/substrait/SubstraitExtensionCollector.h" #include "velox/substrait/VeloxToSubstraitType.h" #include "velox/substrait/proto/substrait/algebra.pb.h" #include "velox/vector/ConstantVector.h" diff --git a/velox/substrait/VeloxToSubstraitMappings.h b/velox/substrait/VeloxToSubstraitMappings.h deleted file mode 100644 index 30b5fd605238..000000000000 --- a/velox/substrait/VeloxToSubstraitMappings.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/substrait/SubstraitFunctionLookup.h" - -namespace facebook::velox::substrait { - -class VeloxToSubstraitFunctionMappings : public SubstraitFunctionMappings { - public: - static const std::shared_ptr make() { - return std::make_shared(); - } - /// scalar function names in difference between velox and Substrait. - const FunctionMappings scalarMappings() const override { - static const FunctionMappings scalarMappings{ - {"plus", "add"}, - {"minus", "subtract"}, - {"mod", "modulus"}, - {"eq", "equal"}, - {"neq", "not_equal"}, - {"substr", "substring"}, - }; - return scalarMappings; - }; - - /// aggregate function names in difference between velox and Substrait. - const FunctionMappings aggregateMappings() const override { - static const FunctionMappings aggregateMappings{}; - return aggregateMappings; - }; - - /// window function names in difference between velox and Substrait. - const FunctionMappings windowMappings() const override { - static const FunctionMappings aggregateMappings{}; - return aggregateMappings; - }; -}; - -using VeloxToSubstraitFunctionMappingsPtr = - std::shared_ptr; - -} // namespace facebook::velox::substrait diff --git a/velox/substrait/VeloxToSubstraitPlan.cpp b/velox/substrait/VeloxToSubstraitPlan.cpp index 88cde9f5ca2e..3997df555736 100644 --- a/velox/substrait/VeloxToSubstraitPlan.cpp +++ b/velox/substrait/VeloxToSubstraitPlan.cpp @@ -15,8 +15,6 @@ */ #include "velox/substrait/VeloxToSubstraitPlan.h" -#include "VeloxToSubstraitMappings.h" -#include "velox/substrait/ExprUtils.h" #include "velox/substrait/JoinUtils.h" namespace facebook::velox::substrait { @@ -44,56 +42,8 @@ ::substrait::AggregationPhase toAggregationPhase( } } -// Return true if the join type is supported. -bool checkForSupportJoinType( - const std::shared_ptr& nodePtr) { - // TODO: Implemented other types of Join. - return nodePtr->isInnerJoin(); -} - } // namespace -VeloxToSubstraitPlanConvertor::VeloxToSubstraitPlanConvertor() - : VeloxToSubstraitPlanConvertor( - SubstraitExtension::loadExtension(), - VeloxToSubstraitFunctionMappings::make()) {} - -VeloxToSubstraitPlanConvertor::VeloxToSubstraitPlanConvertor( - const SubstraitExtensionPtr& substraitExtension, - const SubstraitFunctionMappingsPtr& functionMappings) { - // Construct the extension collector - functionCollector_ = std::make_shared(); - - auto substraitTypeLookup = - std::make_shared(substraitExtension->types); - typeConvertor_ = std::make_shared( - functionCollector_, substraitTypeLookup); - // Construct the scalar function lookup - auto scalarFunctionLookup = - std::make_shared( - substraitExtension, functionMappings); - - // Construct the if/Then call converter - auto ifThenCallConverter = - std::make_shared(); - // Construct the scalar function converter. - auto scalaFunctionConverter = - std::make_shared( - scalarFunctionLookup, functionCollector_, typeConvertor_); - - std::vector callConvertors; - callConvertors.push_back(ifThenCallConverter); - callConvertors.push_back(scalaFunctionConverter); - - // Construct the expression converter. - exprConvertor_ = std::make_shared( - typeConvertor_, callConvertors); - - // Construct the aggregate function lookup - aggregateFunctionLookup_ = std::make_shared( - substraitExtension, functionMappings); -} - ::substrait::Plan& VeloxToSubstraitPlanConvertor::toSubstrait( google::protobuf::Arena& arena, const core::PlanNodePtr& plan) { diff --git a/velox/substrait/VeloxToSubstraitPlan.h b/velox/substrait/VeloxToSubstraitPlan.h index da63d0ed03c8..bd6102634d46 100644 --- a/velox/substrait/VeloxToSubstraitPlan.h +++ b/velox/substrait/VeloxToSubstraitPlan.h @@ -19,6 +19,7 @@ #include #include #include + #include "velox/core/PlanNode.h" #include "velox/type/Type.h" @@ -26,22 +27,12 @@ #include "velox/substrait/VeloxToSubstraitExpr.h" #include "velox/substrait/proto/substrait/algebra.pb.h" #include "velox/substrait/proto/substrait/plan.pb.h" -#include "velox/type/Type.h" namespace facebook::velox::substrait { /// Convert the Velox plan into Substrait plan. class VeloxToSubstraitPlanConvertor { public: - /// constructor VeloxToSubstraitPlanConvertor - VeloxToSubstraitPlanConvertor(); - - /// constructor VeloxToSubstraitPlanConvertor with given substrait extension - /// and function mappings. - VeloxToSubstraitPlanConvertor( - const SubstraitExtensionPtr& substraitExtension, - const SubstraitFunctionMappingsPtr& functionMappings); - /// Convert Velox PlanNode into Substrait Plan. /// @param vPlan Velox query plan to convert. /// @param arena Arena to use for allocating Substrait plan objects. @@ -87,14 +78,6 @@ class VeloxToSubstraitPlanConvertor { google::protobuf::Arena& arena, const std::shared_ptr joinNode, ::substrait::Rel* joinRel); - - /// Construct the function map between the Velox function name and index. - void constructFunctionMap(); - - /// Fetch all functions from Velox's registry and create Substrait extensions - /// for these. - ::substrait::Plan& addExtensionFunc(google::protobuf::Arena& arena); - /// The Expression converter used to convert Velox representations into /// Substrait expressions. VeloxToSubstraitExprConvertorPtr exprConvertor_; diff --git a/velox/substrait/VeloxToSubstraitType.cpp b/velox/substrait/VeloxToSubstraitType.cpp index 5fb393f497dd..f0e1636ec89f 100644 --- a/velox/substrait/VeloxToSubstraitType.cpp +++ b/velox/substrait/VeloxToSubstraitType.cpp @@ -20,11 +20,6 @@ namespace facebook::velox::substrait { -VeloxToSubstraitTypeConvertor::VeloxToSubstraitTypeConvertor( - const SubstraitFunctionCollectorPtr& functionCollector, - const SubstraitTypeLookupPtr& typeLookup) - : functionCollector_(functionCollector), typeLookup_(typeLookup) {} - const ::substrait::Type& VeloxToSubstraitTypeConvertor::toSubstraitType( google::protobuf::Arena& arena, const velox::TypePtr& type) const { @@ -173,21 +168,14 @@ const ::substrait::Type& VeloxToSubstraitTypeConvertor::toSubstraitType( break; } case velox::TypeKind::UNKNOWN: { - //velox unknown type binding to substrait unknown type - const auto& substraitTypeAnchor = typeLookup_->lookupUnknownType(); - if (substraitTypeAnchor.has_value()) { - auto substraitUserDefined = google::protobuf::Arena::CreateMessage< - ::substrait::Type_UserDefined>(&arena); - substraitUserDefined->set_type_reference( - functionCollector_->getTypeReference(substraitTypeAnchor.value())); - substraitUserDefined->set_nullability( - ::substrait::Type_Nullability_NULLABILITY_NULLABLE); - substraitType->set_allocated_user_defined(substraitUserDefined); - break; - } else { - VELOX_UNSUPPORTED( - "type anchor not found for velox type '{}'", type->toString()); - } + auto substraitUserDefined = + google::protobuf::Arena::CreateMessage<::substrait::Type_UserDefined>( + &arena); + substraitUserDefined->set_type_reference(0); + substraitUserDefined->set_nullability( + ::substrait::Type_Nullability_NULLABILITY_NULLABLE); + substraitType->set_allocated_user_defined(substraitUserDefined); + break; } case velox::TypeKind::FUNCTION: case velox::TypeKind::OPAQUE: diff --git a/velox/substrait/VeloxToSubstraitType.h b/velox/substrait/VeloxToSubstraitType.h index c78462d06198..6008a8ca2a6c 100644 --- a/velox/substrait/VeloxToSubstraitType.h +++ b/velox/substrait/VeloxToSubstraitType.h @@ -18,8 +18,6 @@ #include "velox/core/PlanNode.h" -#include "velox/substrait/SubstraitFunctionCollector.h" -#include "velox/substrait/SubstraitTypeLookup.h" #include "velox/substrait/proto/substrait/algebra.pb.h" #include "velox/substrait/proto/substrait/type.pb.h" @@ -27,9 +25,6 @@ namespace facebook::velox::substrait { class VeloxToSubstraitTypeConvertor { public: - VeloxToSubstraitTypeConvertor( - const SubstraitFunctionCollectorPtr& functionCollector, - const SubstraitTypeLookupPtr& typeLookup); /// Convert Velox RowType to Substrait NamedStruct. const ::substrait::NamedStruct& toSubstraitNamedStruct( google::protobuf::Arena& arena, @@ -39,11 +34,6 @@ class VeloxToSubstraitTypeConvertor { const ::substrait::Type& toSubstraitType( google::protobuf::Arena& arena, const velox::TypePtr& type) const; - - private: - /// The function Collector used to collect the function reference. - const SubstraitFunctionCollectorPtr functionCollector_; - const SubstraitTypeLookupPtr typeLookup_; }; using VeloxToSubstraitTypeConvertorPtr = diff --git a/velox/substrait/extensions/extension_types.yaml b/velox/substrait/extensions/extension_types.yaml deleted file mode 100644 index e03073c50798..000000000000 --- a/velox/substrait/extensions/extension_types.yaml +++ /dev/null @@ -1,10 +0,0 @@ ---- -types: - - name: point - structure: - latitude: i32 - longitude: i32 - - name: line - structure: - start: point - end: point diff --git a/velox/substrait/extensions/functions_aggregate_approx.yaml b/velox/substrait/extensions/functions_aggregate_approx.yaml deleted file mode 100644 index 92cbf6312547..000000000000 --- a/velox/substrait/extensions/functions_aggregate_approx.yaml +++ /dev/null @@ -1,17 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "approx_count_distinct" - description: >- - Calculates the approximate number of rows that contain distinct values of the expression argument using - HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which - returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT - processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact - result. - impls: - - args: - - value: any - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: binary - return: i64 diff --git a/velox/substrait/extensions/functions_aggregate_generic.yaml b/velox/substrait/extensions/functions_aggregate_generic.yaml deleted file mode 100644 index 99433510fae0..000000000000 --- a/velox/substrait/extensions/functions_aggregate_generic.yaml +++ /dev/null @@ -1,24 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "count" - description: Count a set of values - impls: - - args: - - options: [SILENT, SATURATE, ERROR] - required: false - - value: any - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - name: "count" - description: "Count a set of records (not field referenced)" - impls: - - args: - - options: [SILENT, SATURATE, ERROR] - required: false - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 diff --git a/velox/substrait/extensions/functions_arithmetic.yaml b/velox/substrait/extensions/functions_arithmetic.yaml deleted file mode 100644 index a3e5cef880bb..000000000000 --- a/velox/substrait/extensions/functions_arithmetic.yaml +++ /dev/null @@ -1,1413 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two values." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - value: i64 - - value: i64 - return: i64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "subtract" - description: "Subtract one value from another." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "multiply" - description: "Multiply two values." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "divide" - description: "Divide one value by another. Partial values are truncated." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "negate" - description: "Negation of the value" - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "modulus" - description: "Get the remainder when dividing one value by another." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "power" - description: "Take the power with x as the base and y as exponent." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "sqrt" - description: "Square root of the value" - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: i64 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "exp" - description: "The mathematical constant e, raised to the power of the value." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "cos" - description: "Get the cosine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "sin" - description: "Get the sine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "tan" - description: "Get the tangent of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "cosh" - description: "Get the hyperbolic cosine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "sinh" - description: "Get the hyperbolic sine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "tanh" - description: "Get the hyperbolic tangent of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "acos" - description: "Get the arccosine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "asin" - description: "Get the arcsine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "atan" - description: "Get the arctangent of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "acosh" - description: "Get the hyperbolic arccosine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "asinh" - description: "Get the hyperbolic arcsine of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "atanh" - description: "Get the hyperbolic arctangent of a value in radians." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - - - name: "atan2" - description: "Get the arctangent of values given as x/y pairs." - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - - name: y - value: fp32 - return: fp64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "abs" - description: > - Calculate the absolute value of the argument. - - Integer values allow the specification of overflow behavior to handle the - unevenness of the twos complement, e.g. Int8 range [-128 : 127]. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "sign" - description: > - Return the signedness of the argument. - - Integer values return signedness with the same type as the input. - Possible return values are [-1, 0, 1] - - Floating point values return signedness with the same type as the input. - Possible return values are [-1.0, -0.0, 0.0, 1.0, NaN] - impls: - - args: - - name: x - value: i8 - return: i8 - - args: - - name: x - value: i16 - return: i16 - - args: - - name: x - value: i32 - return: i32 - - args: - - name: x - value: i64 - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "factorial" - description: > - Return the factorial of a given integer input. - - The factorial of 0! is 1 by convention. - - Negative inputs will raise an error. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - value: i32 - name: "n" - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - value: i64 - name: "n" - return: i64 - -aggregate_functions: - - name: "sum" - description: Sum a set of values. The sum of zero elements yields null. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "avg" - description: Average a set of values. For integral types, this truncates partial values. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i8? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i16? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i32? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i64? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp32? - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp64? - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "product" - description: Product of a set of values. Returns 1 for empty input. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i8 - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i8 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i16 - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i16 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i32 - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i32 - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: i64 - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp32 - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: x - value: fp64 - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp64 - - name: "std_dev" - description: Calculates standard-deviation for a set of values. - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: distribution - options: [ SAMPLE, POPULATION] - required: false - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: distribution - options: [ SAMPLE, POPULATION] - required: false - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "variance" - description: Calculates variance for a set of values. - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: distribution - options: [ SAMPLE, POPULATION] - required: false - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: distribution - options: [ SAMPLE, POPULATION] - required: false - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "corr" - description: > - Calculates correlation of two set of values. - If there is no input, null is returned. - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: fp32 - - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: fp64 - - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "mode" - description: > - Calculates mode for a set of values. - If there is no input, null is returned. - impls: - - args: - - value: i8 - nullability: DECLARED_OUTPUT - return: i8? - - args: - - value: i16 - nullability: DECLARED_OUTPUT - return: i16? - - args: - - value: i32 - nullability: DECLARED_OUTPUT - return: i32? - - args: - - value: i64 - nullability: DECLARED_OUTPUT - return: i64? - - args: - - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "median" - description: > - Calculate the median for a set of values. - - Returns null if applied to zero records. For the integer implementations, - the rounding option determines how the median should be rounded if it ends - up midway between two values. For the floating point implementations, - they specify the usual floating point rounding mode. - impls: - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: i8 - nullability: DECLARED_OUTPUT - return: i8? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: i16 - nullability: DECLARED_OUTPUT - return: i16? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: i32 - nullability: DECLARED_OUTPUT - return: i32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: i64 - nullability: DECLARED_OUTPUT - return: i64? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "quantile" - description: > - Calculates quantiles for a set of values. - - This function will divide the aggregated values (passed via the - distribution argument) over N equally-sized bins, where N is passed - via a constant argument. It will then return the values at the - boundaries of these bins in list form. If the input is appropriately - sorted, this computes the quantiles of the distribution. - - The function can optionally return the first and/or last element of - the input, as specified by the `boundaries` argument. If the input is - appropriately sorted, this will thus be the minimum and/or maximum - values of the distribution. - - When the boundaries do not lie exactly on elements of the incoming - distribution, the function will interpolate between the two nearby - elements. If the interpolated value cannot be represented exactly, - the `rounding` option controls how the value should be selected or - computed. - - The function fails and returns null in the following cases: - - `n` is null or less than one; - - any value in `distribution` is null. - - The function returns an empty list if `n` equals 1 and `boundaries` is - set to `NEITHER`. - - impls: - - args: - - name: boundaries - description: > - Which boundaries to include. For NEITHER, the output will have - n-1 elements, for MINIMUM and MAXIMUM it will have n elements, - and for BOTH it will have n+1 elements. - options: [ NEITHER, MINIMUM, MAXIMUM, BOTH ] - required: true - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - required: true - - name: rounding - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. For floating point numbers, it specifies the IEEE - 754 rounding mode (as it does for all other floating point - operations). For integer types: - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - For non-numeric types, the behavior is the same as for integer - types, but applied to the index of the value in distribution. - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - value: i64 - constant: yes - name: n - description: > - A positive integer which defines the number of quantile - partitions. - - value: any - name: distribution - description: > - The data for which the quantiles should be computed. - nullability: DECLARED_OUTPUT - ordered: true - return: LIST? - -window_functions: - - name: "row_number" - description: "the number of the current row within its partition." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "rank" - description: "the rank of the current row, with gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "dense_rank" - description: "the rank of the current row, without gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "percent_rank" - description: "the relative rank of the current row." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "cume_dist" - description: "the cumulative distribution." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "ntile" - description: "Return an integer ranging from 1 to the argument value,dividing the partition as equally as possible." - impls: - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i32? - window_type: PARTITION - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION diff --git a/velox/substrait/extensions/functions_arithmetic_decimal.yaml b/velox/substrait/extensions/functions_arithmetic_decimal.yaml deleted file mode 100644 index 29a2eabd19b9..000000000000 --- a/velox/substrait/extensions/functions_arithmetic_decimal.yaml +++ /dev/null @@ -1,151 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two decimal values." - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: decimal - - name: y - value: decimal - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "subtract" - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: decimal - - name: y - value: decimal - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "multiply" - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: decimal - - name: y - value: decimal - return: |- - init_scale = S1 + S2 - init_prec = P1 + P2 + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "divide" - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: decimal - - name: y - value: decimal - return: |- - init_scale = max(6, S1 + P2 + 1) - init_prec = P1 - S1 + P2 + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "modulus" - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: decimal - - name: y - value: decimal - return: |- - init_scale = max(S1,S2) - init_prec = min(P1 - S1, P2 - S2) + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL -aggregate_functions: - - name: "sum" - description: Sum a set of values. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL<38,S>?" - return: "DECIMAL<38,S>?" - - name: "avg" - description: Average a set of values. - impls: - - args: - - name: overflow - options: [ SILENT, SATURATE, ERROR ] - required: false - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT,i64>" - return: "DECIMAL<38,S>" - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" diff --git a/velox/substrait/extensions/functions_boolean.yaml b/velox/substrait/extensions/functions_boolean.yaml deleted file mode 100644 index 0f8b68fb5e23..000000000000 --- a/velox/substrait/extensions/functions_boolean.yaml +++ /dev/null @@ -1,138 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: or - description: > - The boolean `or` using Kleene logic. - - This function behaves as follows with nulls: - - true or null = true - - null or true = true - - false or null = null - - null or false = null - - null or null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `or` true is always true. - - Behavior for 0 or 1 inputs is as follows: - or() -> false - or(x) -> x - impls: - - args: - - value: boolean? - variadic: - min: 0 - return: boolean? - - - name: and - description: > - The boolean `and` using Kleene logic. - - This function behaves as follows with nulls: - - true and null = null - - null and true = null - - false and null = false - - null and false = false - - null and null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and` false is always false. - - Behavior for 0 or 1 inputs is as follows: - and() -> true - and(x) -> x - impls: - - args: - - value: boolean? - variadic: - min: 0 - return: boolean? - - - name: and_not - description: > - The boolean `and` of one value and the negation of the other using Kleene logic. - - This function behaves as follows with nulls: - - true and not null = null - - null and not false = null - - false and not null = false - - null and not true = false - - null and not null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and not` true is always false, as is false `and not` an - unknown value. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: xor - description: > - The boolean `xor` of two values using Kleene logic. - - When a null is encountered in either input, a null is output. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: not - description: > - The `not` of a boolean value. - - When a null is input, a null is output. - impls: - - args: - - value: boolean? - name: a - return: boolean? - -aggregate_functions: - - - name: "bool_and" - description: > - If any value in the input is false, false is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, true is - returned. - impls: - - args: - - value: boolean - name: x - nullability: DECLARED_OUTPUT - return: boolean? - - - name: "bool_or" - description: > - If any value in the input is true, true is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, false is - returned. - impls: - - args: - - value: boolean - name: x - nullability: DECLARED_OUTPUT - return: boolean? diff --git a/velox/substrait/extensions/functions_comparison.yaml b/velox/substrait/extensions/functions_comparison.yaml deleted file mode 100644 index 7d11f3c7abf1..000000000000 --- a/velox/substrait/extensions/functions_comparison.yaml +++ /dev/null @@ -1,216 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "not_equal" - description: > - Whether two values are not_equal. - - `not_equal(x, y) := (x != y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "equal" - description: > - Whether two values are equal. - - `equal(x, y) := (x == y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "is_not_distinct_from" - description: > - Whether two values are equal. - - This function treats `null` values as comparable, so - - `is_not_distinct_from(null, null) == True` - - This is in contrast to `equal`, in which `null` values do not compare. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "lt" - description: > - Less than. - - lt(x, y) := (x < y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "gt" - description: > - Greater than. - - gt(x, y) := (x > y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "lte" - description: > - Less than or equal to. - - lte(x, y) := (x <= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "gte" - description: > - Greater than or equal to. - - gte(x, y) := (x >= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "between" - description: >- - Whether the `expression` is greater than or equal to `low` and less than or equal to `high`. - - `expression` BETWEEN `low` AND `high` - - If `low`, `high`, or `expression` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: expression - description: The expression to test for in the range defined by `low` and `high`. - - value: any1 - name: low - description: The value to check if greater than or equal to. - - value: any1 - name: high - description: The value to check if less than or equal to. - return: BOOLEAN - - - name: "is_null" - description: Whether a value is null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: BOOLEAN - nullability: DECLARED_OUTPUT - - - name: "is_not_null" - description: Whether a value is not null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: BOOLEAN - nullability: DECLARED_OUTPUT - - - name: "is_nan" - description: > - Whether a value is not a number. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "is_finite" - description: > - Whether a value is finite (neither infinite nor NaN). - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "is_infinite" - description: > - Whether a value is infinite. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "nullif" - description: If two values are equal, return null. Otherwise, return the first value. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: any1 - - - name: "coalesce" - description: >- - Evaluate arguments from left to right and return the first argument that is not null. Once - a non-null argument is found, the remaining arguments are not evaluated. - - If all arguments are null, return null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 diff --git a/velox/substrait/extensions/functions_datetime.yaml b/velox/substrait/extensions/functions_datetime.yaml deleted file mode 100644 index 062442650540..000000000000 --- a/velox/substrait/extensions/functions_datetime.yaml +++ /dev/null @@ -1,351 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: extract - description: Extract portion of a date/time value. - impls: - - args: - - options: [ YEAR, MONTH, DAY, SECOND ] - name: The part of the value to extract. - required: true - - value: timestamp - return: i64 - - args: - - options: [ YEAR, MONTH, DAY, SECOND ] - name: The part of the value to extract. - required: true - - value: timestamp_tz - return: i64 - - args: - - options: [ YEAR, MONTH, DAY ] - name: The part of the value to extract. - required: true - - value: date - return: i64 - - args: - - options: [ SECOND ] - name: The part of the value to extract. - required: true - - value: time - return: i64 - - - name: "add" - description: Add an interval to a date/time type. - impls: - - args: - - value: timestamp - - value: interval_year - return: timestamp - - args: - - value: timestamp_tz - - value: interval_year - return: timestamp - - args: - - value: date - - value: interval_year - return: timestamp - - args: - - value: timestamp - - value: interval_day - return: timestamp - - args: - - value: timestamp_tz - - value: interval_day - return: timestamp - - args: - - value: date - - value: interval_day - return: timestamp - - - name: "add_intervals" - description: Add two intervals together. - impls: - - args: - - value: interval_day - - value: interval_day - return: interval_day - - args: - - value: interval_year - - value: interval_year - return: interval_year - - - name: "subtract" - description: Subtract an interval from a date/time type. - impls: - - args: - - value: timestamp - - value: interval_year - return: timestamp - - args: - - value: timestamp_tz - - value: interval_year - return: timestamp_tz - - args: - - value: date - - value: interval_year - return: date - - args: - - value: timestamp - - value: interval_day - return: timestamp - - args: - - value: timestamp_tz - - value: interval_day - return: timestamp_tz - - args: - - value: date - - value: interval_day - return: date - - - name: "lte" - description: less than or equal to - impls: - - args: - - value: timestamp - - value: timestamp - return: boolean - - args: - - value: timestamp_tz - - value: timestamp_tz - return: boolean - - args: - - value: date - - value: date - return: boolean - - args: - - value: interval_day - - value: interval_day - return: boolean - - args: - - value: interval_year - - value: interval_year - return: boolean - - - name: "lt" - description: less than - impls: - - args: - - value: timestamp - - value: timestamp - return: boolean - - args: - - value: timestamp_tz - - value: timestamp_tz - return: boolean - - args: - - value: date - - value: date - return: boolean - - args: - - value: interval_day - - value: interval_day - return: boolean - - args: - - value: interval_year - - value: interval_year - return: boolean - - - name: "gte" - description: greater than or equal to - impls: - - args: - - value: timestamp - - value: timestamp - return: boolean - - args: - - value: timestamp_tz - - value: timestamp_tz - return: boolean - - args: - - value: date - - value: date - return: boolean - - args: - - value: interval_day - - value: interval_day - return: boolean - - args: - - value: interval_year - - value: interval_year - return: boolean - - - name: "gt" - description: greater than - impls: - - args: - - value: timestamp - - value: timestamp - return: boolean - - args: - - value: timestamp_tz - - value: timestamp_tz - return: boolean - - args: - - value: date - - value: date - return: boolean - - args: - - value: interval_day - - value: interval_day - return: boolean - - args: - - value: interval_year - - value: interval_year - return: boolean - - - name: year - description: Return the year from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: month - description: Return the month from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: quarter - description: Return the quarter from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: week - description: Return the week from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: day - description: Return the day from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: day_of_month - description: Return the day of month from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: day_of_week - description: Return the day of week from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: day_of_year - description: Return the day of year from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: year_of_week - description: Return the year of week from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: hour - description: Return the hour from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: minute - description: Return the minute from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 - - - name: second - description: Return the second from date/timestamp - impls: - - args: - - value: date - return: i64 - - args: - - value: timestamp - return: i64 - - args: - - value: timestamp_tz - return: i64 \ No newline at end of file diff --git a/velox/substrait/extensions/functions_logarithmic.yaml b/velox/substrait/extensions/functions_logarithmic.yaml deleted file mode 100644 index e8cc43ae99ee..000000000000 --- a/velox/substrait/extensions/functions_logarithmic.yaml +++ /dev/null @@ -1,132 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ln" - description: "Natural logarithm of the value" - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp64 - return: fp64 - - - name: "log10" - description: "Logarithm to base 10 of the value" - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp64 - return: fp64 - - - name: "log2" - description: "Logarithm to base 2 of the value" - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp64 - return: fp64 - - - name: "logb" - description: > - Logarithm of the value with the given base - - logb(x, b) => log_{b} (x) - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp32 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp32 - name: "base" - description: "The logarithm base `b` to use" - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - value: fp64 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp64 - name: "base" - description: "The logarithm base `b` to use" - return: fp64 - - - name: "log1p" - description: > - Natural logarithm (base e) of 1 + x - - log1p(x) => log(1+x) - impls: - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp32 - return: fp32 - - args: - - name: rounding - options: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - required: false - - name: on_domain_error - options: [ NAN, ERROR ] - required: false - - name: x - value: fp64 - return: fp64 - diff --git a/velox/substrait/extensions/functions_rounding.yaml b/velox/substrait/extensions/functions_rounding.yaml deleted file mode 100644 index 2243ef7b0fbb..000000000000 --- a/velox/substrait/extensions/functions_rounding.yaml +++ /dev/null @@ -1,29 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ceil" - description: > - Rounding to the ceiling of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 - - - name: "floor" - description: > - Rounding to the floor of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 diff --git a/velox/substrait/extensions/functions_set.yaml b/velox/substrait/extensions/functions_set.yaml deleted file mode 100644 index 67aec19c451e..000000000000 --- a/velox/substrait/extensions/functions_set.yaml +++ /dev/null @@ -1,35 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "index_in" - description: > - Checks the membership of a value in a list of values - Returns the first 0-based index value of some input `T` if `T` is equal to - any element in `List`. Returns `NULL` if not found. - If `T` is `NULL`, returns `NULL`. - If `T` is `NaN`: - - Returns 0-based index of `NaN` in `List` (default) - - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified) - impls: - - args: - - options: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] - required: false - - value: T - - value: List - nullability: DECLARED_OUTPUT - return: int64? - - - name: "in" - description: > - Checks the membership of a value in a list of values - Returns true if `T` is equal to any element in `List`. Returns false if not found. - If `T` is `NULL`, returns false. - impls: - - args: - - options: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] - required: false - - value: T - - value: List - nullability: DECLARED_OUTPUT - return: boolean diff --git a/velox/substrait/extensions/functions_string.yaml b/velox/substrait/extensions/functions_string.yaml deleted file mode 100644 index 9aa06f0080eb..000000000000 --- a/velox/substrait/extensions/functions_string.yaml +++ /dev/null @@ -1,1312 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: concat - description: Concatenate strings. - impls: - - args: - - value: "varchar" - name: "input" - variadic: - min: 1 - return: "varchar" - - args: - - value: "string" - name: "input" - variadic: - min: 1 - return: "string" - - - name: like - description: >- - Are two strings like each other. - - The `case_sensitivity` option applies to the `match` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "match" - description: The string to match against the input string. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "match" - description: The string to match against the input string. - return: "BOOLEAN" - - - name: substring - description: >- - Extract a substring of a specified `length` starting from position `start`. - A `start` value of 1 refers to the first characters of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "string" - - args: - - value: "fixedchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "string" - - args: - - value: "varchar" - - value: i64 - - value: i64 - return: "varchar" - - args: - - value: "string" - - value: i64 - - value: i64 - return: "string" - - args: - - value: "fixedchar" - - value: i64 - - value: i64 - return: "string" - - - name: regexp_match_substring - description: >- - Extract a substring that matches the given regular expression pattern. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the - pattern to be extracted is specified using the `occurrence` argument. Specifying `1` means - the first occurrence will be extracted, `2` means the second occurrence, and so on. - The `occurrence` argument should be a positive non-zero integer. The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or - the position value is out of range. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - return: "varchar" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - return: "string" - - args: - - value: "varchar" - - value: i64 - - value: i64 - return: "varchar" - - args: - - value: "string" - - value: i64 - - value: i64 - return: "string" - - args: - - value: "fixedchar" - - value: i64 - - value: i64 - return: "string" - - - name: starts_with - description: >- - Whether the `input` string starts with the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - - name: ends_with - description: >- - Whether `input` string ends with the substring. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - - name: contains - description: >- - Whether the `input` string contains the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: "BOOLEAN" - - - name: strpos - description: >- - Return the position of the first occurrence of a string in another string. The first - character of the string is at position 1. If no occurrence is found, 0 is returned. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - return: i64 - - - name: regexp_strpos - description: >- - Return the position of an occurrence of the given regular expression pattern in a - string. The first character of the string is at position 1. The regular expression pattern - should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. Which occurrence to - return the position of is specified using the `occurrence` argument. Specifying `1` means - the position first occurrence will be returned, `2` means the position of the second - occurrence, and so on. The `occurrence` argument should be a positive non-zero integer. If - no occurrence is found, 0 is returned. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or - the position value is out of range. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - return: i64 - - - name: count_substring - description: >- - Return the number of non-overlapping occurrences of a substring in an input string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to count. - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to count. - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to count. - return: i64 - - - name: regexp_count_substring - description: >- - Return the number of non-overlapping occurrences of a regular expression pattern in an input - string. The regular expression pattern should follow the International Components for - Unicode implementation (https://unicode-org.github.io/icu/userguide/strings/regexp.html). - The number of characters from the beginning of the string to begin starting to search for - pattern matches can be specified using the `position` argument. Specifying `1` means to - search for matches starting at the first character of the input string, `2` means the - second character, and so on. The `position` argument should be a positive non-zero integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile or the position value is out of range. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - return: i64 - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "fixedchar" - name: "input" - - value: "fixedchar" - name: "pattern" - - value: i64 - name: "position" - return: i64 - - - name: replace - description: >- - Replace all occurrences of the substring with the replacement string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "string" - name: "input" - description: Input string. - - value: "string" - name: "substring" - description: The substring to replace. - - value: "string" - name: "replacement" - description: The replacement string. - return: "string" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - required: false - - value: "varchar" - name: "input" - description: Input string. - - value: "varchar" - name: "substring" - description: The substring to replace. - - value: "varchar" - name: "replacement" - description: The replacement string. - return: "varchar" - - - name: concat_ws - description: Concatenate strings together separated by a separator. - impls: - - args: - - value: "string" - name: "separator" - description: Character to separate strings by. - - value: "string" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "string" - - args: - - value: "varchar" - name: "separator" - description: Character to separate strings by. - - value: "varchar" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "varchar" - - - name: repeat - description: Repeat a string `count` number of times. - impls: - - args: - - value: "string" - name: "input" - - value: i64 - name: "count" - return: "string" - - args: - - value: "varchar" - - value: i64 - name: "input" - - value: i64 - name: "count" - return: "varchar" - - - name: reverse - description: Returns the string in reverse order. - impls: - - args: - - value: "string" - name: "input" - return: "string" - - args: - - value: "varchar" - name: "input" - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: replace_slice - description: >- - Replace a slice of the input string. A specified 'length' of characters will be deleted from - the input string beginning at the 'start' position and will be replaced by a new string. A - start value of 1 indicates the first character of the input string. If start is negative - or zero, or greater than the length of the input string, a null string is returned. If 'length' - is negative, a null string is returned. If 'length' is zero, inserting of the new string - occurs at the specified 'start' position and no characters are deleted. If 'length' is - greater than the input string, deletion will occur up to the last character of the input string. - impls: - - args: - - value: "string" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "string" - name: "replacement" - description: The new string to insert at the start position. - return: "string" - - args: - - value: "varchar" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "varchar" - name: "replacement" - description: The new string to insert at the start position. - return: "varchar" - - - name: lower - description: >- - Transform the string to lower case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "string" - name: "input" - return: "string" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "varchar" - name: "input" - return: "varchar" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: upper - description: >- - Transform the string to upper case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "string" - name: "input" - return: "string" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "varchar" - name: "input" - return: "varchar" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: swapcase - description: >- - Transform the string's lowercase characters to uppercase and uppercase characters to - lowercase. Implementation should follow the utf8_unicode_ci collations according to the - Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "string" - name: "input" - return: "string" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "varchar" - name: "input" - return: "varchar" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: capitalize - description: >- - Capitalize the first character of the input string. Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "string" - name: "input" - return: "string" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "varchar" - name: "input" - return: "varchar" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: title - description: >- - Converts the input string into titlecase. Capitalize the first character of each word in the - input string except for articles (a, an, the). Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "string" - name: "input" - return: "string" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "varchar" - name: "input" - return: "varchar" - - args: - - name: char_set - options: [ UTF8, ASCII_ONLY ] - required: false - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: char_length - description: >- - Return the number of characters in the input string. The length includes trailing spaces. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: bit_length - description: Return the number of bits in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: octet_length - description: Return the number of bytes in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: regexp_replace - description: >- - Search a string for a substring that matches a given regular expression pattern and replace - it with a replacement string. The regular expression pattern should follow the - International Components for Unicode implementation (https://unicode-org.github - .io/icu/userguide/strings/regexp.html). The occurrence of the pattern to be replaced is - specified using the `occurrence` argument. Specifying `1` means only the first occurrence - will be replaced, `2` means the second occurrence, and so on. Specifying `0` means all - occurrences will be replaced. The number of characters from the beginning of the string to - begin starting to search for pattern matches can be specified using the `position` argument. - Specifying `1` means to search for matches starting at the first character of the input - string, `2` means the second character, and so on. The `position` argument should be a - positive non-zero integer. The replacement string can capture groups using numbered - backreferences. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the replacement contains an illegal - back-reference, the occurrence value is out of range, or the position value is out of range. - impls: - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "string" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - return: "string" - - args: - - name: case_sensitivity - options: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII] - required: false - - name: multiline - options: [ MULTILINE_DISABLED, MULTILINE_ENABLED] - required: false - - name: dotall - options: [ DOTALL_DISABLED, DOTALL_ENABLED] - required: false - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "varchar" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - return: "varchar" - - - name: ltrim - description: >- - Remove any occurrence of the characters from the left side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: rtrim - description: >- - Remove any occurrence of the characters from the right side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: trim - description: >- - Remove any occurrence of the characters from the left and right sides of - the string. If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: lpad - description: >- - Left-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the right-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: rpad - description: >- - Right-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the left-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: center - description: >- - Center the input string by padding the sides with a single `character` until the specified - `length` of the string has been reached. By default, if the `length` will be reached with - an uneven number of padding, the extra padding will be applied to the right side. - The side with extra padding can be controlled with the `padding` option. - - Behavior is undefined if the number of characters passed to the `character` argument is not 1. - impls: - - args: - - name: padding - options: [ RIGHT, LEFT ] - required: false - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar<1>" - name: "character" - description: "The character to use for padding." - return: "varchar" - - args: - - name: padding - options: [ RIGHT, LEFT ] - required: false - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "character" - description: "The character to use for padding." - return: "string" - - - name: left - description: Extract `count` characters starting from the left of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - - - name: right - description: Extract `count` characters starting from the right of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - -aggregate_functions: - - - - name: string_agg - description: Concatenates a column of string values with a separator. - impls: - - args: - - value: "string" - name: "input" - description: "Column of string values." - - value: "string" - name: "separator" - constant: true - description: "Separator for concatenated strings" - ordered: true - return: "string" diff --git a/velox/substrait/extensions/type_variations.yaml b/velox/substrait/extensions/type_variations.yaml deleted file mode 100644 index 570760a09a3d..000000000000 --- a/velox/substrait/extensions/type_variations.yaml +++ /dev/null @@ -1,23 +0,0 @@ -type_variations: - - parent: string - name: dict4 - description: a four-byte dictionary encoded string - functions: INHERITS - - parent: string - name: bigoffset - description: >- - The arrow large string representation of strings, still restricted to the default string size defined in - Substrait. - functions: SEPARATE - - parent: struct - name: avro - description: an avro encoded struct - functions: SEPARATE - - parent: struct - name: cstruct - description: a cstruct representation of the struct - functions: SEPARATE - - parent: struct - name: dict2 - description: a 2-byte dictionary encoded string. - functions: INHERITS diff --git a/velox/substrait/extensions/unknown.yaml b/velox/substrait/extensions/unknown.yaml deleted file mode 100644 index 3b0e6c1e7f58..000000000000 --- a/velox/substrait/extensions/unknown.yaml +++ /dev/null @@ -1,66 +0,0 @@ -%YAML 1.2 ---- -types: - - name: unknown -scalar_functions: - - name: "add" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "subtract" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "multiply" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "divide" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "modulus" - impls: - - args: - - value: unknown - - value: unknown - return: unknown -aggregate_functions: - - name: "sum" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "avg" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "min" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "max" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "count" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown diff --git a/velox/substrait/tests/CMakeLists.txt b/velox/substrait/tests/CMakeLists.txt index fa3203f20246..32b548cb6d52 100644 --- a/velox/substrait/tests/CMakeLists.txt +++ b/velox/substrait/tests/CMakeLists.txt @@ -18,11 +18,12 @@ add_executable( Substrait2VeloxValuesNodeConversionTest.cpp FunctionTest.cpp JsonToProtoConverter.cpp - VeloxSubstraitRoundTripPlanConverterTest.cpp + VeloxSubstraitRoundTripTest.cpp VeloxToSubstraitTypeTest.cpp VeloxSubstraitSignatureTest.cpp - SubstraitExtensionCollectorTest.cpp - VeloxSustraitHashJoinRoundTripConverterTest.cpp) + SubstraitExtensionCollectorTest.cpp + VeloxSubstraitRoundTripTest.cpp + VeloxSubstraitJoinRoundTripTest.cpp) add_dependencies(velox_plan_conversion_test velox_substrait_plan_converter) diff --git a/velox/substrait/tests/FunctionTest.cpp b/velox/substrait/tests/FunctionTest.cpp index f372e376f969..97742281ba95 100644 --- a/velox/substrait/tests/FunctionTest.cpp +++ b/velox/substrait/tests/FunctionTest.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include "velox/substrait/tests/JsonToProtoConverter.h" - -#include "velox/common/base/tests/Fs.h" #include "velox/common/base/tests/GTestUtils.h" #include "velox/dwio/common/tests/utils/DataFiles.h" +#include "velox/substrait/tests/JsonToProtoConverter.h" #include "velox/substrait/SubstraitToVeloxPlan.h" #include "velox/substrait/TypeUtils.h" +#include "velox/substrait/VariantToVectorConverter.h" #include "velox/substrait/VeloxToSubstraitType.h" + using namespace facebook::velox; using namespace facebook::velox::test; using namespace facebook::velox::substrait; @@ -119,3 +119,85 @@ TEST_F(FunctionTest, constructFunctionMap) { function = planConverter_->findFunction(9); ASSERT_EQ(function, "is_not_null:fp64"); } + +TEST_F(FunctionTest, setVectorFromVariants) { + auto resultVec = setVectorFromVariants( + BOOLEAN(), {variant(false), variant(true)}, pool_.get()); + ASSERT_EQ(false, resultVec->asFlatVector()->valueAt(0)); + ASSERT_EQ(true, resultVec->asFlatVector()->valueAt(1)); + + auto min8 = std::numeric_limits::min(); + auto max8 = std::numeric_limits::max(); + resultVec = setVectorFromVariants( + TINYINT(), {variant(min8), variant(max8)}, pool_.get()); + EXPECT_EQ(min8, resultVec->asFlatVector()->valueAt(0)); + EXPECT_EQ(max8, resultVec->asFlatVector()->valueAt(1)); + + auto min16 = std::numeric_limits::min(); + auto max16 = std::numeric_limits::max(); + resultVec = setVectorFromVariants( + SMALLINT(), {variant(min16), variant(max16)}, pool_.get()); + EXPECT_EQ(min16, resultVec->asFlatVector()->valueAt(0)); + EXPECT_EQ(max16, resultVec->asFlatVector()->valueAt(1)); + + auto min32 = std::numeric_limits::min(); + auto max32 = std::numeric_limits::max(); + resultVec = setVectorFromVariants( + INTEGER(), {variant(min32), variant(max32)}, pool_.get()); + EXPECT_EQ(min32, resultVec->asFlatVector()->valueAt(0)); + EXPECT_EQ(max32, resultVec->asFlatVector()->valueAt(1)); + + auto min64 = std::numeric_limits::min(); + auto max64 = std::numeric_limits::max(); + resultVec = setVectorFromVariants( + BIGINT(), {variant(min64), variant(max64)}, pool_.get()); + EXPECT_EQ(min64, resultVec->asFlatVector()->valueAt(0)); + EXPECT_EQ(max64, resultVec->asFlatVector()->valueAt(1)); + + // Floats are harder to compare because of low-precision. Just making sure + // they don't throw. + EXPECT_NO_THROW(setVectorFromVariants( + REAL(), {variant(float(0.99L)), variant(float(-1.99L))}, pool_.get())); + + resultVec = setVectorFromVariants( + DOUBLE(), {variant(double(0.99L)), variant(double(-1.99L))}, pool_.get()); + ASSERT_EQ(double(0.99L), resultVec->asFlatVector()->valueAt(0)); + ASSERT_EQ(double(-1.99L), resultVec->asFlatVector()->valueAt(1)); + + resultVec = setVectorFromVariants( + VARCHAR(), {variant(""), variant("asdf")}, pool_.get()); + ASSERT_EQ("", resultVec->asFlatVector()->valueAt(0).str()); + ASSERT_EQ("asdf", resultVec->asFlatVector()->valueAt(1).str()); + + ASSERT_ANY_THROW(setVectorFromVariants( + VARBINARY(), {variant(""), variant("asdf")}, pool_.get())); + + resultVec = setVectorFromVariants( + TIMESTAMP(), + {variant(Timestamp(9020, 0)), variant(Timestamp(8875, 0))}, + pool_.get()); + ASSERT_EQ( + "1970-01-01T02:30:20.000000000", + resultVec->asFlatVector()->valueAt(0).toString()); + ASSERT_EQ( + "1970-01-01T02:27:55.000000000", + resultVec->asFlatVector()->valueAt(1).toString()); + + resultVec = setVectorFromVariants( + DATE(), {variant(Date(9020)), variant(Date(8875))}, pool_.get()); + ASSERT_EQ( + "1994-09-12", resultVec->asFlatVector()->valueAt(0).toString()); + ASSERT_EQ( + "1994-04-20", resultVec->asFlatVector()->valueAt(1).toString()); + + resultVec = setVectorFromVariants( + INTERVAL_DAY_TIME(), + {variant(IntervalDayTime(9020)), variant(IntervalDayTime(8875))}, + pool_.get()); + ASSERT_EQ( + "0 00:00:09.020", + resultVec->asFlatVector()->valueAt(0).toString()); + ASSERT_EQ( + "0 00:00:08.875", + resultVec->asFlatVector()->valueAt(1).toString()); +} diff --git a/velox/substrait/tests/JsonToProtoConverter.cpp b/velox/substrait/tests/JsonToProtoConverter.cpp index aa0e5d586de2..356e296b0c02 100644 --- a/velox/substrait/tests/JsonToProtoConverter.cpp +++ b/velox/substrait/tests/JsonToProtoConverter.cpp @@ -36,20 +36,3 @@ void JsonToProtoConverter::readFromFile( status.code(), status.message()); } - -std::string JsonToProtoConverter::messageToJson( - const google::protobuf::Message& message) { - google::protobuf::util::JsonPrintOptions options; - options.add_whitespace = true; - options.always_print_primitive_fields = true; - options.preserve_proto_field_names = true; - std::string json; - auto status = - google::protobuf::util::MessageToJsonString(message, &json, options); - VELOX_CHECK( - status.ok(), - "Failed to convert message to JSON: {} {}", - status.code(), - status.message()); - return json; -} diff --git a/velox/substrait/tests/JsonToProtoConverter.h b/velox/substrait/tests/JsonToProtoConverter.h index b08f46998aea..ebaf613b6abf 100644 --- a/velox/substrait/tests/JsonToProtoConverter.h +++ b/velox/substrait/tests/JsonToProtoConverter.h @@ -24,7 +24,4 @@ class JsonToProtoConverter { static void readFromFile( const std::string& msgPath, google::protobuf::Message& msg); - - /// Convert Protobuf message to Json. - static std::string messageToJson(const google::protobuf::Message& message); }; diff --git a/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp b/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp index b9984ea8cb69..f7340b763809 100644 --- a/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp +++ b/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp @@ -15,8 +15,6 @@ */ #include "velox/substrait/tests/JsonToProtoConverter.h" - -#include "velox/common/base/tests/Fs.h" #include "velox/dwio/common/tests/utils/DataFiles.h" #include "velox/exec/tests/utils/OperatorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" @@ -31,9 +29,7 @@ using namespace facebook::velox::exec::test; using namespace facebook::velox::substrait; class Substrait2VeloxValuesNodeConversionTest : public OperatorTestBase { - public: - std::unique_ptr pool_{ - memory::getDefaultScopedMemoryPool()}; + protected: std::shared_ptr planConverter_ = std::make_shared(pool_.get()); }; diff --git a/velox/substrait/tests/SubstraitExtensionTest.cpp b/velox/substrait/tests/SubstraitExtensionTest.cpp deleted file mode 100644 index 8743f181e82f..000000000000 --- a/velox/substrait/tests/SubstraitExtensionTest.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/base/tests/GTestUtils.h" - -#include "velox/substrait/SubstraitExtension.h" - -using namespace facebook::velox; -using namespace facebook::velox::substrait; - -class SubstraitExtensionTest : public ::testing::Test {}; - -/// used to lookup function with user provided function mappings. -class SubstraitFunctionMappingsTest : public SubstraitFunctionMappings { - public: - const FunctionMappings scalarMappings() const override { - static const FunctionMappings scalarMappings{ - {"plus", "add"}, - }; - return scalarMappings; - } - - const FunctionMappings aggregateMappings() const override { - return facebook::velox::substrait::FunctionMappings(); - } - const FunctionMappings windowMappings() const override { - return facebook::velox::substrait::FunctionMappings(); - } -}; - -TEST_F(SubstraitExtensionTest, lookupFunction) { - auto extension = SubstraitExtension::loadExtension(); - const auto& function = extension->lookupFunction("add:opt_i8_i8"); - ASSERT_TRUE(function.has_value()); - ASSERT_EQ(function.value()->signature(), "add:opt_i8_i8"); -} - -TEST_F(SubstraitExtensionTest, lookupFunctionWithMappings) { - auto extension = SubstraitExtension::loadExtension(); - auto testSubstraitFunctionMappings = - std::make_shared(); - const auto& function = extension->lookupFunction( - testSubstraitFunctionMappings, "plus:opt_i8_i8"); - ASSERT_TRUE(function.has_value()); - ASSERT_EQ(function.value()->signature(), "add:opt_i8_i8"); -} diff --git a/velox/substrait/tests/SubstraitFunctionLookupTest.cpp b/velox/substrait/tests/SubstraitFunctionLookupTest.cpp deleted file mode 100644 index f4ebe7f01235..000000000000 --- a/velox/substrait/tests/SubstraitFunctionLookupTest.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/base/tests/GTestUtils.h" - -#include "velox/substrait/SubstraitFunctionLookup.h" -#include "velox/substrait/VeloxToSubstraitMappings.h" - -using namespace facebook::velox; -using namespace facebook::velox::substrait; - -class SubstraitFunctionLookupTest : public ::testing::Test { - protected: - void SetUp() override { - extension_ = SubstraitExtension::loadExtension(); - mappings_ = std::make_shared(); - scalarFunctionLookup_ = - std::make_shared(extension_, mappings_); - aggregateFunctionLookup_ = - std::make_shared( - extension_, mappings_); - const auto& testExtension = SubstraitExtension::loadExtension( - {getDataPath() + "functions_test.yaml"}); - testScalarFunctionLookup_ = std::make_shared( - testExtension, mappings_); - } - - void testScalarFunctionLookup( - const std::string& name, - const std::vector& arguments, - const SubstraitTypePtr& returnType, - const std::string& outputSignature) { - const auto& functionSignature = - SubstraitFunctionSignature::of(name, arguments, returnType); - const auto& functionOption = - scalarFunctionLookup_->lookupFunction(functionSignature); - - ASSERT_TRUE(functionOption.has_value()); - ASSERT_EQ(functionOption.value()->anchor().key, outputSignature); - } - - void testAggregateFunctionLookup( - const std::string& name, - const std::vector& arguments, - const SubstraitTypePtr& returnType, - const std::string& outputSignature) { - const auto& functionSignature = - SubstraitFunctionSignature::of(name, arguments, returnType); - const auto& functionOption = - aggregateFunctionLookup_->lookupFunction(functionSignature); - - ASSERT_TRUE(functionOption.has_value()); - ASSERT_EQ(functionOption.value()->anchor().key, outputSignature); - } - - void assertTestSignature( - const std::string& name, - const std::vector& arguments, - const SubstraitTypePtr& returnType, - const std::string& outputSignature) { - const auto& functionSignature = - SubstraitFunctionSignature::of(name, arguments, returnType); - const auto& functionOption = - testScalarFunctionLookup_->lookupFunction(functionSignature); - - ASSERT_TRUE(functionOption.has_value()); - ASSERT_EQ(functionOption.value()->anchor().key, outputSignature); - } - - private: - static std::string getDataPath() { - const std::string absolute_path = __FILE__; - auto const pos = absolute_path.find_last_of('/'); - return absolute_path.substr(0, pos) + "/data/"; - } - - SubstraitExtensionPtr extension_; - SubstraitFunctionMappingsPtr mappings_; - SubstraitScalarFunctionLookupPtr scalarFunctionLookup_; - SubstraitAggregateFunctionLookupPtr aggregateFunctionLookup_; - SubstraitScalarFunctionLookupPtr testScalarFunctionLookup_; -}; - -TEST_F(SubstraitFunctionLookupTest, lt) { - testScalarFunctionLookup("lt", {kI8(), kI8()}, kBool(), "lt:i8_i8"); - - testScalarFunctionLookup("lt", {kI16(), kI16()}, kBool(), "lt:i16_i16"); - - testScalarFunctionLookup("lt", {kI32(), kI32()}, kBool(), "lt:i32_i32"); - - testScalarFunctionLookup("lt", {kI64(), kI64()}, kBool(), "lt:i64_i64"); - - testScalarFunctionLookup("lt", {kFp32(), kFp32()}, kBool(), "lt:fp32_fp32"); - - testScalarFunctionLookup("lt", {kFp64(), kFp64()}, kBool(), "lt:fp64_fp64"); -} - -TEST_F(SubstraitFunctionLookupTest, between) { - testScalarFunctionLookup( - "between", {kI8(), kI8(), kI8()}, kBool(), "between:i8_i8_i8"); -} - -TEST_F(SubstraitFunctionLookupTest, add) { - testScalarFunctionLookup("add", {kI8(), kI8()}, kI8(), "add:opt_i8_i8"); - - testScalarFunctionLookup("plus", {kI8(), kI8()}, kI8(), "add:opt_i8_i8"); -} - -TEST_F(SubstraitFunctionLookupTest, devide) { - testScalarFunctionLookup( - "divide", - { - kFp32(), - kFp32(), - }, - kFp32(), - "divide:opt_opt_fp32_fp32"); -} - -TEST_F(SubstraitFunctionLookupTest, test) { - assertTestSignature( - "test", - { - kFp32(), - kFp32(), - }, - kBool(), - "test:fp32_fp32"); - - assertTestSignature( - "test", - { - kI8(), - kI16(), - }, - kBool(), - "test:i8_i16"); - - assertTestSignature( - "test", - { - kI8(), - kI16(), - kI16(), - kI32(), - }, - kBool(), - "test:i8_i16_i16_i32"); - - assertTestSignature( - "test", - { - kBool(), - kBool(), - kI16(), - kI16(), - kI8(), - }, - kBool(), - "test:bool_bool_i16_i16_i8"); - - assertTestSignature( - "test", - { - kBool(), - kI8(), - kBool(), - kI16(), - kI32(), - }, - kBool(), - "test:bool_i8_bool_i16_i32"); - - assertTestSignature( - "test", - { - kBool(), - kI8(), - kBool(), - kI16(), - kI32(), - }, - kBool(), - "test:bool_i8_bool_i16_i32"); -} - -TEST_F(SubstraitFunctionLookupTest, avg) { - testAggregateFunctionLookup( - "avg", - {SubstraitType::decode("struct")}, - kFp64(), - "avg:opt_fp32"); -} - -TEST_F(SubstraitFunctionLookupTest, functionSet) { - testScalarFunctionLookup( - "in", - {kI32(), SubstraitType::decode("list")}, - kBool(), - "in:i32_list"); -} - -TEST_F(SubstraitFunctionLookupTest, logical) { - testScalarFunctionLookup("and", {kBool(), kBool()}, kBool(), "and:bool"); - testScalarFunctionLookup("or", {kBool(), kBool()}, kBool(), "or:bool"); - testScalarFunctionLookup("not", {kBool()}, kBool(), "not:bool"); - testScalarFunctionLookup("xor", {kBool(), kBool()}, kBool(), "xor:bool_bool"); -} - -TEST_F(SubstraitFunctionLookupTest, functionString) { - testScalarFunctionLookup( - "like", {kString(), kString()}, kBool(), "like:str_str"); - testScalarFunctionLookup( - "like", - {SubstraitType::decode("varchar"), - SubstraitType::decode("varchar")}, - kBool(), - "like:vchar_vchar"); - testScalarFunctionLookup( - "substr", - {kString(), kI64(), kI64()}, - kString(), - "substring:str_i64_i64"); -} \ No newline at end of file diff --git a/velox/substrait/tests/SubstraitSignatureTest.cpp b/velox/substrait/tests/SubstraitSignatureTest.cpp deleted file mode 100644 index 4361c36446af..000000000000 --- a/velox/substrait/tests/SubstraitSignatureTest.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/base/tests/GTestUtils.h" - -#include "velox/functions/prestosql/registration/RegistrationFunctions.h" -#include "velox/substrait/SubstraitSignature.h" - -using namespace facebook::velox; -using namespace facebook::velox::substrait; - -class SubstraitFunctionMappingsTest : public SubstraitFunctionMappings { - public: - const FunctionMappings scalarMappings() const override { - static const FunctionMappings scalarMappings{ - {"plus", "add"}, - }; - return scalarMappings; - } -}; - -class SubstraitSignatureTest : public ::testing::Test { - protected: - void assertSignature( - const std::string& inputSignature, - const std::string& outputSignature) { - auto testSubstraitFunctionMappings = - std::make_shared(); - auto signature = SubstraitFunctionSignature ::signature( - inputSignature, testSubstraitFunctionMappings); - ASSERT_EQ(signature, outputSignature); - } -}; - -TEST_F(SubstraitSignatureTest, signatureTest) { - assertSignature("plus:opt_i8_i8", "add:opt_i8_i8"); - assertSignature("add:opt_i8_i8", "add:opt_i8_i8"); - assertSignature("add", "add"); -} diff --git a/velox/substrait/tests/SubstraitTypeLookupTest.cpp b/velox/substrait/tests/SubstraitTypeLookupTest.cpp deleted file mode 100644 index 3ea3cf8a8c28..000000000000 --- a/velox/substrait/tests/SubstraitTypeLookupTest.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/base/tests/GTestUtils.h" - -#include "velox/substrait/SubstraitFunctionLookup.h" -#include "velox/substrait/VeloxToSubstraitMappings.h" - -using namespace facebook::velox; -using namespace facebook::velox::substrait; - -class SubstraitTypeLookupTest : public ::testing::Test { - protected: - void SetUp() override { - extension = SubstraitExtension::loadExtension(); - typeLookup = std::make_shared(extension->types); - } - - public: - SubstraitExtensionPtr extension; - SubstraitTypeLookupPtr typeLookup; -}; - -TEST_F(SubstraitTypeLookupTest, unknowLookup) { - auto unknown = typeLookup->lookupUnknownType(); - ASSERT_TRUE(unknown.has_value()); - ASSERT_EQ(unknown.value()->name, "unknown"); -} diff --git a/velox/substrait/tests/SubstraitTypeTest.cpp b/velox/substrait/tests/SubstraitTypeTest.cpp deleted file mode 100644 index c9c12d643541..000000000000 --- a/velox/substrait/tests/SubstraitTypeTest.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/substrait/SubstraitType.h" -#include "velox/common/base/tests/GTestUtils.h" -#include "velox/substrait/TypeUtils.h" - -using namespace facebook::velox; -using namespace facebook::velox::substrait; - -class SubstraitTypeTest : public ::testing::Test { - protected: - template - void testDecode(const std::string& rawType, const std::string& signature) { - const auto& type = SubstraitType::decode(rawType); - ASSERT_TRUE(type->kind() == kind); - ASSERT_EQ(type->signature(), signature); - } - - template - void testDecode( - const std::string& rawType, - const std::function&)>& - typeCallBack) { - const auto& type = SubstraitType::decode(rawType); - if (typeCallBack) { - typeCallBack(std::dynamic_pointer_cast(type)); - } - } -}; - -TEST_F(SubstraitTypeTest, decodeTest) { - testDecode("i32?", "i32"); - testDecode("BOOLEAN", "bool"); - testDecode("boolean", "bool"); - testDecode("i8", "i8"); - testDecode("i16", "i16"); - testDecode("i32", "i32"); - testDecode("i64", "i64"); - testDecode("fp32", "fp32"); - testDecode("fp64", "fp64"); - testDecode("binary", "vbin"); - testDecode("timestamp", "ts"); - testDecode("date", "date"); - testDecode("time", "time"); - testDecode("interval_day", "iday"); - testDecode("interval_year", "iyear"); - testDecode("timestamp_tz", "tstz"); - testDecode("uuid", "uuid"); - - testDecode( - "fixedchar", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->length()->value(), "L1"); - ASSERT_EQ(typePtr->signature(), "fchar"); - }); - - testDecode( - "fixedbinary", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->length()->value(), "L1"); - ASSERT_EQ(typePtr->signature(), "fbin"); - }); - - testDecode( - "varchar", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "vchar"); - ASSERT_EQ(typePtr->length()->value(), "L1"); - }); - - testDecode( - "decimal", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "dec"); - ASSERT_EQ(typePtr->precision(), "P"); - ASSERT_EQ(typePtr->scale(), "S"); - }); - - testDecode( - "struct", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "struct"); - }); - - testDecode( - "struct>", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "struct>"); - }); - - testDecode( - "list", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "list"); - }); - - testDecode( - "map", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "map"); - }); - - testDecode( - "any1", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "any1"); - ASSERT_TRUE(typePtr->isWildcard()); - }); - - testDecode( - "any", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "any"); - ASSERT_TRUE(typePtr->isWildcard()); - }); - - testDecode( - "T", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "T"); - ASSERT_TRUE(typePtr->isWildcard()); - }); - - testDecode( - "unknown", - [](const std::shared_ptr& typePtr) { - ASSERT_EQ(typePtr->signature(), "u!name"); - ASSERT_TRUE(typePtr->isUnknown()); - }); -} diff --git a/velox/substrait/tests/VeloxSustraitHashJoinRoundTripConverterTest.cpp b/velox/substrait/tests/VeloxSubstraitJoinRoundTripTest.cpp similarity index 89% rename from velox/substrait/tests/VeloxSustraitHashJoinRoundTripConverterTest.cpp rename to velox/substrait/tests/VeloxSubstraitJoinRoundTripTest.cpp index ce110848fa95..49a47dd5adbc 100644 --- a/velox/substrait/tests/VeloxSustraitHashJoinRoundTripConverterTest.cpp +++ b/velox/substrait/tests/VeloxSubstraitJoinRoundTripTest.cpp @@ -28,7 +28,7 @@ using namespace facebook::velox::test; using namespace facebook::velox::exec::test; using namespace facebook::velox::substrait; -class VeloxSubstraitJoinRoundTripConverterTest : public OperatorTestBase { +class VeloxSubstraitJoinRoundTripTest : public OperatorTestBase { protected: static std::vector makeKeyNames( int cnt, @@ -75,7 +75,7 @@ class VeloxSubstraitJoinRoundTripConverterTest : public OperatorTestBase { auto rightBatch = std::dynamic_pointer_cast( BatchMaker::createBatch(rightType, rightSize, *pool_)); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto planNode = PlanBuilder(planNodeIdGenerator) .values({leftBatch}) @@ -105,8 +105,7 @@ class VeloxSubstraitJoinRoundTripConverterTest : public OperatorTestBase { auto substraitPlan = veloxConvertor_->toSubstrait(arena, plan); // Convert Substrait Plan to the same Velox Plan. - auto samePlan = - substraitConverter_->toVeloxPlan(substraitPlan, pool_.get()); + auto samePlan = substraitConverter_->toVeloxPlan(substraitPlan); // Assert velox again. assertQuery(samePlan, duckDbSql); @@ -116,10 +115,10 @@ class VeloxSubstraitJoinRoundTripConverterTest : public OperatorTestBase { std::make_shared(); std::shared_ptr substraitConverter_ = - std::make_shared(); + std::make_shared(pool_.get()); }; -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, bigintArray) { +TEST_F(VeloxSubstraitJoinRoundTripTest, bigintArray) { testJoin( {BIGINT()}, 16000, @@ -129,7 +128,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, bigintArray) { " WHERE t_k0 = u_k0"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, emptyBuild) { +TEST_F(VeloxSubstraitJoinRoundTripTest, emptyBuild) { testJoin( {BIGINT()}, 16000, @@ -139,7 +138,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, emptyBuild) { " WHERE t_k0 = u_k0"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, normalizedKey) { +TEST_F(VeloxSubstraitJoinRoundTripTest, normalizedKey) { testJoin( {INTEGER(), INTEGER(), INTEGER()}, 16000, @@ -149,7 +148,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, normalizedKey) { " WHERE t_k0 = u_k0 AND t_k1 = u_k1"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, filter) { +TEST_F(VeloxSubstraitJoinRoundTripTest, filter) { testJoin( {BIGINT()}, 16000, @@ -160,7 +159,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, filter) { "((t_k0 % 100) + (u_k0 % 100)) % 40 < 20"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftJoin) { +TEST_F(VeloxSubstraitJoinRoundTripTest, leftJoin) { auto leftVectors = { makeRowVector({ makeFlatVector({1, 2, 3}), @@ -177,7 +176,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftJoin) { createDuckDbTable("t", leftVectors); createDuckDbTable("u", rightVectors); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto buildSide = PlanBuilder(planNodeIdGenerator) .values(rightVectors) @@ -200,7 +199,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftJoin) { "SELECT t.c0,u.c0 FROM t LEFT JOIN u ON (t.c0 = u.c0 AND t.c1 + u.c0 > 0)"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, rightJoin) { +TEST_F(VeloxSubstraitJoinRoundTripTest, rightJoin) { auto leftVectors = { makeRowVector({ makeFlatVector({1, 2, 3, 4, 5}), @@ -219,7 +218,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, rightJoin) { createDuckDbTable("t", leftVectors); createDuckDbTable("u", rightVectors); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto buildSide = PlanBuilder(planNodeIdGenerator) .values(rightVectors) @@ -242,7 +241,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, rightJoin) { "SELECT t.c0, t.c1 FROM t RIGHT JOIN u ON (t.c0 = u.c0 AND t.c1 + u.c0 > 0)"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftSemiJoin) { +TEST_F(VeloxSubstraitJoinRoundTripTest, leftSemiJoin) { auto leftVectors = makeRowVector({ makeFlatVector( 1'234, [](auto row) { return row % 11; }, nullEvery(13)), @@ -257,7 +256,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftSemiJoin) { createDuckDbTable("t", {leftVectors}); createDuckDbTable("u", {rightVectors}); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto op = PlanBuilder(planNodeIdGenerator) .values({leftVectors}) .hashJoin( @@ -276,7 +275,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, leftSemiJoin) { op, "SELECT t.c1 FROM t WHERE t.c0 IN (SELECT c0 FROM u)"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, fullJoin) { +TEST_F(VeloxSubstraitJoinRoundTripTest, fullJoin) { // Left side keys are [0, 1, 2,..10]. auto leftVectors = { makeRowVector({ @@ -302,7 +301,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, fullJoin) { createDuckDbTable("t", leftVectors); createDuckDbTable("u", {rightVectors}); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto buildSide = PlanBuilder(planNodeIdGenerator) .values({rightVectors}) @@ -324,7 +323,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, fullJoin) { op, "SELECT t.c0, t.c1, u.c1 FROM t FULL OUTER JOIN u ON t.c0 = u.c0"); } -TEST_F(VeloxSubstraitJoinRoundTripConverterTest, antiJoin) { +TEST_F(VeloxSubstraitJoinRoundTripTest, antiJoin) { auto leftVectors = makeRowVector({ makeFlatVector( 1'000, [](auto row) { return row % 11; }, nullEvery(13)), @@ -339,7 +338,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, antiJoin) { createDuckDbTable("t", {leftVectors}); createDuckDbTable("u", {rightVectors}); - auto planNodeIdGenerator = std::make_shared(); + auto planNodeIdGenerator = std::make_shared(); auto op = PlanBuilder(planNodeIdGenerator) .values({leftVectors}) .hashJoin( @@ -351,7 +350,7 @@ TEST_F(VeloxSubstraitJoinRoundTripConverterTest, antiJoin) { .planNode(), "", {"c1"}, - core::JoinType::kAnti) + core::JoinType::kNullAwareAnti) .planNode(); assertQuery( diff --git a/velox/substrait/tests/VeloxSubstraitRoundTripPlanConverterTest.cpp b/velox/substrait/tests/VeloxSubstraitRoundTripTest.cpp similarity index 74% rename from velox/substrait/tests/VeloxSubstraitRoundTripPlanConverterTest.cpp rename to velox/substrait/tests/VeloxSubstraitRoundTripTest.cpp index fe96bc7d3126..e5fdd491185a 100644 --- a/velox/substrait/tests/VeloxSubstraitRoundTripPlanConverterTest.cpp +++ b/velox/substrait/tests/VeloxSubstraitRoundTripTest.cpp @@ -21,7 +21,6 @@ #include "velox/vector/tests/utils/VectorMaker.h" #include "velox/substrait/SubstraitToVeloxPlan.h" -#include "velox/substrait/VeloxToSubstraitMappings.h" #include "velox/substrait/VeloxToSubstraitPlan.h" using namespace facebook::velox; @@ -29,7 +28,7 @@ using namespace facebook::velox::test; using namespace facebook::velox::exec::test; using namespace facebook::velox::substrait; -class VeloxSubstraitRoundTripPlanConverterTest : public OperatorTestBase { +class VeloxSubstraitRoundTripTest : public OperatorTestBase { protected: /// Makes a vector of INTEGER type with 'size' RowVectorPtr. /// @param size The number of RowVectorPtr. @@ -70,16 +69,14 @@ class VeloxSubstraitRoundTripPlanConverterTest : public OperatorTestBase { // Assert velox again. assertQuery(samePlan, duckDbSql); } - std::unique_ptr pool_{ - memory::getDefaultScopedMemoryPool()}; + std::shared_ptr veloxConvertor_ = std::make_shared(); - std::shared_ptr substraitConverter_ = std::make_shared(pool_.get()); }; -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, project) { +TEST_F(VeloxSubstraitRoundTripTest, project) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); auto plan = @@ -87,7 +84,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, project) { assertPlanConversion(plan, "SELECT c0 + c1, c1 / c2 FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, filter) { +TEST_F(VeloxSubstraitRoundTripTest, filter) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); @@ -95,60 +92,13 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, filter) { assertPlanConversion(plan, "SELECT * FROM tmp WHERE c2 < 1000"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, scalarFunc_string_test) { - std::vector vectors; - vectors.reserve(1); - auto dow = makeFlatVector( - {"monday", - "tuesday", - "wednesday", - "thursday", - "friday", - "saturday", - "sunday"}); - auto rowVector = makeRowVector({"dow"}, {dow}); - vectors.emplace_back(rowVector); - createDuckDbTable(vectors); - auto plan = PlanBuilder().values(vectors).filter("dow like 's%'").planNode(); - assertPlanConversion(plan, "SELECT * FROM tmp where dow like 's%'"); - plan = PlanBuilder().values(vectors).project({"substr(dow,1,3)"}).planNode(); - assertPlanConversion(plan, "SELECT substr(dow,1,3) FROM tmp "); -} - -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, scalarFunc_boolean_test) { - auto vectors = makeVectors(3, 4, 2); - createDuckDbTable(vectors); - - auto plan = - PlanBuilder().values(vectors).filter("c0 < 100 and c2 < 1000").planNode(); - assertPlanConversion(plan, "SELECT * FROM tmp WHERE c0 < 100 and c2 < 1000"); - - plan = - PlanBuilder().values(vectors).filter("c0 < 100 or c2 < 1000").planNode(); - assertPlanConversion(plan, "SELECT * FROM tmp WHERE c0 < 100 or c2 < 1000"); - - plan = PlanBuilder().values(vectors).filter("not c0 < 100").planNode(); - assertPlanConversion(plan, "SELECT * FROM tmp WHERE not c0 < 100"); -} - -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, scalarFunc_compare_test) { - auto vectors = makeVectors(3, 4, 2); - createDuckDbTable(vectors); - - auto plan = PlanBuilder() - .values(vectors) - .filter("c0 between 100 and 1000") - .planNode(); - assertPlanConversion(plan, "SELECT * FROM tmp WHERE c0 between 100 and 1000"); -} - -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, null) { +TEST_F(VeloxSubstraitRoundTripTest, null) { auto vectors = makeRowVector(ROW({}, {}), 1); auto plan = PlanBuilder().values({vectors}).project({"NULL"}).planNode(); assertPlanConversion(plan, "SELECT NULL "); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, values) { +TEST_F(VeloxSubstraitRoundTripTest, values) { RowVectorPtr vectors = makeRowVector( {makeFlatVector( {2499109626526694126, 2342493223442167775, 4077358421272316858}), @@ -166,7 +116,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, values) { assertPlanConversion(plan, "SELECT * FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, count) { +TEST_F(VeloxSubstraitRoundTripTest, count) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -182,7 +132,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, count) { "SELECT count(c4) as num_price FROM tmp WHERE c6 < 24 GROUP BY c0, c1"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, countAll) { +TEST_F(VeloxSubstraitRoundTripTest, countAll) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -198,7 +148,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, countAll) { "SELECT count(*) as num_price FROM tmp WHERE c6 < 24 GROUP BY c0, c1"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sum) { +TEST_F(VeloxSubstraitRoundTripTest, sum) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -210,7 +160,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sum) { assertPlanConversion(plan, "SELECT sum(1), count(c4) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumAndCount) { +TEST_F(VeloxSubstraitRoundTripTest, sumAndCount) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -223,7 +173,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumAndCount) { assertPlanConversion(plan, "SELECT sum(c1), count(c4) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, avgAndCount) { +TEST_F(VeloxSubstraitRoundTripTest, avgAndCount) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -236,7 +186,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, avgAndCount) { assertPlanConversion(plan, "SELECT avg(c1), count(c4) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumGlobal) { +TEST_F(VeloxSubstraitRoundTripTest, sumGlobal) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -251,7 +201,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumGlobal) { plan, "SELECT c0, sum(c0), sum(c1) FROM tmp GROUP BY c0"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumMask) { +TEST_F(VeloxSubstraitRoundTripTest, sumMask) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -271,7 +221,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, sumMask) { "FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, rowConstructor) { +TEST_F(VeloxSubstraitRoundTripTest, rowConstructor) { RowVectorPtr vectors = makeRowVector( {makeFlatVector({0.905791934145, 0.968867771124}), makeFlatVector({2499109626526694126, 2342493223442167775}), @@ -285,7 +235,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, rowConstructor) { assertPlanConversion(plan, "SELECT row(c1, c2) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, projectAs) { +TEST_F(VeloxSubstraitRoundTripTest, projectAs) { RowVectorPtr vectors = makeRowVector( {makeFlatVector({0.905791934145, 0.968867771124}), makeFlatVector({2499109626526694126, 2342493223442167775}), @@ -302,7 +252,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, projectAs) { plan, "SELECT sum(c1 * c2) as revenue FROM tmp WHERE c0 < 0.5"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, avg) { +TEST_F(VeloxSubstraitRoundTripTest, avg) { auto vectors = makeVectors(2, 7, 3); createDuckDbTable(vectors); @@ -315,7 +265,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, avg) { assertPlanConversion(plan, "SELECT avg(c4) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, caseWhen) { +TEST_F(VeloxSubstraitRoundTripTest, caseWhen) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); auto plan = @@ -339,17 +289,16 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, caseWhen) { "SELECT case when c0=1 then c1 when c0=2 then c2 end as x FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, cast) { +TEST_F(VeloxSubstraitRoundTripTest, cast) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); auto plan = PlanBuilder().values(vectors).project({"true"}).planNode(); assertPlanConversion(plan, "SELECT true FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, ifThen) { +TEST_F(VeloxSubstraitRoundTripTest, ifThen) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); - auto plan = PlanBuilder() .values(vectors) .project({"if (c0 = 1, c0 + 1, c1 + 2) as x"}) @@ -358,7 +307,7 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, ifThen) { plan, "SELECT if (c0 = 1, c0 + 1, c1 + 2) as x FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, coalesce) { +TEST_F(VeloxSubstraitRoundTripTest, coalesce) { auto vectors = makeVectors(3, 4, 2); createDuckDbTable(vectors); auto plan = @@ -366,18 +315,31 @@ TEST_F(VeloxSubstraitRoundTripPlanConverterTest, coalesce) { assertPlanConversion(plan, "SELECT coalesce(c0,c1) FROM tmp"); } -TEST_F(VeloxSubstraitRoundTripPlanConverterTest, arrayLiteral) { +TEST_F(VeloxSubstraitRoundTripTest, notNullLiteral) { auto vectors = makeRowVector(ROW({}, {}), 1); auto plan = PlanBuilder(pool_.get()) .values({vectors}) - .project({"array[0, 1, 2, 3, 4]"}) + .addNode([&](std::string id, core::PlanNodePtr input) { + std::vector projectNames = { + "a", "b", "c", "d", "e", "f", "g", "h"}; + std::vector projectExpressions = { + std::make_shared((bool)1), + std::make_shared((int8_t)23), + std::make_shared((int16_t)45), + std::make_shared((int32_t)678), + std::make_shared((int64_t)910), + std::make_shared((float)1.23), + std::make_shared((double)4.56), + std::make_shared("789")}; + return std::make_shared( + id, + std::move(projectNames), + std::move(projectExpressions), + input); + }) .planNode(); - // TODO: enable this after velox updated to the latest 20221011 - // assertQuery(plan, "SELECT array[0, 1, 2, 3, 4]"); - - // Convert Velox Plan to Substrait Plan. - google::protobuf::Arena arena; - auto substraitPlan = veloxConvertor_->toSubstrait(arena, plan); + assertPlanConversion( + plan, "SELECT true, 23, 45, 678, 910, 1.23, 4.56, '789'"); } int main(int argc, char** argv) { diff --git a/velox/substrait/tests/VeloxToSubstraitTypeTest.cpp b/velox/substrait/tests/VeloxToSubstraitTypeTest.cpp index ebbd07dc884d..9970bff0d514 100644 --- a/velox/substrait/tests/VeloxToSubstraitTypeTest.cpp +++ b/velox/substrait/tests/VeloxToSubstraitTypeTest.cpp @@ -39,23 +39,6 @@ class VeloxToSubstraitTypeTest : public ::testing::Test { << ", but got: " << sameType->toString(); } - template - void testFromVelox(const TypePtr& type) { - const auto& substraitType = fromVelox(type); - ASSERT_EQ(substraitType->kind(), kind); - } - - template - void testFromVelox( - const TypePtr& type, - const std::function&)>& - typeCallBack) { - const auto& substraitType = fromVelox(type); - if (typeCallBack) { - typeCallBack(std::dynamic_pointer_cast(substraitType)); - } - } - std::shared_ptr typeConvertor_; std::shared_ptr substraitParser_ = @@ -85,49 +68,4 @@ TEST_F(VeloxToSubstraitTypeTest, basic) { {BIGINT(), ROW({"x", "y"}, {BOOLEAN(), VARCHAR()}), REAL()})); ASSERT_ANY_THROW(testTypeConversion(ROW({}, {}))); } - -TEST_F(VeloxToSubstraitTypeTest, fromVeloxTest) { - testFromVelox(BOOLEAN()); - testFromVelox(TINYINT()); - testFromVelox(SMALLINT()); - testFromVelox(INTEGER()); - testFromVelox(BIGINT()); - testFromVelox(REAL()); - testFromVelox(DOUBLE()); - testFromVelox(TIMESTAMP()); - testFromVelox(DATE()); - - testFromVelox(INTERVAL_DAY_TIME()); - - testFromVelox( - ROW({"a", "b"}, {TINYINT(), INTEGER()}), - [](const std::shared_ptr& typePtr) { - ASSERT_TRUE(typePtr->children().size() == 2); - ASSERT_TRUE(typePtr->children()[0]->kind() == SubstraitTypeKind::kI8); - ASSERT_TRUE(typePtr->children()[1]->kind() == SubstraitTypeKind::kI32); - }); - - testFromVelox( - ROW({"a", "b"}, {TINYINT(), ROW({INTEGER(), BIGINT()})}), - [](const std::shared_ptr& typePtr) { - ASSERT_TRUE(typePtr->children().size() == 2); - ASSERT_TRUE(typePtr->children()[0]->kind() == SubstraitTypeKind::kI8); - ASSERT_TRUE( - typePtr->children()[1]->kind() == SubstraitTypeKind::kStruct); - }); - - testFromVelox( - ARRAY({TINYINT()}), - [](const std::shared_ptr& typePtr) { - ASSERT_TRUE(typePtr->type()->kind() == SubstraitTypeKind ::kI8); - }); - - testFromVelox( - MAP(INTEGER(), BIGINT()), - [](const std::shared_ptr& typePtr) { - ASSERT_TRUE(typePtr->keyType()->kind() == SubstraitTypeKind ::kI32); - ASSERT_TRUE(typePtr->valueType()->kind() == SubstraitTypeKind ::kI64); - }); -} - } // namespace facebook::velox::substrait::test From ca88078fb44056e884862790f5d0e0c9a31ada5e Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Fri, 21 Oct 2022 18:52:41 +0800 Subject: [PATCH 2/3] fix code style --- velox/substrait/SubstraitToVeloxPlan.h | 3 +-- velox/substrait/VeloxToSubstraitPlan.cpp | 4 ++-- .../tests/Substrait2VeloxValuesNodeConversionTest.cpp | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/velox/substrait/SubstraitToVeloxPlan.h b/velox/substrait/SubstraitToVeloxPlan.h index c0ad9452f02d..93430e19a967 100644 --- a/velox/substrait/SubstraitToVeloxPlan.h +++ b/velox/substrait/SubstraitToVeloxPlan.h @@ -74,8 +74,7 @@ class SubstraitVeloxPlanConverter { core::PlanNodePtr toVeloxPlan(const ::substrait::RelRoot& root); /// Convert Substrait JoinRel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan( - const ::substrait::JoinRel& sJoin); + core::PlanNodePtr toVeloxPlan(const ::substrait::JoinRel& substraitJoin); /// Convert Substrait Plan into Velox PlanNode. core::PlanNodePtr toVeloxPlan(const ::substrait::Plan& substraitPlan); diff --git a/velox/substrait/VeloxToSubstraitPlan.cpp b/velox/substrait/VeloxToSubstraitPlan.cpp index 3997df555736..9edf2bd38f1d 100644 --- a/velox/substrait/VeloxToSubstraitPlan.cpp +++ b/velox/substrait/VeloxToSubstraitPlan.cpp @@ -350,8 +350,8 @@ void VeloxToSubstraitPlanConvertor::toSubstraitJoin( for (auto i = 0; i < numColumns; i++) { joinCondition.emplace_back(std::make_shared( BOOLEAN(), - std::vector{joinNode->leftKeys().at(i), - joinNode->rightKeys().at(i)}, + std::vector{ + joinNode->leftKeys().at(i), joinNode->rightKeys().at(i)}, "eq")); } diff --git a/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp b/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp index f7340b763809..e438483c51ed 100644 --- a/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp +++ b/velox/substrait/tests/Substrait2VeloxValuesNodeConversionTest.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "velox/substrait/tests/JsonToProtoConverter.h" #include "velox/dwio/common/tests/utils/DataFiles.h" #include "velox/exec/tests/utils/OperatorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" +#include "velox/substrait/tests/JsonToProtoConverter.h" #include "velox/vector/tests/utils/VectorTestBase.h" #include "velox/substrait/SubstraitToVeloxPlan.h" From 2ecf1fc1764e9e1d6e38607734b289e1babb4045 Mon Sep 17 00:00:00 2001 From: "Zhang, Chaojun" Date: Fri, 21 Oct 2022 18:56:30 +0800 Subject: [PATCH 3/3] revert cmakelist.txt --- velox/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index 7e7df744223d..3f24ef512fad 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -71,6 +71,6 @@ if(${VELOX_CODEGEN_SUPPORT}) endif() # substrait converter -#if(${VELOX_ENABLE_SUBSTRAIT}) +if(${VELOX_ENABLE_SUBSTRAIT}) add_subdirectory(substrait) -#endif() +endif()