diff --git a/velox/core/PlanNode.h b/velox/core/PlanNode.h index 5cbffb66780d..457ba586e64b 100644 --- a/velox/core/PlanNode.h +++ b/velox/core/PlanNode.h @@ -220,10 +220,12 @@ class ArrowStreamNode : public PlanNode { const PlanNodeId& id, const RowTypePtr& outputType, std::shared_ptr arrowStream, + memory::MemoryPool* pool, bool parallelizable = false) : PlanNode(id), outputType_(outputType), arrowStream_(arrowStream), + pool_(pool), parallelizable_(parallelizable) { VELOX_CHECK(arrowStream != nullptr); } @@ -247,6 +249,11 @@ class ArrowStreamNode : public PlanNode { return arrowStream_; } + + memory::MemoryPool* memoryPool() const { + return pool_; + } + std::string_view name() const override { return "ArrowStream"; } @@ -256,6 +263,7 @@ class ArrowStreamNode : public PlanNode { const RowTypePtr outputType_; std::shared_ptr arrowStream_; + memory::MemoryPool* pool_; const bool parallelizable_; }; diff --git a/velox/exec/ArrowStream.cpp b/velox/exec/ArrowStream.cpp index a594fda405a7..b107692e68c3 100644 --- a/velox/exec/ArrowStream.cpp +++ b/velox/exec/ArrowStream.cpp @@ -28,6 +28,7 @@ ArrowStream::ArrowStream( arrowStream->id(), "Arrow Stream") { arrowStream_ = arrowStream->arrowStream(); + pool_ = arrowStream->memoryPool(); } ArrowStream::~ArrowStream() { @@ -54,7 +55,7 @@ RowVectorPtr ArrowStream::getOutput() { } // Convert Arrow data into RowVector. rowVector_ = std::dynamic_pointer_cast( - facebook::velox::importFromArrowAsViewer(arrowSchema, arrowArray)); + facebook::velox::importFromArrowAsOwner(arrowSchema, arrowArray, pool_)); return rowVector_; } diff --git a/velox/exec/ArrowStream.h b/velox/exec/ArrowStream.h index 9ee7f4c5fe00..1b7409e0e20d 100644 --- a/velox/exec/ArrowStream.h +++ b/velox/exec/ArrowStream.h @@ -48,6 +48,7 @@ class ArrowStream : public SourceOperator { bool closed_ = false; RowVectorPtr rowVector_; std::shared_ptr arrowStream_; + memory::MemoryPool* pool_; // For calls from destructor bool isFinished0(); diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index f24a7756b7ce..52f48df2f3d2 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -139,22 +139,28 @@ struct YearFunction : public InitSessionTimezone, public TimestampWithTimezoneSupport { VELOX_DEFINE_FUNCTION_TYPES(T); - FOLLY_ALWAYS_INLINE int64_t getYear(const std::tm& time) { + FOLLY_ALWAYS_INLINE int32_t getYear(const std::tm& time) { return 1900 + time.tm_year; } - FOLLY_ALWAYS_INLINE void call( - int64_t& result, + template + FOLLY_ALWAYS_INLINE + void call( + TInput& result, const arg_type& timestamp) { result = getYear(getDateTime(timestamp, this->timeZone_)); } - FOLLY_ALWAYS_INLINE void call(int64_t& result, const arg_type& date) { + template + FOLLY_ALWAYS_INLINE + void call(TInput& result, const arg_type& date) { result = getYear(getDateTime(date)); } - FOLLY_ALWAYS_INLINE void call( - int64_t& result, + template + FOLLY_ALWAYS_INLINE + void call( + TInput& result, const arg_type& timestampWithTimezone) { auto timestamp = this->toTimestamp(timestampWithTimezone); result = getYear(getDateTime(timestamp, nullptr)); diff --git a/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp b/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp index 1fd44f8809e1..f0a828db60bc 100644 --- a/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp @@ -27,6 +27,9 @@ void registerSimpleFunctions() { {"to_unixtime"}); registerFunction({"from_unixtime"}); + registerFunction({"year"}); + registerFunction({"year"}); + registerFunction({"year"}); registerFunction({"year"}); registerFunction({"year"}); registerFunction({"year"}); diff --git a/velox/substrait/SubstraitParser.cpp b/velox/substrait/SubstraitParser.cpp index b255635b0f6a..728d717345b8 100644 --- a/velox/substrait/SubstraitParser.cpp +++ b/velox/substrait/SubstraitParser.cpp @@ -113,6 +113,11 @@ std::shared_ptr SubstraitParser::parseType( nullability = substraitType.string().nullability(); break; } + case ::substrait::Type::KindCase::kDate: { + typeName = "DATE"; + nullability = sType.date().nullability(); + break; + } default: VELOX_NYI( "Parsing for Substrait type not supported: {}", diff --git a/velox/substrait/SubstraitToVeloxExpr.cpp b/velox/substrait/SubstraitToVeloxExpr.cpp index 70052ad27296..155f124b9814 100644 --- a/velox/substrait/SubstraitToVeloxExpr.cpp +++ b/velox/substrait/SubstraitToVeloxExpr.cpp @@ -71,6 +71,36 @@ SubstraitVeloxExprConverter::toIsNotNullExpr( outputType, std::move(notParams), "not"); } +std::shared_ptr +SubstraitVeloxExprConverter::toExtractExpr( + const std::vector>& params, + const TypePtr& outputType) { + VELOX_CHECK_EQ(params.size(), 2); + auto functionArg = + std::dynamic_pointer_cast(params[0]); + if (functionArg) { + // Get the function argument. + auto variant = functionArg->value(); + if (!variant.hasValue()) { + VELOX_FAIL("Value expected in variant."); + } + // The first parameter specifies extracting from which field. + // Only year is supported currently. + std::string from = variant.value(); + + // The second parameter is the function parameter. + std::vector> exprParams; + exprParams.reserve(1); + exprParams.emplace_back(params[1]); + if (from == "YEAR") { + return std::make_shared( + outputType, std::move(exprParams), "year"); + } + VELOX_NYI("Extract from {} not supported.", from); + } + VELOX_FAIL("Constant is expected to be the first parameter in extract."); +} + std::shared_ptr SubstraitVeloxExprConverter::toVeloxExpr( const ::substrait::Expression::ScalarFunction& substraitFunc, @@ -85,12 +115,16 @@ SubstraitVeloxExprConverter::toVeloxExpr( const auto& veloxType = toVeloxType(subParser_->parseType(sFunc.output_type())->type); + if (veloxFunction == "extract") { + return toExtractExpr(params, veloxType); + } if (veloxFunction == "alias") { return toAliasExpr(params); } if (veloxFunction == "is_not_null") { return toIsNotNullExpr(params, veloxType); } + return std::make_shared( toVeloxType(typeName), std::move(params), veloxFunction); } diff --git a/velox/substrait/SubstraitToVeloxExpr.h b/velox/substrait/SubstraitToVeloxExpr.h index 53c072815bbc..d8441493b27e 100644 --- a/velox/substrait/SubstraitToVeloxExpr.h +++ b/velox/substrait/SubstraitToVeloxExpr.h @@ -68,6 +68,11 @@ class SubstraitVeloxExprConverter { const std::vector>& params, const TypePtr& outputType); + /// Create expression for extract. + std::shared_ptr toExtractExpr( + const std::vector>& params, + const TypePtr& outputType); + /// Used to convert Substrait Literal into Velox Expression. std::shared_ptr toVeloxExpr( const ::substrait::Expression::Literal& substraitLit); diff --git a/velox/substrait/SubstraitToVeloxPlan.cpp b/velox/substrait/SubstraitToVeloxPlan.cpp index 04bbb416eda2..5b65a8242d25 100644 --- a/velox/substrait/SubstraitToVeloxPlan.cpp +++ b/velox/substrait/SubstraitToVeloxPlan.cpp @@ -295,11 +295,12 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( const auto& inputType = childNode->outputType(); const auto& sExpr = filterRel.condition(); + std::shared_ptr - return std::make_shared( - nextPlanNodeId(), - exprConverter_->toVeloxExpr(sExpr, inputType), - childNode); + return std::make_shared( + nextPlanNodeId(), + exprConverter_->toVeloxExpr(sExpr, inputType), + childNode); } core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( diff --git a/velox/substrait/SubstraitToVeloxPlan.h b/velox/substrait/SubstraitToVeloxPlan.h index c7907a25570b..d476bcfe4cae 100644 --- a/velox/substrait/SubstraitToVeloxPlan.h +++ b/velox/substrait/SubstraitToVeloxPlan.h @@ -23,388 +23,377 @@ namespace facebook::velox::substrait { -/// This class is used to convert the Substrait plan into Velox plan. -class SubstraitVeloxPlanConverter { - public: - explicit SubstraitVeloxPlanConverter(memory::MemoryPool* pool) - : pool_(pool) {} - - struct SplitInfo { - /// Whether the split comes from arrow array stream node. - bool isStream = false; +struct SplitInfo { + /// Whether the split comes from arrow array stream node. + bool isStream = false; - /// The Partition index. - u_int32_t partitionIndex; + /// The Partition index. + u_int32_t partitionIndex; - /// The file paths to be scanned. - std::vector paths; + /// The file paths to be scanned. + std::vector paths; - /// The file starts in the scan. - std::vector starts; + /// The file starts in the scan. + std::vector starts; - /// The lengths to be scanned. - std::vector lengths; + /// The lengths to be scanned. + std::vector lengths; - /// The file format of the files to be scanned. - dwio::common::FileFormat format; - }; + /// The file format of the files to be scanned. + dwio::common::FileFormat format; +}; +/// This class is used to convert the Substrait plan into Velox plan. +class SubstraitVeloxPlanConverter { + public: + explicit SubstraitVeloxPlanConverter(memory::MemoryPool* pool) + : pool_(pool) {} - /// This class is used to convert the Substrait plan into Velox plan. - class SubstraitVeloxPlanConverter { + /// Used to convert Substrait JoinRel into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::JoinRel& sJoin); + + /// Used to convert Substrait AggregateRel into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::AggregateRel& sAgg); + + /// Convert Substrait ProjectRel into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::ProjectRel& projectRel); + + /// Convert Substrait FilterRel into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::FilterRel& filterRel); + + /// Convert Substrait ReadRel into Velox PlanNode. + /// Index: the index of the partition this item belongs to. + /// Starts: the start positions in byte to read from the items. + /// Lengths: the lengths in byte to read from the items. + core::PlanNodePtr toVeloxPlan(const ::substrait::ReadRel& sRead); + + /// Convert Substrait ReadRel into Velox Values Node. + core::PlanNodePtr toVeloxPlan( + const ::substrait::ReadRel& readRel, + const RowTypePtr& type); + + /// Convert Substrait Rel into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::Rel& rel); + + /// Convert Substrait RelRoot into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::RelRoot& root); + + /// Convert Substrait Plan into Velox PlanNode. + core::PlanNodePtr toVeloxPlan(const ::substrait::Plan& substraitPlan); + + /// Check the Substrait type extension only has one unknown extension. + bool checkTypeExtension(const ::substrait::Plan& substraitPlan); + + /// Convert Substrait ReadRel into Velox PlanNode. + /// Index: the index of the partition this item belongs to. + /// Starts: the start positions in byte to read from the items. + /// Lengths: the lengths in byte to read from the items. + std::shared_ptr toVeloxPlan( + const ::substrait::ReadRel& sRead, + u_int32_t& index, + std::vector& paths, + std::vector& starts, + std::vector& lengths); + + /// Used to convert Substrait Rel into Velox PlanNode. + std::shared_ptr toVeloxPlan( + const ::substrait::Rel& sRel); + + /// Used to convert Substrait RelRoot into Velox PlanNode. + std::shared_ptr toVeloxPlan( + const ::substrait::RelRoot& sRoot); + + /// Used to convert Substrait Plan into Velox PlanNode. + std::shared_ptr toVeloxPlan( + const ::substrait::Plan& sPlan); + + /// Used to construct the function map between the index + /// and the Substrait function name. Initialize the expression + /// converter based on the constructed function map. + void constructFuncMap(const ::substrait::Plan& sPlan); + + /// Will return the function map used by this plan converter. + const std::unordered_map& getFunctionMap() { + return functionMap_; + } + + /// Return the splitInfo map used by this plan converter. + const std::unordered_map>& + splitInfos() const { + return splitInfoMap_; + } + + /// Looks up a function by ID and returns function name if found. Throws if + /// function with specified ID doesn't exist. Returns a compound + /// function specification consisting of the function name and the input + /// types. The format is as follows: :__..._ + const std::string& findFunction(uint64_t id) const; + + /// Used to insert certain plan node as input. The plan node + /// id will start from the setted one. + void insertInputNode( + uint64_t inputIdx, + const std::shared_ptr& inputNode, + int planNodeId) { + inputNodesMap_[inputIdx] = inputNode; + planNodeId_ = planNodeId; + } + + /// Used to check if ReadRel specifies an input of stream. + /// If yes, the index of input stream will be returned. + /// If not, -1 will be returned. + int32_t streamIsInput(const ::substrait::ReadRel& sRel); + + /// Multiple conditions are connected to a binary tree structure with + /// the relation key words, including AND, OR, and etc. Currently, only + /// AND is supported. This function is used to extract all the Substrait + /// conditions in the binary tree structure into a vector. + void flattenConditions( + const ::substrait::Expression& sFilter, + std::vector<::substrait::Expression_ScalarFunction>& scalarFunctions); + + /// Used to find the function specification in the constructed function map. + std::string findFuncSpec(uint64_t id); + + /// Extract join keys from joinExpression. + /// joinExpression is a boolean condition that describes whether each record + /// from the left set “match” the record from the right set. The condition + /// must only include the following operations: AND, ==, field references. + /// Field references correspond to the direct output order of the data. + void extractJoinKeys( + const ::substrait::Expression& joinExpression, + std::vector& leftExprs, + std::vector& rightExprs); + + private: + /// Memory pool. + memory::MemoryPool* pool_; + + /// Filter info for a column used in filter push down. + class FilterInfo { public: - SubstraitVeloxPlanConverter( - memory::MemoryPool* pool, - bool validationMode = false) - : pool_(pool), validationMode_(validationMode) {} - - /// Used to convert Substrait JoinRel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::JoinRel& sJoin); - - /// Used to convert Substrait AggregateRel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::AggregateRel& sAgg); - - /// Convert Substrait ProjectRel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::ProjectRel& projectRel); - - /// Convert Substrait FilterRel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::FilterRel& filterRel); - - /// Convert Substrait ReadRel into Velox PlanNode. - /// Index: the index of the partition this item belongs to. - /// Starts: the start positions in byte to read from the items. - /// Lengths: the lengths in byte to read from the items. - core::PlanNodePtr toVeloxPlan(const ::substrait::ReadRel& sRead); - - /// Convert Substrait ReadRel into Velox Values Node. - core::PlanNodePtr toVeloxPlan( - const ::substrait::ReadRel& readRel, - const RowTypePtr& type); - - /// Convert Substrait Rel into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::Rel& rel); - - /// Convert Substrait RelRoot into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::RelRoot& root); - - /// Convert Substrait Plan into Velox PlanNode. - core::PlanNodePtr toVeloxPlan(const ::substrait::Plan& substraitPlan); - - /// Check the Substrait type extension only has one unknown extension. - bool checkTypeExtension(const ::substrait::Plan& substraitPlan); - - /// Convert Substrait ReadRel into Velox PlanNode. - /// Index: the index of the partition this item belongs to. - /// Starts: the start positions in byte to read from the items. - /// Lengths: the lengths in byte to read from the items. - std::shared_ptr toVeloxPlan( - const ::substrait::ReadRel& sRead, - u_int32_t& index, - std::vector& paths, - std::vector& starts, - std::vector& lengths); - - /// Used to convert Substrait Rel into Velox PlanNode. - std::shared_ptr toVeloxPlan( - const ::substrait::Rel& sRel); - - /// Used to convert Substrait RelRoot into Velox PlanNode. - std::shared_ptr toVeloxPlan( - const ::substrait::RelRoot& sRoot); - - /// Used to convert Substrait Plan into Velox PlanNode. - std::shared_ptr toVeloxPlan( - const ::substrait::Plan& sPlan); - - /// Used to construct the function map between the index - /// and the Substrait function name. Initialize the expression - /// converter based on the constructed function map. - void constructFuncMap(const ::substrait::Plan& sPlan); - - /// Will return the function map used by this plan converter. - const std::unordered_map& getFunctionMap() { - return functionMap_; + // Disable null allow. + void forbidsNull() { + nullAllowed_ = false; + if (!isInitialized_) { + isInitialized_ = true; + } } - /// Return the splitInfo map used by this plan converter. - const std::unordered_map>& - splitInfos() const { - return splitInfoMap_; + // Return the initialization status. + bool isInitialized() { + return isInitialized_ ? true : false; } - /// Looks up a function by ID and returns function name if found. Throws if - /// function with specified ID doesn't exist. Returns a compound - /// function specification consisting of the function name and the input - /// types. The format is as follows: :__..._ - const std::string& findFunction(uint64_t id) const; - - /// Used to insert certain plan node as input. The plan node - /// id will start from the setted one. - void insertInputNode( - uint64_t inputIdx, - const std::shared_ptr& inputNode, - int planNodeId) { - inputNodesMap_[inputIdx] = inputNode; - planNodeId_ = planNodeId; + // Add a lower bound to the range. Multiple lower bounds are + // regarded to be in 'or' relation. + void setLower(const std::optional& left, bool isExclusive) { + lowerBounds_.emplace_back(left); + lowerExclusives_.emplace_back(isExclusive); + if (!isInitialized_) { + isInitialized_ = true; + } } - /// Used to check if ReadRel specifies an input of stream. - /// If yes, the index of input stream will be returned. - /// If not, -1 will be returned. - int32_t streamIsInput(const ::substrait::ReadRel& sRel); - - /// Multiple conditions are connected to a binary tree structure with - /// the relation key words, including AND, OR, and etc. Currently, only - /// AND is supported. This function is used to extract all the Substrait - /// conditions in the binary tree structure into a vector. - void flattenConditions( - const ::substrait::Expression& sFilter, - std::vector<::substrait::Expression_ScalarFunction>& scalarFunctions); - - /// Used to find the function specification in the constructed function map. - std::string findFuncSpec(uint64_t id); - - /// Extract join keys from joinExpression. - /// joinExpression is a boolean condition that describes whether each record - /// from the left set “match” the record from the right set. The condition - /// must only include the following operations: AND, ==, field references. - /// Field references correspond to the direct output order of the data. - void extractJoinKeys( - const ::substrait::Expression& joinExpression, - std::vector& leftExprs, - std::vector& - rightExprs); - - private: - /// Memory pool. - memory::MemoryPool* pool_; - - /// Filter info for a column used in filter push down. - class FilterInfo { - public: - // Disable null allow. - void forbidsNull() { - nullAllowed_ = false; - if (!isInitialized_) { - isInitialized_ = true; - } + // Add a upper bound to the range. Multiple upper bounds are + // regarded to be in 'or' relation. + void setUpper(const std::optional& right, bool isExclusive) { + upperBounds_.emplace_back(right); + upperExclusives_.emplace_back(isExclusive); + if (!isInitialized_) { + isInitialized_ = true; } + } - // Return the initialization status. - bool isInitialized() { - return isInitialized_ ? true : false; + // Set a list of values to be used in the push down of 'in' expression. + void setValues(const std::vector& values) { + for (const auto& value : values) { + valuesVector_.emplace_back(value); } - - // Add a lower bound to the range. Multiple lower bounds are - // regarded to be in 'or' relation. - void setLower(const std::optional& left, bool isExclusive) { - lowerBounds_.emplace_back(left); - lowerExclusives_.emplace_back(isExclusive); - if (!isInitialized_) { - isInitialized_ = true; - } + if (!isInitialized_) { + isInitialized_ = true; } + } - // Add a upper bound to the range. Multiple upper bounds are - // regarded to be in 'or' relation. - void setUpper(const std::optional& right, bool isExclusive) { - upperBounds_.emplace_back(right); - upperExclusives_.emplace_back(isExclusive); - if (!isInitialized_) { - isInitialized_ = true; - } + // Set a value for the not(equal) condition. + void setNotValue(const std::optional& notValue) { + notValue_ = notValue; + if (!isInitialized_) { + isInitialized_ = true; } + } - // Set a list of values to be used in the push down of 'in' expression. - void setValues(const std::vector& values) { - for (const auto& value : values) { - valuesVector_.emplace_back(value); - } - if (!isInitialized_) { - isInitialized_ = true; - } - } + // Whether this filter map is initialized. + bool isInitialized_ = false; - // Set a value for the not(equal) condition. - void setNotValue(const std::optional& notValue) { - notValue_ = notValue; - if (!isInitialized_) { - isInitialized_ = true; - } - } + // The null allow. + bool nullAllowed_ = true; - // Whether this filter map is initialized. - bool isInitialized_ = false; - - // The null allow. - bool nullAllowed_ = true; - - // If true, left bound will be exclusive. - std::vector lowerExclusives_; - - // If true, right bound will be exclusive. - std::vector upperExclusives_; - - // A value should not be equal to. - std::optional notValue_ = std::nullopt; - - // The lower bounds in 'or' relation. - std::vector> lowerBounds_; - - // The upper bounds in 'or' relation. - std::vector> upperBounds_; - - // The list of values used in 'in' expression. - std::vector valuesVector_; - }; - - /// A function returning current function id and adding the plan node id by - /// one once called. - std::string nextPlanNodeId(); - - /// Check the args of the scalar function. Should be field or - /// field with literal. - bool fieldOrWithLiteral( - const ::substrait::Expression_ScalarFunction& function); - - /// Separate the functions to be two parts: - /// subfield functions to be handled by the subfieldFilters in - /// HiveConnector, and remaining functions to be handled by the - /// remainingFilter in HiveConnector. - void separateFilters( - const std::vector<::substrait::Expression_ScalarFunction>& - scalarFunctions, - std::vector<::substrait::Expression_ScalarFunction>& subfieldFunctions, - std::vector<::substrait::Expression_ScalarFunction>& - remainingFunctions); - - /// Check whether the chidren functions of this scalar function have the - /// same column index. Curretly used to check whether the two chilren - /// functions of 'or' expression are effective on the same column. - bool chidrenFunctionsOnSameField( - const ::substrait::Expression_ScalarFunction& function); - - /// Extract the list from in function, and set it to the filter info. - void setInValues( - const ::substrait::Expression_ScalarFunction& scalarFunction, - std::unordered_map>& colInfoMap); - - /// Extract the scalar function, and set the filter info for different types - /// of columns. If reverse is true, the opposite filter info will be set. - void setFilterMap( - const ::substrait::Expression_ScalarFunction& scalarFunction, - const std::vector& inputTypeList, - std::unordered_map>& colInfoMap, - bool reverse = false); - - /// Set the filter info for a column base on the information - /// extracted from filter condition. - template - void setColInfoMap( - const std::string& filterName, - uint32_t colIdx, - std::optional literalVariant, - bool reverse, - std::unordered_map>& colInfoMap); - - /// Create a multirange to specify the filter 'x != notValue' with: - /// x > notValue or x < notValue. - template - void createNotEqualFilter( - variant notVariant, - bool nullAllowed, - std::vector>& colFilters); - - /// Create a values range to handle in filter. - /// variants: the list of values extracted from the in expression. - /// inputName: the column input name. - template - void setInFilter( - const std::vector& variants, - bool nullAllowed, - const std::string& inputName, - connector::hive::SubfieldFilters& filters); - - /// Set the constructed filters into SubfieldFilters. - /// The FilterType is used to distinguish BigintRange and - /// Filter (the base class). This is needed because BigintMultiRange - /// can only accept the unique ptr of BigintRange as parameter. - template - void setSubfieldFilter( - std::vector> colFilters, - const std::string& inputName, - bool nullAllowed, - connector::hive::SubfieldFilters& filters); - - /// Create the subfield filter based on the constructed filter info. - /// inputName: the input name of a column. - template - void constructSubfieldFilters( - uint32_t colIdx, - const std::string& inputName, - const std::shared_ptr& filterInfo, - connector::hive::SubfieldFilters& filters); - - /// Construct subfield filters according to the pre-set map of filter info. - connector::hive::SubfieldFilters mapToFilters( - const std::vector& inputNameList, - const std::vector& inputTypeList, - std::unordered_map> colInfoMap); - - /// Convert subfield functions into subfieldFilters to - /// be used in Hive Connector. - connector::hive::SubfieldFilters toSubfieldFilters( - const std::vector& inputNameList, - const std::vector& inputTypeList, - const std::vector<::substrait::Expression_ScalarFunction>& - subfieldFunctions); - - /// Connect all remaining functions with 'and' relation - /// for the use of remaingFilter in Hive Connector. - std::shared_ptr connectWithAnd( - std::vector inputNameList, - std::vector inputTypeList, - const std::vector<::substrait::Expression_ScalarFunction>& - remainingFunctions); - - /// Used to convert AggregateRel into Velox plan node. - /// The output of child node will be used as the input of Aggregation. - std::shared_ptr toVeloxAgg( - const ::substrait::AggregateRel& sAgg, - const std::shared_ptr& childNode, - const core::AggregationNode::Step& aggStep); - - /// The unique identification for each PlanNode. - int planNodeId_ = 0; - - /// The map storing the relations between the function id and the function - /// name. Will be constructed based on the Substrait representation. - std::unordered_map functionMap_; - - /// The map storing the split stats for each PlanNode. - std::unordered_map> - splitInfoMap_; - - /// The map storing the pre-built plan nodes which can be accessed through - /// index. This map is only used when the computation of a Substrait plan - /// depends on other input nodes. - std::unordered_map> - inputNodesMap_; - - /// The Substrait parser used to convert Substrait representations into - /// recognizable representations. - std::shared_ptr subParser_{ - std::make_shared()}; - - /// The Expression converter used to convert Substrait representations into - /// Velox expressions. - std::shared_ptr exprConverter_; - - /// Memory pool. - memory::MemoryPool* pool_; - - /// A flag used to specify validation. - bool validationMode_ = false; + // If true, left bound will be exclusive. + std::vector lowerExclusives_; + + // If true, right bound will be exclusive. + std::vector upperExclusives_; + + // A value should not be equal to. + std::optional notValue_ = std::nullopt; + + // The lower bounds in 'or' relation. + std::vector> lowerBounds_; + + // The upper bounds in 'or' relation. + std::vector> upperBounds_; + + // The list of values used in 'in' expression. + std::vector valuesVector_; }; + /// A function returning current function id and adding the plan node id by + /// one once called. + std::string nextPlanNodeId(); + + /// Check the args of the scalar function. Should be field or + /// field with literal. + bool fieldOrWithLiteral( + const ::substrait::Expression_ScalarFunction& function); + + /// Separate the functions to be two parts: + /// subfield functions to be handled by the subfieldFilters in + /// HiveConnector, and remaining functions to be handled by the + /// remainingFilter in HiveConnector. + void separateFilters( + const std::vector<::substrait::Expression_ScalarFunction>& + scalarFunctions, + std::vector<::substrait::Expression_ScalarFunction>& subfieldFunctions, + std::vector<::substrait::Expression_ScalarFunction>& remainingFunctions); + + /// Check whether the chidren functions of this scalar function have the + /// same column index. Curretly used to check whether the two chilren + /// functions of 'or' expression are effective on the same column. + bool chidrenFunctionsOnSameField( + const ::substrait::Expression_ScalarFunction& function); + + /// Extract the list from in function, and set it to the filter info. + void setInValues( + const ::substrait::Expression_ScalarFunction& scalarFunction, + std::unordered_map>& colInfoMap); + + /// Extract the scalar function, and set the filter info for different types + /// of columns. If reverse is true, the opposite filter info will be set. + void setFilterMap( + const ::substrait::Expression_ScalarFunction& scalarFunction, + const std::vector& inputTypeList, + std::unordered_map>& colInfoMap, + bool reverse = false); + + /// Set the filter info for a column base on the information + /// extracted from filter condition. + template + void setColInfoMap( + const std::string& filterName, + uint32_t colIdx, + std::optional literalVariant, + bool reverse, + std::unordered_map>& colInfoMap); + + /// Create a multirange to specify the filter 'x != notValue' with: + /// x > notValue or x < notValue. + template + void createNotEqualFilter( + variant notVariant, + bool nullAllowed, + std::vector>& colFilters); + + /// Create a values range to handle in filter. + /// variants: the list of values extracted from the in expression. + /// inputName: the column input name. + template + void setInFilter( + const std::vector& variants, + bool nullAllowed, + const std::string& inputName, + connector::hive::SubfieldFilters& filters); + + /// Set the constructed filters into SubfieldFilters. + /// The FilterType is used to distinguish BigintRange and + /// Filter (the base class). This is needed because BigintMultiRange + /// can only accept the unique ptr of BigintRange as parameter. + template + void setSubfieldFilter( + std::vector> colFilters, + const std::string& inputName, + bool nullAllowed, + connector::hive::SubfieldFilters& filters); + + /// Create the subfield filter based on the constructed filter info. + /// inputName: the input name of a column. + template + void constructSubfieldFilters( + uint32_t colIdx, + const std::string& inputName, + const std::shared_ptr& filterInfo, + connector::hive::SubfieldFilters& filters); + + /// Construct subfield filters according to the pre-set map of filter info. + connector::hive::SubfieldFilters mapToFilters( + const std::vector& inputNameList, + const std::vector& inputTypeList, + std::unordered_map> colInfoMap); + + /// Convert subfield functions into subfieldFilters to + /// be used in Hive Connector. + connector::hive::SubfieldFilters toSubfieldFilters( + const std::vector& inputNameList, + const std::vector& inputTypeList, + const std::vector<::substrait::Expression_ScalarFunction>& + subfieldFunctions); + + /// Connect all remaining functions with 'and' relation + /// for the use of remaingFilter in Hive Connector. + std::shared_ptr connectWithAnd( + std::vector inputNameList, + std::vector inputTypeList, + const std::vector<::substrait::Expression_ScalarFunction>& + remainingFunctions); + + /// Used to convert AggregateRel into Velox plan node. + /// The output of child node will be used as the input of Aggregation. + std::shared_ptr toVeloxAgg( + const ::substrait::AggregateRel& sAgg, + const std::shared_ptr& childNode, + const core::AggregationNode::Step& aggStep); + + /// The unique identification for each PlanNode. + int planNodeId_ = 0; + + /// The map storing the relations between the function id and the function + /// name. Will be constructed based on the Substrait representation. + std::unordered_map functionMap_; + + /// The map storing the split stats for each PlanNode. + std::unordered_map> + splitInfoMap_; + + /// The map storing the pre-built plan nodes which can be accessed through + /// index. This map is only used when the computation of a Substrait plan + /// depends on other input nodes. + std::unordered_map> + inputNodesMap_; + + /// The Substrait parser used to convert Substrait representations into + /// recognizable representations. + std::shared_ptr subParser_{ + std::make_shared()}; + + /// The Expression converter used to convert Substrait representations into + /// Velox expressions. + std::shared_ptr exprConverter_; + + /// Memory pool. + memory::MemoryPool* pool_; + + /// A flag used to specify validation. + bool validationMode_ = false; +}; + } // namespace facebook::velox::substrait diff --git a/velox/substrait/TypeUtils.cpp b/velox/substrait/TypeUtils.cpp index a7517140037e..adc237f93e5b 100644 --- a/velox/substrait/TypeUtils.cpp +++ b/velox/substrait/TypeUtils.cpp @@ -119,6 +119,8 @@ TypePtr toVeloxType(const std::string& typeName) { } case TypeKind::UNKNOWN: return UNKNOWN(); + case TypeKind::DATE: + return DATE(); default: VELOX_NYI("Velox type conversion not supported for type {}.", typeName); }