Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 94 additions & 25 deletions ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct TPushdownSettings : public NPushdown::TSettings {
Enable(EFlag::LikeOperator, NSsa::RuntimeVersion >= 2U);
Enable(EFlag::LikeOperatorOnlyForUtf8, NSsa::RuntimeVersion < 3U);
Enable(EFlag::JsonQueryOperators | EFlag::JsonExistsOperator, NSsa::RuntimeVersion >= 3U);
Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators, NSsa::RuntimeVersion >= 4U);
Enable(EFlag::ArithmeticalExpressions | EFlag::UnaryOperators | EFlag::DoNotCheckCompareArgumentsTypes, NSsa::RuntimeVersion >= 4U);
Enable(EFlag::LogicalXorOperator
| EFlag::ParameterExpression
| EFlag::CastExpression
Expand Down Expand Up @@ -61,7 +61,7 @@ struct TFilterOpsLevels {
}
}

bool IsValid() {
bool IsValid() const {
return FirstLevelOps.IsValid() || SecondLevelOps.IsValid();
}

Expand Down Expand Up @@ -152,10 +152,6 @@ TMaybeNode<TExprBase> YqlCoalescePushdown(const TCoCoalesce& coalesce, TExprCont
return NullNode;
}

// Strips optionality from `type`, casts the result to TDataExprType and reports
// whether the feature mask of its data slot has the IntegralType bit set.
bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) {
    const auto* dataType = RemoveOptionality(type).Cast<TDataExprType>();
    const auto& slotInfo = NUdf::GetDataTypeInfo(dataType->GetSlot());
    return NUdf::EDataTypeFeatures::IntegralType & slotInfo.Features;
}

std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprContext& ctx, TPositionHandle pos)
{
std::vector<TExprBase> out;
Expand Down Expand Up @@ -203,27 +199,25 @@ std::vector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn, TExprConte

if constexpr (NKikimr::NSsa::RuntimeVersion >= 4U) {
if (const auto maybeArithmetic = node.Maybe<TCoBinaryArithmetic>()) {
if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn()) && !arithmetic.Maybe<TCoAggrAdd>()) {
if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) {
return Build<TKqpOlapFilterBinaryOp>(ctx, pos)
.Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build()
.Left(params->first)
.Right(params->second)
.Done();
}
const auto arithmetic = maybeArithmetic.Cast();
if (const auto params = ExtractBinaryFunctionParameters(arithmetic, ctx, pos)) {
return Build<TKqpOlapFilterBinaryOp>(ctx, pos)
.Operator().Value(arithmetic.Ref().Content(), TNodeFlags::Default).Build()
.Left(params->first)
.Right(params->second)
.Done();
}
}

if (const auto maybeArithmetic = node.Maybe<TCoUnaryArithmetic>()) {
if (const auto arithmetic = maybeArithmetic.Cast(); IsGoodTypeForPushdown(*arithmetic.Ref().GetTypeAnn())) {
if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) {
TString oper(arithmetic.Ref().Content());
YQL_ENSURE(oper.to_lower());
return Build<TKqpOlapFilterUnaryOp>(ctx, pos)
.Operator().Value(oper, TNodeFlags::Default).Build()
.Arg(params.front())
.Done();
}
const auto arithmetic = maybeArithmetic.Cast();
if (const auto params = ConvertComparisonNode(arithmetic.Arg(), ctx, pos); 1U == params.size()) {
TString oper(arithmetic.Ref().Content());
YQL_ENSURE(oper.to_lower());
return Build<TKqpOlapFilterUnaryOp>(ctx, pos)
.Operator().Value(oper, TNodeFlags::Default).Build()
.Arg(params.front())
.Done();
}
}

Expand Down Expand Up @@ -653,6 +647,80 @@ void SplitForPartialPushdown(const NPushdown::TPredicateNode& predicateTree, NPu
remainingPredicates.SetPredicates(remaining, ctx, pos);
}

// Strips optionality from `type`, casts the result to TDataExprType and reports
// whether the feature mask of its data slot has the IntegralType bit set.
bool IsGoodTypeForPushdown(const TTypeAnnotationNode& type) {
    const auto* dataType = RemoveOptionality(type).Cast<TDataExprType>();
    const auto& slotInfo = NUdf::GetDataTypeInfo(dataType->GetSlot());
    return NUdf::EDataTypeFeatures::IntegralType & slotInfo.Features;
}

// Decides whether the types of two compared arguments are acceptable for pushdown.
// Optionality is stripped from both sides first. The result is true when the stripped
// annotations are the same or when either side has the Null kind. Otherwise both kinds
// must match: tuples are checked element by element (and must have equal sizes), data
// types are checked through their feature masks, and every other kind yields false.
bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo) {
const auto& rawOne = RemoveOptionality(typeOne);
const auto& rawTwo = RemoveOptionality(typeTwo);
// Identical annotations need no further inspection.
if (IsSameAnnotation(rawOne, rawTwo))
return true;

const auto kindOne = rawOne.GetKind();
const auto kindTwo = rawTwo.GetKind();
// A Null on either side is accepted regardless of what the other side is.
if (ETypeAnnotationKind::Null == kindOne || ETypeAnnotationKind::Null == kindTwo)
return true;

// From here on only same-kind pairs can be accepted.
if (kindTwo != kindOne)
return false;

switch (kindOne) {
case ETypeAnnotationKind::Tuple: {
// Tuples: sizes must be equal and every pair of items must pass this same check.
const auto& itemsOne = rawOne.Cast<TTupleExprType>()->GetItems();
const auto& itemsTwo = rawTwo.Cast<TTupleExprType>()->GetItems();
const auto size = itemsOne.size();
if (size != itemsTwo.size())
return false;
for (auto i = 0U; i < size; ++i) {
if (!IsGoodTypesForPushdownCompare(*itemsOne[i], *itemsTwo[i])) {
return false;
}
}
return true;
}
case ETypeAnnotationKind::Data: {
// Data: each side must have the NumericType or StringType feature and the CanCompare feature.
// NOTE(review): the two sides are tested independently, so a numeric/string pair
// also passes this check - confirm that is intended.
const auto fOne = NUdf::GetDataTypeInfo(rawOne.Cast<TDataExprType>()->GetSlot()).Features;
const auto fTwo = NUdf::GetDataTypeInfo(rawTwo.Cast<TDataExprType>()->GetSlot()).Features;
return ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fOne) && (NUdf::EDataTypeFeatures::CanCompare & fOne)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here are two spaces before &

&& ((NUdf::EDataTypeFeatures::NumericType | NUdf::EDataTypeFeatures::StringType) & fTwo) && (NUdf::EDataTypeFeatures::CanCompare & fTwo);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here are two spaces before &

}
default: break;
}
// Any other kind is rejected.
return false;
}

bool IsGoodNodeForPushdown(const TExprBase& node) {
if (const auto maybeCompare = node.Maybe<TCoCompare>()) {
const auto compare = maybeCompare.Cast();
return IsGoodTypesForPushdownCompare(*compare.Left().Ref().GetTypeAnn(), *compare.Right().Ref().GetTypeAnn())
&& IsGoodNodeForPushdown(compare.Left()) && IsGoodNodeForPushdown(compare.Right());
} else if (const auto maybeUnaryOp = node.Maybe<TCoUnaryArithmetic>()) {
return IsGoodTypeForPushdown(*node.Ref().GetTypeAnn()) && IsGoodNodeForPushdown(maybeUnaryOp.Cast().Arg());
} else if (const auto maybeBinaryOp = node.Maybe<TCoBinaryArithmetic>()) {
const auto binaryOp = maybeBinaryOp.Cast();
return IsGoodTypeForPushdown(*binaryOp.Ref().GetTypeAnn()) && !binaryOp.Maybe<TCoAggrAdd>()
&& IsGoodNodeForPushdown(binaryOp.Left()) && IsGoodNodeForPushdown(binaryOp.Right());
} else if (const auto maybeCoalesce = node.Maybe<TCoCoalesce>()) {
const auto coalesce = maybeCoalesce.Cast();
return IsGoodNodeForPushdown(coalesce.Predicate()) && IsGoodNodeForPushdown(coalesce.Value());
}

return true;
}

// Refines the CanBePushed flags of a predicate tree, bottom-up.
// Does nothing when NSsa::RuntimeVersion is below 4. Otherwise every child is processed
// first; a node stays pushable only if it already was and all of its children are.
// A still-pushable node whose Op is EBoolOp::Undefined is additionally validated with
// IsGoodNodeForPushdown on its expression.
void UpdatePushableFlagWithOlapSpecific(NPushdown::TPredicateNode& tree) {
    if constexpr (NSsa::RuntimeVersion < 4U) {
        return;
    }

    // Visit every child unconditionally so that each subtree gets its flag refreshed.
    bool everyChildPushable = true;
    for (auto& child : tree.Children) {
        UpdatePushableFlagWithOlapSpecific(child);
        if (!child.CanBePushed) {
            everyChildPushable = false;
        }
    }

    if (!everyChildPushable) {
        tree.CanBePushed = false;
    }

    if (tree.CanBePushed && NPushdown::EBoolOp::Undefined == tree.Op) {
        tree.CanBePushed = IsGoodNodeForPushdown(tree.ExprNode.Cast());
    }
}

} // anonymous namespace end

TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx,
Expand Down Expand Up @@ -685,10 +753,11 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
return node;
}

auto optionalIf = maybeOptionalIf.Cast();
const auto optionalIf = maybeOptionalIf.Cast();
NPushdown::TPredicateNode predicateTree(optionalIf.Predicate());
CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg, read.Process().Body(), TPushdownSettings());
YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid");
UpdatePushableFlagWithOlapSpecific(predicateTree);

NPushdown::TPredicateNode predicatesToPush;
NPushdown::TPredicateNode remainingPredicates;
Expand All @@ -700,7 +769,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
YQL_ENSURE(predicatesToPush.IsValid(), "Predicates to push is invalid");
YQL_ENSURE(remainingPredicates.IsValid(), "Remaining predicates is invalid");

auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos());
const auto pushedFilters = PredicatePushdown(predicatesToPush.ExprNode.Cast(), ctx, node.Pos());
YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!");

TMaybeNode<TExprBase> olapFilter;
Expand Down
47 changes: 31 additions & 16 deletions ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1595,12 +1595,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`level` % 3 != 1)",
R"(-`level` < -2)",
R"(Abs(`level` - 3) >= 1)",
R"(LENGTH(`message`) > 1037U)",
R"(LENGTH(`uid`) > 1U OR `resource_id` = "10001")",
R"((LENGTH(`uid`) > 2U AND `resource_id` = "10001") OR `resource_id` = "10002")",
R"((LENGTH(`uid`) > 3U OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0U AND `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0U OR `resource_id` = "10001"))",
R"(LENGTH(`message`) > 1037)",
R"(LENGTH(`uid`) > 1 OR `resource_id` = "10001")",
R"((LENGTH(`uid`) > 2 AND `resource_id` = "10001") OR `resource_id` = "10002")",
R"((LENGTH(`uid`) > 3 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` IS NULL OR `message` IS NULL)",
R"(`level` IS NOT NULL AND `message` IS NULL)",
R"(`level` IS NULL AND `message` IS NOT NULL)",
Expand Down Expand Up @@ -1667,14 +1667,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`level` >= CAST("2" As Uint32))",
R"(`level` = NULL)",
R"(`level` > NULL)",
R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")",
R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")",
R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
// Not strict function in the beginning causes to disable pushdown
R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")",
// We can handle this case in future
R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` * 3.14 > 4)",
#if SSA_RUNTIME_VERSION < 2U
R"(`uid` LIKE "%30000%")",
Expand All @@ -1683,6 +1675,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
R"(`uid` LIKE "uid%001")",
#endif
#if SSA_RUNTIME_VERSION < 4U
R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")",
R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")",
R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))",
R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))",
R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")",
R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))",
R"(`level` + 2 < 5)",
R"(`level` - 2 >= 1)",
R"(`level` * 3 > 4)",
Expand Down Expand Up @@ -4705,6 +4703,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
);
}
*/

Y_UNIT_TEST(PredicatePushdownCastErrors) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false);
Expand All @@ -4717,6 +4716,23 @@ Y_UNIT_TEST_SUITE(KqpOlap) {

auto tableClient = kikimr.GetTableClient();

#if SSA_RUNTIME_VERSION >= 4U
const std::set<std::string> numerics = {"Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float", "Double"};
const std::map<std::string, std::set<std::string>> exceptions = {
{"Int8", numerics},
{"Int16", numerics},
{"Int32", numerics},
{"Int64", numerics},
{"UInt8", numerics},
{"UInt16", numerics},
{"UInt32", numerics},
{"UInt64", numerics},
{"Float", numerics},
{"Double", numerics},
{"String", {"Utf8"}},
{"Utf8", {"String"}},
};
#else
std::map<std::string, std::set<std::string>> exceptions = {
{"Int8", {"Int16", "Int32"}},
{"Int16", {"Int8", "Int32"}},
Expand All @@ -4726,9 +4742,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
{"UInt32", {"UInt8", "UInt16"}},
{"String", {"Utf8"}},
{"Utf8", {"String", "Json", "Yson"}},
{"Json", {"Utf8", "Yson"}},
{"Yson", {"Utf8", "Json"}},
};
#endif

std::vector<std::string> allTypes = {
//"Bool",
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/kqp/ut/query/kqp_explain_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) {
NJson::ReadJsonTree(*streamRes.PlanJson, &plan, true);
UNIT_ASSERT(ValidatePlanNodeIds(plan));

auto readNode = FindPlanNodeByKv(plan, "Node Type", "Filter-TableFullScan");
auto readNode = FindPlanNodeByKv(plan, "Node Type", "TableFullScan");
UNIT_ASSERT(readNode.IsDefined());

auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe();
Expand Down
13 changes: 8 additions & 5 deletions ydb/library/yql/providers/common/pushdown/collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,18 +382,21 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
return false;
}

bool equality = compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>();
auto leftList = GetComparisonNodes(compare.Left());
auto rightList = GetComparisonNodes(compare.Right());
const auto leftList = GetComparisonNodes(compare.Left());
const auto rightList = GetComparisonNodes(compare.Right());
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");

for (size_t i = 0; i < leftList.size(); ++i) {
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, settings)) {
return false;
}
if (!IsComparableTypes(leftList[i], rightList[i], equality, inputType, settings)) {
return false;

if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes)) {
if (!IsComparableTypes(leftList[i], rightList[i], compare.Maybe<TCoCmpEqual>() || compare.Maybe<TCoCmpNotEqual>(), inputType, settings)) {
return false;
}
}

if (IsLikeOperator(compare) && settings.IsEnabled(TSettings::EFeatureFlag::LikeOperatorOnlyForUtf8) && !IsSupportedLikeForUtf8(leftList[i], rightList[i])) {
// (KQP OLAP) If SSA_RUNTIME_VERSION == 2 Column Shard doesn't have LIKE kernel for binary strings
return false;
Expand Down
1 change: 1 addition & 0 deletions ydb/library/yql/providers/common/pushdown/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct TSettings {
DyNumberType = 1 << 13,
ImplicitConversionToInt64 = 1 << 14, // Allow implicit conversions to 64-bits integers from other types of integers
UnaryOperators = 1 << 15, // -, Abs, Size
DoNotCheckCompareArgumentsTypes = 1 << 16
};

explicit TSettings(NLog::EComponent logComponent)
Expand Down