From d5a627a293ad89e88644c380483ec53c70c3f210 Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Sat, 3 Dec 2022 17:43:36 +0800 Subject: [PATCH 1/5] optimize left join predicate & Eliminate useless appendVertices --- src/common/function/FunctionManager.cpp | 5 + src/graph/optimizer/CMakeLists.txt | 1 + .../rule/OptimizeLeftJoinPredicateRule.cpp | 163 ++++++++++++++++++ .../rule/OptimizeLeftJoinPredicateRule.h | 29 ++++ 4 files changed, 198 insertions(+) create mode 100644 src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp create mode 100644 src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h diff --git a/src/common/function/FunctionManager.cpp b/src/common/function/FunctionManager.cpp index 646e09dc911..048ba23769d 100644 --- a/src/common/function/FunctionManager.cpp +++ b/src/common/function/FunctionManager.cpp @@ -2004,6 +2004,11 @@ FunctionManager::FunctionManager() { }; } { + // `none_direct_dst` always returns the dstId of an edge key + // without considering the direction of the edge type. 
+ // The encoding of the edge key is: + // type(1) + partId(3) + srcId(*) + edgeType(4) + edgeRank(8) + dstId(*) + placeHolder(1) + // More information about the encoding can be found in `NebulaKeyUtils.h` auto &attr = functions_["none_direct_dst"]; attr.minArity_ = 1; attr.maxArity_ = 1; diff --git a/src/graph/optimizer/CMakeLists.txt b/src/graph/optimizer/CMakeLists.txt index b27c20e97aa..b3c17f8427c 100644 --- a/src/graph/optimizer/CMakeLists.txt +++ b/src/graph/optimizer/CMakeLists.txt @@ -55,6 +55,7 @@ nebula_add_library( rule/PushLimitDownScanEdgesRule.cpp rule/RemoveProjectDedupBeforeGetDstBySrcRule.cpp rule/PushFilterDownTraverseRule.cpp + rule/OptimizeLeftJoinPredicateRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp new file mode 100644 index 00000000000..2b61a62867c --- /dev/null +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp @@ -0,0 +1,163 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#include "graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h" + +#include "graph/optimizer/OptContext.h" +#include "graph/optimizer/OptGroup.h" +#include "graph/planner/plan/PlanNode.h" +#include "graph/planner/plan/Query.h" +#include "graph/util/ExpressionUtils.h" + +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr OptimizeLeftJoinPredicateRule::kInstance = + std::unique_ptr(new OptimizeLeftJoinPredicateRule()); + +OptimizeLeftJoinPredicateRule::OptimizeLeftJoinPredicateRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& OptimizeLeftJoinPredicateRule::pattern() const { + static Pattern pattern = Pattern::create( + PlanNode::Kind::kBiLeftJoin, + {Pattern::create(PlanNode::Kind::kUnknown), + Pattern::create(PlanNode::Kind::kProject, + {Pattern::create(PlanNode::Kind::kAppendVertices, + {Pattern::create(PlanNode::Kind::kTraverse)})})}); + return pattern; +} + +StatusOr OptimizeLeftJoinPredicateRule::transform( + OptContext* octx, const MatchedResult& matched) const { + auto* leftJoinGroupNode = matched.node; + auto* leftJoinGroup = leftJoinGroupNode->group(); + auto* leftJoin = static_cast(leftJoinGroupNode->node()); + + auto* projectGroupNode = matched.dependencies[1].node; + auto* projectGroup = projectGroupNode->group(); + UNUSED(projectGroup); + + auto* project = static_cast(projectGroupNode->node()); + + auto* appendVerticesGroup = matched.dependencies[1].dependencies[0].node->group(); + UNUSED(appendVerticesGroup); + auto appendVertices = + static_cast(matched.dependencies[1].dependencies[0].node->node()); + + auto* traverseGroup = matched.dependencies[1].dependencies[0].node->group(); + UNUSED(traverseGroup); + auto traverse = static_cast( + matched.dependencies[1].dependencies[0].dependencies[0].node->node()); + + auto& avColNames = appendVertices->colNames(); + DCHECK_GE(avColNames.size(), 1); + auto& avNodeAlias = avColNames.back(); + + auto& tvColNames = 
traverse->colNames(); + DCHECK_GE(tvColNames.size(), 1); + auto& tvEdgeAlias = traverse->colNames().back(); + + auto& hashKeys = leftJoin->hashKeys(); + auto& probeKeys = leftJoin->probeKeys(); + + // Use visitor to collect all function `id` in the hashKeys + + std::vector hashKeyIdx; + for (size_t i = 0; i < hashKeys.size(); ++i) { + auto* key = hashKeys[i]; + if (key->kind() != Expression::Kind::kFunctionCall) { + continue; + } + auto* func = static_cast(key); + if (func->name() != "id" || func->name() != "_joinkey") { + continue; + } + auto& args = func->args()->args(); + DCHECK_EQ(args.size(), 1); + auto* arg = args[0]; + if (arg->kind() != Expression::Kind::kInputProperty) { + continue; + } + auto& alias = static_cast(arg)->prop(); + if (alias != avNodeAlias) continue; + // FIXME(jie): Must check if probe keys contain the same key + hashKeyIdx.emplace_back(i); + } + if (hashKeyIdx.size() != 1) { + return TransformResult::noTransform(); + } + + std::vector prjIdx; + for (size_t i = 0; i < project->columns()->size(); ++i) { + const auto* col = project->columns()->columns()[i]; + if (col->expr()->kind() != Expression::Kind::kInputProperty) { + continue; + } + auto* inputProp = static_cast(col->expr()); + if (inputProp->prop() != avNodeAlias) continue; + prjIdx.push_back(i); + } + if (prjIdx.size() != 1) { + return TransformResult::noTransform(); + } + + auto* pool = octx->qctx()->objPool(); + // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`, and let the new left join + // use it as hash key + auto* args = ArgumentList::make(pool); + args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias)); + auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args); + + auto* newYieldColumns = pool->makeAndAdd(); + for (size_t i = 0; i < project->columns()->size(); ++i) { + if (i == prjIdx[0]) { + newYieldColumns->addColumn(pool->makeAndAdd(newPrjExpr, newPrjExpr->toString())); + } else { + 
newYieldColumns->addColumn(project->columns()->columns()[i]); + } + } + auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns); + + auto* newHashExpr = InputPropertyExpression::make(pool, newPrjExpr->toString()); + std::vector newHashKeys; + for (size_t i = 0; i < hashKeys.size(); ++i) { + if (i == hashKeyIdx[0]) { + newHashKeys.emplace_back(newHashExpr); + } else { + newHashKeys.emplace_back(hashKeys[i]); + } + } + auto* newLeftJoin = + graph::BiLeftJoin::make(octx->qctx(), nullptr, nullptr, newHashKeys, probeKeys); + + TransformResult result; + result.eraseAll = true; + + newProject->setInputVar(appendVertices->inputVar()); + newProject->setOutputVar(project->outputVar()); + auto newProjectGroup = OptGroup::create(octx); + auto* newProjectGroupNode = newProjectGroup->makeGroupNode(newProject); + newProjectGroupNode->setDeps(projectGroupNode->dependencies()); + + newLeftJoin->setDep(1, newProject); + auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup); + newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]); + newLeftJoinGroupNode->dependsOn(newProjectGroup); + + result.newGroupNodes.emplace_back(newLeftJoinGroupNode); + return result; +} + +std::string OptimizeLeftJoinPredicateRule::toString() const { + return "OptimizeLeftJoinPredicateRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h new file mode 100644 index 00000000000..0a8a5eb6cb9 --- /dev/null +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2022 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#pragma once + +#include "graph/optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +class OptimizeLeftJoinPredicateRule final : public OptRule { + public: + const Pattern &pattern() const override; + + StatusOr transform(OptContext *qctx, + const MatchedResult &matched) const override; + + std::string toString() const override; + + private: + OptimizeLeftJoinPredicateRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula From ab0a0cd62ba8d97dd2b7535c3b98af24fb3bbeb2 Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Tue, 6 Dec 2022 13:56:10 +0800 Subject: [PATCH 2/5] fix --- .../rule/OptimizeLeftJoinPredicateRule.h | 17 +++++ .../OptimizeLeftJoinPredicateRule.feature | 64 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h index 0a8a5eb6cb9..d075aefa7c1 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h @@ -10,6 +10,23 @@ namespace nebula { namespace opt { +/* +Before: + BiLeftJoin({id(v)}, id(v)) + / \ + ... Project + \ + AppendVertices(v) + \ + Traverse(e) + +After: + BiLeftJoin({id(v)}, none_direct_dst(e)) + / \ + ... Project + \ + Traverse(e) +*/ class OptimizeLeftJoinPredicateRule final : public OptRule { public: const Pattern &pattern() const override; diff --git a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature new file mode 100644 index 00000000000..3386e1313d8 --- /dev/null +++ b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature @@ -0,0 +1,64 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. 
+Feature: Optimize left join predicate + + Background: + Given a graph with space named "nba" + + Scenario: optimize left join predicate + When profiling query: + """ + MATCH (person:player)-[:like*1..2]-(friend:player)-[:serve]->(friendTeam:team) + WHERE id(person) == "Tony Parker" AND id(friend) != "Tony Parker" + WITH DISTINCT friend, friendTeam + OPTIONAL MATCH (friend)<-[:like]-(friend2:player)<-[:like]-(friendTeam) + WITH friendTeam, count(friend2) AS numFriends + RETURN + id(friendTeam) AS teamId, + friendTeam.team.name AS teamName, + numFriends + ORDER BY numFriends DESC + LIMIT 20 + """ + Then the result should be, in any order, with relax comparison: + | teamId | teamName | numFriends | + | "Clippers" | "Clippers" | 0 | + | "Bulls" | "Bulls" | 0 | + | "Spurs" | "Spurs" | 0 | + | "Thunders" | "Thunders" | 0 | + | "Hornets" | "Hornets" | 0 | + | "Warriors" | "Warriors" | 0 | + | "Hawks" | "Hawks" | 0 | + | "Kings" | "Kings" | 0 | + | "Magic" | "Magic" | 0 | + | "Trail Blazers" | "Trail Blazers" | 0 | + | "Lakers" | "Lakers" | 0 | + | "Grizzlies" | "Grizzlies" | 0 | + | "Suns" | "Suns" | 0 | + | "Rockets" | "Rockets" | 0 | + | "Cavaliers" | "Cavaliers" | 0 | + | "Raptors" | "Raptors" | 0 | + | "Celtics" | "Celtics" | 0 | + | "76ers" | "76ers" | 0 | + | "Heat" | "Heat" | 0 | + | "Jazz" | "Jazz" | 0 | + And the execution plan should be: + | id | name | dependencies | profiling data | operator info | + | 21 | TopN | 18 | | | + | 18 | Project | 17 | | | + | 17 | Aggregate | 16 | | | + | 16 | BiLeftJoin | 10,15 | | | + | 10 | Dedup | 28 | | | + | 28 | Project | 22 | | | + | 22 | Filter | 26 | | | + | 26 | AppendVertices | 25 | | | + | 25 | Traverse | 24 | | | + | 24 | Traverse | 2 | | | + | 2 | Dedup | 1 | | | + | 1 | PassThrough | 3 | | | + | 3 | Start | | | | + | 15 | Project | 14 | | | + | 14 | Traverse | 12 | | | + | 12 | Traverse | 11 | | | + | 11 | Argument | | | | From b791035841c1c593e0dcb2ace03d485e1b99097d Mon Sep 17 00:00:00 2001 From: jievince 
<38901892+jievince@users.noreply.github.com> Date: Tue, 6 Dec 2022 15:56:02 +0800 Subject: [PATCH 3/5] update --- .../rule/OptimizeLeftJoinPredicateRule.cpp | 100 ++++++++++-------- .../rule/OptimizeLeftJoinPredicateRule.h | 36 +++---- .../OptimizeLeftJoinPredicateRule.feature | 70 ++++++------ 3 files changed, 108 insertions(+), 98 deletions(-) diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp index 2b61a62867c..1c79d6e32e9 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp @@ -1,7 +1,6 @@ -/* Copyright (c) 2022 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ +// Copyright (c) 2022 vesoft inc. All rights reserved. +// +// This source code is licensed under Apache 2.0 License. #include "graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h" @@ -26,7 +25,7 @@ OptimizeLeftJoinPredicateRule::OptimizeLeftJoinPredicateRule() { const Pattern& OptimizeLeftJoinPredicateRule::pattern() const { static Pattern pattern = Pattern::create( - PlanNode::Kind::kBiLeftJoin, + PlanNode::Kind::kHashLeftJoin, {Pattern::create(PlanNode::Kind::kUnknown), Pattern::create(PlanNode::Kind::kProject, {Pattern::create(PlanNode::Kind::kAppendVertices, @@ -38,45 +37,35 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( OptContext* octx, const MatchedResult& matched) const { auto* leftJoinGroupNode = matched.node; auto* leftJoinGroup = leftJoinGroupNode->group(); - auto* leftJoin = static_cast(leftJoinGroupNode->node()); + auto* leftJoin = static_cast(leftJoinGroupNode->node()); auto* projectGroupNode = matched.dependencies[1].node; - auto* projectGroup = projectGroupNode->group(); - UNUSED(projectGroup); - auto* project = static_cast(projectGroupNode->node()); - auto* appendVerticesGroup = matched.dependencies[1].dependencies[0].node->group(); - UNUSED(appendVerticesGroup); + 
auto* appendVerticesGroupNode = matched.dependencies[1].dependencies[0].node; auto appendVertices = static_cast(matched.dependencies[1].dependencies[0].node->node()); - auto* traverseGroup = matched.dependencies[1].dependencies[0].node->group(); - UNUSED(traverseGroup); auto traverse = static_cast( matched.dependencies[1].dependencies[0].dependencies[0].node->node()); - auto& avColNames = appendVertices->colNames(); - DCHECK_GE(avColNames.size(), 1); - auto& avNodeAlias = avColNames.back(); + auto& avNodeAlias = appendVertices->nodeAlias(); - auto& tvColNames = traverse->colNames(); - DCHECK_GE(tvColNames.size(), 1); - auto& tvEdgeAlias = traverse->colNames().back(); + auto& tvEdgeAlias = traverse->edgeAlias(); auto& hashKeys = leftJoin->hashKeys(); auto& probeKeys = leftJoin->probeKeys(); // Use visitor to collect all function `id` in the hashKeys - - std::vector hashKeyIdx; + bool found = false; + size_t hashKeyIdx = 0; for (size_t i = 0; i < hashKeys.size(); ++i) { - auto* key = hashKeys[i]; - if (key->kind() != Expression::Kind::kFunctionCall) { + auto* hashKey = hashKeys[i]; + if (hashKey->kind() != Expression::Kind::kFunctionCall) { continue; } - auto* func = static_cast(key); - if (func->name() != "id" || func->name() != "_joinkey") { + auto* func = static_cast(hashKey); + if (func->name() != "id" && func->name() != "_joinkey") { continue; } auto& args = func->args()->args(); @@ -87,66 +76,87 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( } auto& alias = static_cast(arg)->prop(); if (alias != avNodeAlias) continue; - // FIXME(jie): Must check if probe keys contain the same key - hashKeyIdx.emplace_back(i); + // Must check if probe keys contain the same key + if (*probeKeys[i] != *hashKey) { + return TransformResult::noTransform(); + } + if (found) { + return TransformResult::noTransform(); + } + hashKeyIdx = i; + found = true; } - if (hashKeyIdx.size() != 1) { + if (!found) { return TransformResult::noTransform(); } - std::vector prjIdx; - for (size_t 
i = 0; i < project->columns()->size(); ++i) { - const auto* col = project->columns()->columns()[i]; + found = false; + size_t prjIdx = 0; + auto* columns = project->columns(); + for (size_t i = 0; i < columns->size(); ++i) { + const auto* col = columns->columns()[i]; if (col->expr()->kind() != Expression::Kind::kInputProperty) { continue; } auto* inputProp = static_cast(col->expr()); if (inputProp->prop() != avNodeAlias) continue; - prjIdx.push_back(i); + if (found) { + return TransformResult::noTransform(); + } + prjIdx = i; + found = true; } - if (prjIdx.size() != 1) { + if (!found) { return TransformResult::noTransform(); } auto* pool = octx->qctx()->objPool(); - // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`, and let the new left join - // use it as hash key + // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`, + // and let the new left join use it as hash key auto* args = ArgumentList::make(pool); args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias)); auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args); auto* newYieldColumns = pool->makeAndAdd(); for (size_t i = 0; i < project->columns()->size(); ++i) { - if (i == prjIdx[0]) { - newYieldColumns->addColumn(pool->makeAndAdd(newPrjExpr, newPrjExpr->toString())); + if (i == prjIdx) { + newYieldColumns->addColumn(new YieldColumn(newPrjExpr, avNodeAlias)); } else { - newYieldColumns->addColumn(project->columns()->columns()[i]); + newYieldColumns->addColumn(project->columns()->columns()[i]->clone().release()); } } auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns); - auto* newHashExpr = InputPropertyExpression::make(pool, newPrjExpr->toString()); + // $-.`avNodeAlias` + auto* newHashExpr = InputPropertyExpression::make(pool, avNodeAlias); std::vector newHashKeys; for (size_t i = 0; i < hashKeys.size(); ++i) { - if (i == hashKeyIdx[0]) { + if (i == hashKeyIdx) { newHashKeys.emplace_back(newHashExpr); } 
else { newHashKeys.emplace_back(hashKeys[i]); } } auto* newLeftJoin = - graph::BiLeftJoin::make(octx->qctx(), nullptr, nullptr, newHashKeys, probeKeys); + graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, newHashKeys, probeKeys); TransformResult result; result.eraseAll = true; newProject->setInputVar(appendVertices->inputVar()); - newProject->setOutputVar(project->outputVar()); auto newProjectGroup = OptGroup::create(octx); auto* newProjectGroupNode = newProjectGroup->makeGroupNode(newProject); - newProjectGroupNode->setDeps(projectGroupNode->dependencies()); - - newLeftJoin->setDep(1, newProject); + newProjectGroupNode->setDeps(appendVerticesGroupNode->dependencies()); + + newLeftJoin->setLeftVar(leftJoin->leftInputVar()); + newLeftJoin->setRightVar(newProject->outputVar()); + newLeftJoin->setOutputVar(leftJoin->outputVar()); + // LOG the col names of newLeftJoin + auto& newLeftJoinColNames = newLeftJoin->colNames(); + LOG(ERROR) << "newLeftJoinColNames.size(): " << newLeftJoinColNames.size(); + for (auto& colName : newLeftJoinColNames) { + LOG(ERROR) << "colName: " << colName; + } auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup); newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]); newLeftJoinGroupNode->dependsOn(newProjectGroup); diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h index d075aefa7c1..7792a98b14d 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h @@ -9,24 +9,24 @@ namespace nebula { namespace opt { - -/* -Before: - BiLeftJoin({id(v)}, id(v)) - / \ - ... Project - \ - AppendVertices(v) - \ - Traverse(e) - -After: - BiLeftJoin({id(v)}, none_direct_dst(e)) - / \ - ... Project - \ - Traverse(e) -*/ +/* Before: + * HashLeftJoin({id(v)}, {id(v)}) + * / \ + * ... Project + * / \ + * AppendVertices(v) AppendVertices(v) + * / \ + * ... 
Traverse(e) + * + * After: + * HashLeftJoin({id(v)}, {$-.v}) + * / \ + * ... Project(..., none_direct_dst(e) AS v) + * / \ + * AppendVertices(v) Traverse(e) + * / + * ... + */ class OptimizeLeftJoinPredicateRule final : public OptRule { public: const Pattern &pattern() const override; diff --git a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature index 3386e1313d8..425426eb387 100644 --- a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature +++ b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature @@ -1,4 +1,4 @@ -# Copyright (c) 2021 vesoft inc. All rights reserved. +# Copyright (c) 2022 vesoft inc. All rights reserved. # # This source code is licensed under Apache 2.0 License. Feature: Optimize left join predicate @@ -18,47 +18,47 @@ Feature: Optimize left join predicate id(friendTeam) AS teamId, friendTeam.team.name AS teamName, numFriends - ORDER BY numFriends DESC - LIMIT 20 + ORDER BY teamName DESC """ - Then the result should be, in any order, with relax comparison: + Then the result should be, in order, with relax comparison: | teamId | teamName | numFriends | - | "Clippers" | "Clippers" | 0 | - | "Bulls" | "Bulls" | 0 | - | "Spurs" | "Spurs" | 0 | - | "Thunders" | "Thunders" | 0 | - | "Hornets" | "Hornets" | 0 | | "Warriors" | "Warriors" | 0 | - | "Hawks" | "Hawks" | 0 | - | "Kings" | "Kings" | 0 | - | "Magic" | "Magic" | 0 | | "Trail Blazers" | "Trail Blazers" | 0 | - | "Lakers" | "Lakers" | 0 | - | "Grizzlies" | "Grizzlies" | 0 | + | "Thunders" | "Thunders" | 0 | | "Suns" | "Suns" | 0 | + | "Spurs" | "Spurs" | 0 | | "Rockets" | "Rockets" | 0 | - | "Cavaliers" | "Cavaliers" | 0 | | "Raptors" | "Raptors" | 0 | + | "Pistons" | "Pistons" | 0 | + | "Magic" | "Magic" | 0 | + | "Lakers" | "Lakers" | 0 | + | "Kings" | "Kings" | 0 | + | "Jazz" | "Jazz" | 0 | + | "Hornets" | "Hornets" | 0 | + | "Heat" | "Heat" | 0 | + | "Hawks" | "Hawks" | 0 | + | 
"Grizzlies" | "Grizzlies" | 0 | + | "Clippers" | "Clippers" | 0 | | "Celtics" | "Celtics" | 0 | + | "Cavaliers" | "Cavaliers" | 0 | + | "Bulls" | "Bulls" | 0 | | "76ers" | "76ers" | 0 | - | "Heat" | "Heat" | 0 | - | "Jazz" | "Jazz" | 0 | And the execution plan should be: - | id | name | dependencies | profiling data | operator info | - | 21 | TopN | 18 | | | - | 18 | Project | 17 | | | - | 17 | Aggregate | 16 | | | - | 16 | BiLeftJoin | 10,15 | | | - | 10 | Dedup | 28 | | | - | 28 | Project | 22 | | | - | 22 | Filter | 26 | | | - | 26 | AppendVertices | 25 | | | - | 25 | Traverse | 24 | | | - | 24 | Traverse | 2 | | | - | 2 | Dedup | 1 | | | - | 1 | PassThrough | 3 | | | - | 3 | Start | | | | - | 15 | Project | 14 | | | - | 14 | Traverse | 12 | | | - | 12 | Traverse | 11 | | | - | 11 | Argument | | | | + | id | name | dependencies | operator info | + | 21 | Sort | 18 | | + | 18 | Project | 17 | | + | 17 | Aggregate | 16 | | + | 16 | HashLeftJoin | 10,15 | {"probeKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "hashKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} | + | 10 | Dedup | 28 | | + | 28 | Project | 22 | | + | 22 | Filter | 26 | | + | 26 | AppendVertices | 25 | | + | 25 | Traverse | 24 | | + | 24 | Traverse | 2 | | + | 2 | Dedup | 1 | | + | 1 | PassThrough | 3 | | + | 3 | Start | | | + | 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3) AS friendTeam"]} | + | 14 | Traverse | 12 | | + | 12 | Traverse | 11 | | + | 11 | Argument | | | From c96f298b3a605cb1ed6d207a44e3c1c8904eb796 Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Tue, 6 Dec 2022 23:54:08 +0800 Subject: [PATCH 4/5] address leftExprs&rightExprs --- .../rule/OptimizeLeftJoinPredicateRule.cpp | 42 +++++++++---------- .../features/match/MultiQueryParts.feature | 23 +++++----- .../OptimizeLeftJoinPredicateRule.feature | 2 +- .../PushFilterDownTraverseRule.feature | 3 +- 4 files changed, 34 
insertions(+), 36 deletions(-) diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp index 1c79d6e32e9..8cee7eca55b 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp @@ -53,18 +53,17 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( auto& tvEdgeAlias = traverse->edgeAlias(); - auto& hashKeys = leftJoin->hashKeys(); - auto& probeKeys = leftJoin->probeKeys(); + auto& leftExprs = leftJoin->hashKeys(); + auto& rightExprs = leftJoin->probeKeys(); - // Use visitor to collect all function `id` in the hashKeys bool found = false; - size_t hashKeyIdx = 0; - for (size_t i = 0; i < hashKeys.size(); ++i) { - auto* hashKey = hashKeys[i]; - if (hashKey->kind() != Expression::Kind::kFunctionCall) { + size_t rightExprIdx = 0; + for (size_t i = 0; i < rightExprs.size(); ++i) { + auto* rightExpr = rightExprs[i]; + if (rightExpr->kind() != Expression::Kind::kFunctionCall) { continue; } - auto* func = static_cast(hashKey); + auto* func = static_cast(rightExpr); if (func->name() != "id" && func->name() != "_joinkey") { continue; } @@ -76,14 +75,14 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( } auto& alias = static_cast(arg)->prop(); if (alias != avNodeAlias) continue; - // Must check if probe keys contain the same key - if (*probeKeys[i] != *hashKey) { + // Must check if left exprs contain the same key + if (*leftExprs[i] != *rightExpr) { return TransformResult::noTransform(); } if (found) { return TransformResult::noTransform(); } - hashKeyIdx = i; + rightExprIdx = i; found = true; } if (!found) { @@ -112,33 +111,34 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( auto* pool = octx->qctx()->objPool(); // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`, - // and let the new left join use it as hash key + // and let the new left join use it as right expr auto* args = 
ArgumentList::make(pool); args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias)); auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args); + auto oldYieldColumns = project->columns()->columns(); auto* newYieldColumns = pool->makeAndAdd(); - for (size_t i = 0; i < project->columns()->size(); ++i) { + for (size_t i = 0; i < oldYieldColumns.size(); ++i) { if (i == prjIdx) { newYieldColumns->addColumn(new YieldColumn(newPrjExpr, avNodeAlias)); } else { - newYieldColumns->addColumn(project->columns()->columns()[i]->clone().release()); + newYieldColumns->addColumn(oldYieldColumns[i]->clone().release()); } } auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns); // $-.`avNodeAlias` - auto* newHashExpr = InputPropertyExpression::make(pool, avNodeAlias); - std::vector newHashKeys; - for (size_t i = 0; i < hashKeys.size(); ++i) { - if (i == hashKeyIdx) { - newHashKeys.emplace_back(newHashExpr); + auto* newRightExpr = InputPropertyExpression::make(pool, avNodeAlias); + std::vector newRightExprs; + for (size_t i = 0; i < rightExprs.size(); ++i) { + if (i == rightExprIdx) { + newRightExprs.emplace_back(newRightExpr); } else { - newHashKeys.emplace_back(hashKeys[i]); + newRightExprs.emplace_back(rightExprs[i]->clone()); } } auto* newLeftJoin = - graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, newHashKeys, probeKeys); + graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, leftExprs, newRightExprs); TransformResult result; result.eraseAll = true; diff --git a/tests/tck/features/match/MultiQueryParts.feature b/tests/tck/features/match/MultiQueryParts.feature index 31599bcf9f1..c51338e321b 100644 --- a/tests/tck/features/match/MultiQueryParts.feature +++ b/tests/tck/features/match/MultiQueryParts.feature @@ -181,16 +181,16 @@ Feature: Multi Query Parts OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return * """ Then the result should be, in any order, with relax comparison: 
- | v1 | v2 | e3 | v4 | v3 | e5 | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | - | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] | + | v1 | v2 | e3 | v4 | v3 | e5 | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 ] | ("Tony Parker") | __NULL__ | __NULL__ | + | ("Tony Parker") | 
("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0] | ("Tony Parker") | __NULL__ | __NULL__ | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] | + | ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 ] | + | ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] | # The redudant Project after HashLeftJoin is removed now And the execution plan should be: | id | name | dependencies | profiling data | operator info | @@ -203,8 +203,7 @@ Feature: Multi Query Parts | 1 | PassThrough | 3 | | | | 3 | Start | | | | | 14 | Project | 13 | | | - | 13 | AppendVertices | 12 | | | - | 12 | Traverse | 21 | | | + | 13 | Traverse | 21 | | | | 21 | Traverse | 9 | | | | 9 | Dedup | 8 | | | | 8 | PassThrough | 10 | | | diff --git a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature index 425426eb387..ab6fbe9e736 100644 --- a/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature +++ b/tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature @@ -48,7 +48,7 @@ Feature: Optimize left join predicate | 21 | Sort | 18 | | | 18 | Project | 17 | | | 17 | Aggregate | 16 
| | - | 16 | HashLeftJoin | 10,15 | {"probeKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "hashKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} | + | 16 | HashLeftJoin | 10,15 | {"hashKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "probeKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} | | 10 | Dedup | 28 | | | 28 | Project | 22 | | | 22 | Filter | 26 | | diff --git a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature index 56aed295543..81ef5260791 100644 --- a/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownTraverseRule.feature @@ -65,8 +65,7 @@ Feature: Push Filter down Traverse rule | 2 | Dedup | 1 | | | | 1 | PassThrough | 3 | | | | 3 | Start | | | | - | 15 | Project | 15 | | | - | 30 | AppendVertices | 14 | | | + | 15 | Project | 14 | | | | 14 | Traverse | 12 | | | | 12 | Traverse | 11 | | | | 11 | Argument | | | | From 3ced56403340bb74f0b6d7538dfa23c806877850 Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:39:04 +0800 Subject: [PATCH 5/5] address comments --- .../rule/OptimizeLeftJoinPredicateRule.cpp | 6 ---- .../rule/OptimizeLeftJoinPredicateRule.h | 36 +++++++++---------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp index 8cee7eca55b..d9af805ae24 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp @@ -151,12 +151,6 @@ StatusOr OptimizeLeftJoinPredicateRule::transform( newLeftJoin->setLeftVar(leftJoin->leftInputVar()); newLeftJoin->setRightVar(newProject->outputVar()); newLeftJoin->setOutputVar(leftJoin->outputVar()); - // LOG the col names of newLeftJoin - auto& newLeftJoinColNames = newLeftJoin->colNames(); - 
LOG(ERROR) << "newLeftJoinColNames.size(): " << newLeftJoinColNames.size(); - for (auto& colName : newLeftJoinColNames) { - LOG(ERROR) << "colName: " << colName; - } auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup); newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]); newLeftJoinGroupNode->dependsOn(newProjectGroup); diff --git a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h index 7792a98b14d..ef27924af2e 100644 --- a/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h +++ b/src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h @@ -9,24 +9,24 @@ namespace nebula { namespace opt { -/* Before: - * HashLeftJoin({id(v)}, {id(v)}) - * / \ - * ... Project - * / \ - * AppendVertices(v) AppendVertices(v) - * / \ - * ... Traverse(e) - * - * After: - * HashLeftJoin({id(v)}, {$-.v}) - * / \ - * ... Project(..., none_direct_dst(e) AS v) - * / \ - * AppendVertices(v) Traverse(e) - * / - * ... - */ +// Before: +// HashLeftJoin({id(v)}, {id(v)}) +// | | +// ... Project +// | | +// AppendVertices(v) AppendVertices(v) +// | | +// ... Traverse(e) +// +// After: +// HashLeftJoin({id(v)}, {$-.v}) +// | | +// ... Project(..., none_direct_dst(e) AS v) +// | | +// AppendVertices(v) Traverse(e) +// | +// ... +// class OptimizeLeftJoinPredicateRule final : public OptRule { public: const Pattern &pattern() const override;