Skip to content

Commit

Permalink
optimize left join predicate and eliminate useless appendVertices (#4980
Browse files Browse the repository at this point in the history
)

* optimize left join predicate & Eliminate useless appendVertices

* fix

* update

* address leftExprs&rightExprs

* address comments
  • Loading branch information
jievince authored Dec 7, 2022
1 parent 3296bda commit 5ad688b
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 14 deletions.
5 changes: 5 additions & 0 deletions src/common/function/FunctionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2004,6 +2004,11 @@ FunctionManager::FunctionManager() {
};
}
{
// `none_direct_dst` always returns the dstId of an edge key,
// regardless of the direction of the edge type.
// The encoding of the edge key is:
// type(1) + partId(3) + srcId(*) + edgeType(4) + edgeRank(8) + dstId(*) + placeHolder(1)
// More information about the encoding can be found in `NebulaKeyUtils.h`
auto &attr = functions_["none_direct_dst"];
attr.minArity_ = 1;
attr.maxArity_ = 1;
Expand Down
1 change: 1 addition & 0 deletions src/graph/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ nebula_add_library(
rule/PushLimitDownScanEdgesRule.cpp
rule/RemoveProjectDedupBeforeGetDstBySrcRule.cpp
rule/PushFilterDownTraverseRule.cpp
rule/OptimizeLeftJoinPredicateRule.cpp
)

nebula_add_subdirectory(test)
167 changes: 167 additions & 0 deletions src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Copyright (c) 2022 vesoft inc. All rights reserved.
//
// This source code is licensed under Apache 2.0 License.

#include "graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h"

#include "graph/optimizer/OptContext.h"
#include "graph/optimizer/OptGroup.h"
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"
#include "graph/util/ExpressionUtils.h"

using nebula::graph::PlanNode;
using nebula::graph::QueryContext;

namespace nebula {
namespace opt {

std::unique_ptr<OptRule> OptimizeLeftJoinPredicateRule::kInstance =
std::unique_ptr<OptimizeLeftJoinPredicateRule>(new OptimizeLeftJoinPredicateRule());

// Adds this rule to the query rule set as soon as it is constructed.
OptimizeLeftJoinPredicateRule::OptimizeLeftJoinPredicateRule() {
  auto& queryRules = RuleSet::QueryRules();
  queryRules.addRule(this);
}

// Returns the plan shape this rule applies to: a HashLeftJoin whose second
// dependency is the chain Project -> AppendVertices -> Traverse. The first
// dependency is declared as kUnknown (presumably a wildcard -- confirm against
// the Pattern matcher).
const Pattern& OptimizeLeftJoinPredicateRule::pattern() const {
  static Pattern pattern = [] {
    // First (left) dependency of the join.
    auto leftInput = Pattern::create(PlanNode::Kind::kUnknown);

    // Second (right) dependency of the join, assembled from the leaf upwards.
    auto traverseLeaf = Pattern::create(PlanNode::Kind::kTraverse);
    auto appendVerticesOverTraverse =
        Pattern::create(PlanNode::Kind::kAppendVertices, {traverseLeaf});
    auto projectOverAppendVertices =
        Pattern::create(PlanNode::Kind::kProject, {appendVerticesOverTraverse});

    return Pattern::create(PlanNode::Kind::kHashLeftJoin,
                           {leftInput, projectOverAppendVertices});
  }();
  return pattern;
}

// Rewrites the right-hand input of a matched HashLeftJoin from
//   Project <- AppendVertices(v) <- Traverse(e)
// to
//   Project(..., none_direct_dst($-.e) AS v) <- Traverse(e)
// and replaces the join's probe key on `v` with `$-.v`, so the AppendVertices
// node is no longer part of the plan.
//
// The rewrite is only performed when ALL of the following hold; otherwise
// `TransformResult::noTransform()` is returned:
//   * exactly one probe key has the form `id($-.v)` or `_joinkey($-.v)`, where
//     `v` is the AppendVertices node alias;
//   * the hash key at the same position is equal to that probe key;
//   * exactly one Project column is the plain input property `$-.v`.
//
// @param octx     optimizer context; supplies the query context and object pool
// @param matched  match result rooted at the HashLeftJoin (see `pattern()`)
// @return the new HashLeftJoin group node, or `noTransform()`
//
// NOTE(review): nothing here inspects filters attached to the AppendVertices
// node before dropping it -- confirm that such filters cannot occur for this
// pattern, or add a guard.
StatusOr<OptRule::TransformResult> OptimizeLeftJoinPredicateRule::transform(
    OptContext* octx, const MatchedResult& matched) const {
  auto* leftJoinGroupNode = matched.node;
  auto* leftJoinGroup = leftJoinGroupNode->group();
  auto* leftJoin = static_cast<graph::HashLeftJoin*>(leftJoinGroupNode->node());

  // Walk down the right branch: Project -> AppendVertices -> Traverse.
  const auto& projectMatched = matched.dependencies[1];
  auto* project = static_cast<graph::Project*>(projectMatched.node->node());

  const auto& appendVerticesMatched = projectMatched.dependencies[0];
  auto* appendVerticesGroupNode = appendVerticesMatched.node;
  auto* appendVertices = static_cast<graph::AppendVertices*>(appendVerticesGroupNode->node());

  auto* traverse =
      static_cast<graph::Traverse*>(appendVerticesMatched.dependencies[0].node->node());

  const auto& avNodeAlias = appendVertices->nodeAlias();
  const auto& tvEdgeAlias = traverse->edgeAlias();

  const auto& leftExprs = leftJoin->hashKeys();
  const auto& rightExprs = leftJoin->probeKeys();

  // Step 1: locate the unique probe key `id($-.v)` / `_joinkey($-.v)`.
  bool found = false;
  size_t rightExprIdx = 0;
  for (size_t i = 0; i < rightExprs.size(); ++i) {
    auto* rightExpr = rightExprs[i];
    if (rightExpr->kind() != Expression::Kind::kFunctionCall) {
      continue;
    }
    auto* func = static_cast<FunctionCallExpression*>(rightExpr);
    if (func->name() != "id" && func->name() != "_joinkey") {
      continue;
    }
    auto& args = func->args()->args();
    DCHECK_EQ(args.size(), 1);
    // DCHECK may be compiled out, so do not index `args` blindly.
    if (args.size() != 1) {
      continue;
    }
    auto* arg = args[0];
    if (arg->kind() != Expression::Kind::kInputProperty) {
      continue;
    }
    const auto& alias = static_cast<InputPropertyExpression*>(arg)->prop();
    if (alias != avNodeAlias) {
      continue;
    }
    // The hash key at the same position must be the same expression. A missing
    // counterpart (key lists of different length) is treated as a mismatch
    // rather than read out of bounds.
    if (i >= leftExprs.size() || *leftExprs[i] != *rightExpr) {
      return TransformResult::noTransform();
    }
    // More than one candidate key: bail out.
    if (found) {
      return TransformResult::noTransform();
    }
    rightExprIdx = i;
    found = true;
  }
  if (!found) {
    return TransformResult::noTransform();
  }

  // Step 2: locate the unique Project column that is exactly `$-.v`.
  // The column list is fetched once and reused for the rebuild below.
  const auto oldYieldColumns = project->columns()->columns();
  found = false;
  size_t prjIdx = 0;
  for (size_t i = 0; i < oldYieldColumns.size(); ++i) {
    const auto* col = oldYieldColumns[i];
    if (col->expr()->kind() != Expression::Kind::kInputProperty) {
      continue;
    }
    auto* inputProp = static_cast<InputPropertyExpression*>(col->expr());
    if (inputProp->prop() != avNodeAlias) {
      continue;
    }
    if (found) {
      return TransformResult::noTransform();
    }
    prjIdx = i;
    found = true;
  }
  if (!found) {
    return TransformResult::noTransform();
  }

  auto* pool = octx->qctx()->objPool();
  // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`,
  // and let the new left join use it as right expr
  auto* args = ArgumentList::make(pool);
  args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias));
  auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args);

  // Step 3: rebuild the Project, swapping only the `$-.v` column; the new
  // column keeps the name `v` so downstream references stay valid.
  auto* newYieldColumns = pool->makeAndAdd<YieldColumns>();
  for (size_t i = 0; i < oldYieldColumns.size(); ++i) {
    if (i == prjIdx) {
      newYieldColumns->addColumn(new YieldColumn(newPrjExpr, avNodeAlias));
    } else {
      newYieldColumns->addColumn(oldYieldColumns[i]->clone().release());
    }
  }
  auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns);

  // Step 4: rebuild the join; the matched probe key becomes $-.`avNodeAlias`.
  auto* newRightExpr = InputPropertyExpression::make(pool, avNodeAlias);
  std::vector<Expression*> newRightExprs;
  newRightExprs.reserve(rightExprs.size());
  for (size_t i = 0; i < rightExprs.size(); ++i) {
    if (i == rightExprIdx) {
      newRightExprs.emplace_back(newRightExpr);
    } else {
      newRightExprs.emplace_back(rightExprs[i]->clone());
    }
  }
  auto* newLeftJoin =
      graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, leftExprs, newRightExprs);

  TransformResult result;
  result.eraseAll = true;

  // Step 5: wire the new nodes into the optimizer groups. The new Project reads
  // what AppendVertices used to read and takes over its dependencies.
  newProject->setInputVar(appendVertices->inputVar());
  auto newProjectGroup = OptGroup::create(octx);
  auto* newProjectGroupNode = newProjectGroup->makeGroupNode(newProject);
  newProjectGroupNode->setDeps(appendVerticesGroupNode->dependencies());

  // The new join keeps the old join's left input and output variable.
  newLeftJoin->setLeftVar(leftJoin->leftInputVar());
  newLeftJoin->setRightVar(newProject->outputVar());
  newLeftJoin->setOutputVar(leftJoin->outputVar());
  auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup);
  newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]);
  newLeftJoinGroupNode->dependsOn(newProjectGroup);

  result.newGroupNodes.emplace_back(newLeftJoinGroupNode);
  return result;
}

// Returns the name of this rule.
std::string OptimizeLeftJoinPredicateRule::toString() const {
  static constexpr char kRuleName[] = "OptimizeLeftJoinPredicateRule";
  return kRuleName;
}

} // namespace opt
} // namespace nebula
46 changes: 46 additions & 0 deletions src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* Copyright (c) 2022 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License.
*/

#pragma once

#include "graph/optimizer/OptRule.h"

namespace nebula {
namespace opt {
// Before:
// HashLeftJoin({id(v)}, {id(v)})
// | |
// ... Project
// | |
// AppendVertices(v) AppendVertices(v)
// | |
// ... Traverse(e)
//
// After:
// HashLeftJoin({id(v)}, {$-.v})
// | |
// ... Project(..., none_direct_dst(e) AS v)
// | |
// AppendVertices(v) Traverse(e)
// |
// ...
//
// Optimizer rule that drops the AppendVertices node feeding the right side of
// a HashLeftJoin and derives the join key from the Traverse edge instead.
// The rule cannot be constructed from outside; the only instance is `kInstance`.
class OptimizeLeftJoinPredicateRule final : public OptRule {
 public:
  // Plan shape this rule matches.
  const Pattern &pattern() const override;

  // Builds the rewritten plan for a match of `pattern()`.
  // `octx` is the optimizer context (not the query context); `matched` is the
  // match result rooted at the HashLeftJoin.
  StatusOr<OptRule::TransformResult> transform(OptContext *octx,
                                               const MatchedResult &matched) const override;

  // Name of the rule.
  std::string toString() const override;

 private:
  // Private so that the rule is only instantiated through `kInstance`.
  OptimizeLeftJoinPredicateRule();

  // The single instance of this rule.
  static std::unique_ptr<OptRule> kInstance;
};

} // namespace opt
} // namespace nebula
23 changes: 11 additions & 12 deletions tests/tck/features/match/MultiQueryParts.feature
Original file line number Diff line number Diff line change
Expand Up @@ -181,16 +181,16 @@ Feature: Multi Query Parts
OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 ] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] |
| ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 ] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
# The redundant Project after HashLeftJoin is now removed
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
Expand All @@ -203,8 +203,7 @@ Feature: Multi Query Parts
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 13 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
Expand Down
64 changes: 64 additions & 0 deletions tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (c) 2022 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
Feature: Optimize left join predicate

Background:
Given a graph with space named "nba"

Scenario: optimize left join predicate
When profiling query:
"""
MATCH (person:player)-[:like*1..2]-(friend:player)-[:serve]->(friendTeam:team)
WHERE id(person) == "Tony Parker" AND id(friend) != "Tony Parker"
WITH DISTINCT friend, friendTeam
OPTIONAL MATCH (friend)<-[:like]-(friend2:player)<-[:like]-(friendTeam)
WITH friendTeam, count(friend2) AS numFriends
RETURN
id(friendTeam) AS teamId,
friendTeam.team.name AS teamName,
numFriends
ORDER BY teamName DESC
"""
Then the result should be, in order, with relax comparison:
| teamId | teamName | numFriends |
| "Warriors" | "Warriors" | 0 |
| "Trail Blazers" | "Trail Blazers" | 0 |
| "Thunders" | "Thunders" | 0 |
| "Suns" | "Suns" | 0 |
| "Spurs" | "Spurs" | 0 |
| "Rockets" | "Rockets" | 0 |
| "Raptors" | "Raptors" | 0 |
| "Pistons" | "Pistons" | 0 |
| "Magic" | "Magic" | 0 |
| "Lakers" | "Lakers" | 0 |
| "Kings" | "Kings" | 0 |
| "Jazz" | "Jazz" | 0 |
| "Hornets" | "Hornets" | 0 |
| "Heat" | "Heat" | 0 |
| "Hawks" | "Hawks" | 0 |
| "Grizzlies" | "Grizzlies" | 0 |
| "Clippers" | "Clippers" | 0 |
| "Celtics" | "Celtics" | 0 |
| "Cavaliers" | "Cavaliers" | 0 |
| "Bulls" | "Bulls" | 0 |
| "76ers" | "76ers" | 0 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 21 | Sort | 18 | |
| 18 | Project | 17 | |
| 17 | Aggregate | 16 | |
| 16 | HashLeftJoin | 10,15 | {"hashKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "probeKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} |
| 10 | Dedup | 28 | |
| 28 | Project | 22 | |
| 22 | Filter | 26 | |
| 26 | AppendVertices | 25 | |
| 25 | Traverse | 24 | |
| 24 | Traverse | 2 | |
| 2 | Dedup | 1 | |
| 1 | PassThrough | 3 | |
| 3 | Start | | |
| 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3) AS friendTeam"]} |
| 14 | Traverse | 12 | |
| 12 | Traverse | 11 | |
| 11 | Argument | | |
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ Feature: Push Filter down Traverse rule
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 15 | Project | 15 | | |
| 30 | AppendVertices | 14 | | |
| 15 | Project | 14 | | |
| 14 | Traverse | 12 | | |
| 12 | Traverse | 11 | | |
| 11 | Argument | | | |
Expand Down

0 comments on commit 5ad688b

Please sign in to comment.