Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize left join predicate & Eliminate useless appendVertices #4980

Merged
merged 5 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/common/function/FunctionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2004,6 +2004,11 @@ FunctionManager::FunctionManager() {
};
}
{
// `none_direct_dst` always returns the dstId of an edge key,
// regardless of the direction of the edge type.
// The encoding of the edge key is:
// type(1) + partId(3) + srcId(*) + edgeType(4) + edgeRank(8) + dstId(*) + placeHolder(1)
// More information about the encoding can be found in `NebulaKeyUtils.h`
auto &attr = functions_["none_direct_dst"];
attr.minArity_ = 1;
attr.maxArity_ = 1;
Expand Down
1 change: 1 addition & 0 deletions src/graph/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ nebula_add_library(
rule/PushLimitDownScanEdgesRule.cpp
rule/RemoveProjectDedupBeforeGetDstBySrcRule.cpp
rule/PushFilterDownTraverseRule.cpp
rule/OptimizeLeftJoinPredicateRule.cpp
)

nebula_add_subdirectory(test)
167 changes: 167 additions & 0 deletions src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Copyright (c) 2022 vesoft inc. All rights reserved.
//
// This source code is licensed under Apache 2.0 License.

#include "graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h"

#include "graph/optimizer/OptContext.h"
#include "graph/optimizer/OptGroup.h"
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"
#include "graph/util/ExpressionUtils.h"

using nebula::graph::PlanNode;
using nebula::graph::QueryContext;

namespace nebula {
namespace opt {

std::unique_ptr<OptRule> OptimizeLeftJoinPredicateRule::kInstance =
std::unique_ptr<OptimizeLeftJoinPredicateRule>(new OptimizeLeftJoinPredicateRule());

// Adds this rule to the query rule set as a side effect of construction.
OptimizeLeftJoinPredicateRule::OptimizeLeftJoinPredicateRule() {
  RuleSet::QueryRules().addRule(this);
}

// Returns the plan shape this rule applies to:
//
//   HashLeftJoin
//     |- <kUnknown>                        (first dependency)
//     `- Project <- AppendVertices <- Traverse   (second dependency)
//
// NOTE(review): kUnknown presumably acts as a wildcard for the first
// dependency -- confirm against Pattern's matching logic.
const Pattern& OptimizeLeftJoinPredicateRule::pattern() const {
  // Assembled bottom-up once, on the first call, then reused.
  static Pattern pattern = [] {
    using Kind = PlanNode::Kind;
    auto traversePattern = Pattern::create(Kind::kTraverse);
    auto appendVerticesPattern = Pattern::create(Kind::kAppendVertices, {traversePattern});
    auto projectPattern = Pattern::create(Kind::kProject, {appendVerticesPattern});
    return Pattern::create(Kind::kHashLeftJoin,
                           {Pattern::create(Kind::kUnknown), projectPattern});
  }();
  return pattern;
}

// Rewrites the second (probe-key) input of a HashLeftJoin from
//
//   Project <- AppendVertices(v) <- Traverse(e)
//
// to
//
//   Project(..., none_direct_dst($-.e) AS v) <- Traverse(e)
//
// and replaces the matching probe key `id($-.v)` / `_joinkey($-.v)` with
// `$-.v`, so the AppendVertices node is no longer part of the plan.
//
// The rewrite is applied only when
//   * exactly one probe key is `id($-.v)` or `_joinkey($-.v)`, where `v` is the
//     AppendVertices node alias, and the hash key at the same position is an
//     equal expression; and
//   * exactly one Project column is the plain input property `$-.v`.
// In every other case `TransformResult::noTransform()` is returned.
//
// @param octx     optimizer context; supplies the query context / object pool
//                 and owns the newly created groups.
// @param matched  match result rooted at the HashLeftJoin (see pattern()).
// @return         a result holding the new HashLeftJoin group node with
//                 `eraseAll` set, or `noTransform()`.
//
// NOTE(review): the AppendVertices node is dropped without inspecting any
// filter it may carry -- confirm that this cannot change query results.
StatusOr<OptRule::TransformResult> OptimizeLeftJoinPredicateRule::transform(
    OptContext* octx, const MatchedResult& matched) const {
  auto* leftJoinGroupNode = matched.node;
  auto* leftJoinGroup = leftJoinGroupNode->group();
  auto* leftJoin = static_cast<graph::HashLeftJoin*>(leftJoinGroupNode->node());

  // Second dependency of the join: Project <- AppendVertices <- Traverse.
  const auto& projectMatched = matched.dependencies[1];
  auto* project = static_cast<graph::Project*>(projectMatched.node->node());

  const auto& appendVerticesMatched = projectMatched.dependencies[0];
  auto* appendVerticesGroupNode = appendVerticesMatched.node;
  auto* appendVertices = static_cast<graph::AppendVertices*>(appendVerticesGroupNode->node());

  auto* traverse =
      static_cast<graph::Traverse*>(appendVerticesMatched.dependencies[0].node->node());

  auto& avNodeAlias = appendVertices->nodeAlias();
  auto& tvEdgeAlias = traverse->edgeAlias();

  auto& leftExprs = leftJoin->hashKeys();
  auto& rightExprs = leftJoin->probeKeys();
  // Keys are compared position by position below; refuse to index past the
  // end of the hash keys if the two lists ever differ in length.
  if (leftExprs.size() != rightExprs.size()) {
    return TransformResult::noTransform();
  }

  // Step 1: locate the unique probe key of the form id($-.v) / _joinkey($-.v).
  bool found = false;
  size_t rightExprIdx = 0;
  for (size_t i = 0; i < rightExprs.size(); ++i) {
    auto* rightExpr = rightExprs[i];
    if (rightExpr->kind() != Expression::Kind::kFunctionCall) {
      continue;
    }
    auto* func = static_cast<FunctionCallExpression*>(rightExpr);
    if (func->name() != "id" && func->name() != "_joinkey") {
      continue;
    }
    auto& args = func->args()->args();
    DCHECK_EQ(args.size(), 1);
    auto* arg = args[0];
    if (arg->kind() != Expression::Kind::kInputProperty) {
      continue;
    }
    auto& alias = static_cast<InputPropertyExpression*>(arg)->prop();
    if (alias != avNodeAlias) continue;
    // Must check if left exprs contain the same key
    if (*leftExprs[i] != *rightExpr) {
      return TransformResult::noTransform();
    }
    // More than one candidate key: give up rather than pick one.
    if (found) {
      return TransformResult::noTransform();
    }
    rightExprIdx = i;
    found = true;
  }
  if (!found) {
    return TransformResult::noTransform();
  }

  // Step 2: locate the unique Project column that is exactly `$-.v`.
  // The column list is fetched once and reused for both the scan and the
  // rebuild below instead of being requested on every iteration.
  const auto oldYieldColumns = project->columns()->columns();
  found = false;
  size_t prjIdx = 0;
  for (size_t i = 0; i < oldYieldColumns.size(); ++i) {
    const auto* col = oldYieldColumns[i];
    if (col->expr()->kind() != Expression::Kind::kInputProperty) {
      continue;
    }
    auto* inputProp = static_cast<InputPropertyExpression*>(col->expr());
    if (inputProp->prop() != avNodeAlias) continue;
    if (found) {
      return TransformResult::noTransform();
    }
    prjIdx = i;
    found = true;
  }
  if (!found) {
    return TransformResult::noTransform();
  }

  auto* pool = octx->qctx()->objPool();
  // Let the new project generate expr `none_direct_dst($-.tvEdgeAlias)`,
  // and let the new left join use it as right expr
  auto* args = ArgumentList::make(pool);
  args->addArgument(InputPropertyExpression::make(pool, tvEdgeAlias));
  auto* newPrjExpr = FunctionCallExpression::make(pool, "none_direct_dst", args);

  // Step 3: rebuild the Project, swapping only the `$-.v` column.
  auto* newYieldColumns = pool->makeAndAdd<YieldColumns>();
  for (size_t i = 0; i < oldYieldColumns.size(); ++i) {
    if (i == prjIdx) {
      newYieldColumns->addColumn(new YieldColumn(newPrjExpr, avNodeAlias));
    } else {
      newYieldColumns->addColumn(oldYieldColumns[i]->clone().release());
    }
  }
  auto* newProject = graph::Project::make(octx->qctx(), nullptr, newYieldColumns);

  // Step 4: rebuild the join, swapping only the matched probe key for
  // $-.`avNodeAlias`
  auto* newRightExpr = InputPropertyExpression::make(pool, avNodeAlias);
  std::vector<Expression*> newRightExprs;
  newRightExprs.reserve(rightExprs.size());
  for (size_t i = 0; i < rightExprs.size(); ++i) {
    if (i == rightExprIdx) {
      newRightExprs.emplace_back(newRightExpr);
    } else {
      newRightExprs.emplace_back(rightExprs[i]->clone());
    }
  }
  auto* newLeftJoin =
      graph::HashLeftJoin::make(octx->qctx(), nullptr, nullptr, leftExprs, newRightExprs);

  TransformResult result;
  result.eraseAll = true;

  // Step 5: wire the new nodes in. The new Project takes over the
  // AppendVertices node's input variable and group dependencies.
  newProject->setInputVar(appendVertices->inputVar());
  auto newProjectGroup = OptGroup::create(octx);
  auto* newProjectGroupNode = newProjectGroup->makeGroupNode(newProject);
  newProjectGroupNode->setDeps(appendVerticesGroupNode->dependencies());

  // The new join keeps the old join's left input and output variable, and
  // lives in the old join's group.
  newLeftJoin->setLeftVar(leftJoin->leftInputVar());
  newLeftJoin->setRightVar(newProject->outputVar());
  newLeftJoin->setOutputVar(leftJoin->outputVar());
  auto* newLeftJoinGroupNode = OptGroupNode::create(octx, newLeftJoin, leftJoinGroup);
  newLeftJoinGroupNode->dependsOn(leftJoinGroupNode->dependencies()[0]);
  newLeftJoinGroupNode->dependsOn(newProjectGroup);

  result.newGroupNodes.emplace_back(newLeftJoinGroupNode);
  return result;
}

// Returns the rule's name.
std::string OptimizeLeftJoinPredicateRule::toString() const {
  return "OptimizeLeftJoinPredicateRule";
}

} // namespace opt
} // namespace nebula
46 changes: 46 additions & 0 deletions src/graph/optimizer/rule/OptimizeLeftJoinPredicateRule.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* Copyright (c) 2022 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License.
*/

#pragma once

#include "graph/optimizer/OptRule.h"

namespace nebula {
namespace opt {
// Before:
// HashLeftJoin({id(v)}, {id(v)})
// | |
// ... Project
// | |
// AppendVertices(v) AppendVertices(v)
// | |
// ... Traverse(e)
//
// After:
// HashLeftJoin({id(v)}, {$-.v})
// | |
// ... Project(..., none_direct_dst(e) AS v)
// | |
// AppendVertices(v) Traverse(e)
// |
// ...
//
class OptimizeLeftJoinPredicateRule final : public OptRule {
 public:
  // Plan shape this rule matches: a HashLeftJoin whose second dependency is
  // Project <- AppendVertices <- Traverse.
  const Pattern &pattern() const override;

  // Applies the rewrite to a matched plan fragment.
  //
  // @param octx     optimizer context used to create the new plan nodes/groups.
  // @param matched  match result rooted at the HashLeftJoin.
  // @return         the replacement group node(s), or a "no transform" result
  //                 when the join keys / project columns do not fit the rule.
  StatusOr<OptRule::TransformResult> transform(OptContext *octx,
                                               const MatchedResult &matched) const override;

  // Returns the rule's name.
  std::string toString() const override;

 private:
  // Private: the only instance is the one held by kInstance.
  OptimizeLeftJoinPredicateRule();

  static std::unique_ptr<OptRule> kInstance;
};

} // namespace opt
} // namespace nebula
23 changes: 11 additions & 12 deletions tests/tck/features/match/MultiQueryParts.feature
Original file line number Diff line number Diff line change
Expand Up @@ -181,16 +181,16 @@ Feature: Multi Query Parts
OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 ] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] |
| ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 ] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 ] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 ] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0] |
# The redundant Project after HashLeftJoin is removed now
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
Expand All @@ -203,8 +203,7 @@ Feature: Multi Query Parts
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 13 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
Expand Down
64 changes: 64 additions & 0 deletions tests/tck/features/optimizer/OptimizeLeftJoinPredicateRule.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (c) 2022 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
Feature: Optimize left join predicate

Background:
Given a graph with space named "nba"

Scenario: optimize left join predicate
When profiling query:
"""
MATCH (person:player)-[:like*1..2]-(friend:player)-[:serve]->(friendTeam:team)
WHERE id(person) == "Tony Parker" AND id(friend) != "Tony Parker"
WITH DISTINCT friend, friendTeam
OPTIONAL MATCH (friend)<-[:like]-(friend2:player)<-[:like]-(friendTeam)
WITH friendTeam, count(friend2) AS numFriends
RETURN
id(friendTeam) AS teamId,
friendTeam.team.name AS teamName,
numFriends
ORDER BY teamName DESC
"""
Then the result should be, in order, with relax comparison:
| teamId | teamName | numFriends |
| "Warriors" | "Warriors" | 0 |
| "Trail Blazers" | "Trail Blazers" | 0 |
| "Thunders" | "Thunders" | 0 |
| "Suns" | "Suns" | 0 |
| "Spurs" | "Spurs" | 0 |
| "Rockets" | "Rockets" | 0 |
| "Raptors" | "Raptors" | 0 |
| "Pistons" | "Pistons" | 0 |
| "Magic" | "Magic" | 0 |
| "Lakers" | "Lakers" | 0 |
| "Kings" | "Kings" | 0 |
| "Jazz" | "Jazz" | 0 |
| "Hornets" | "Hornets" | 0 |
| "Heat" | "Heat" | 0 |
| "Hawks" | "Hawks" | 0 |
| "Grizzlies" | "Grizzlies" | 0 |
| "Clippers" | "Clippers" | 0 |
| "Celtics" | "Celtics" | 0 |
| "Cavaliers" | "Cavaliers" | 0 |
| "Bulls" | "Bulls" | 0 |
| "76ers" | "76ers" | 0 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 21 | Sort | 18 | |
| 18 | Project | 17 | |
| 17 | Aggregate | 16 | |
| 16 | HashLeftJoin | 10,15 | {"hashKeys": ["_joinkey($-.friendTeam)", "_joinkey($-.friend)"], "probeKeys": ["$-.friendTeam", "_joinkey($-.friend)"]} |
| 10 | Dedup | 28 | |
| 28 | Project | 22 | |
| 22 | Filter | 26 | |
| 26 | AppendVertices | 25 | |
| 25 | Traverse | 24 | |
| 24 | Traverse | 2 | |
| 2 | Dedup | 1 | |
| 1 | PassThrough | 3 | |
| 3 | Start | | |
| 15 | Project | 14 | {"columns": ["$-.friend AS friend", "$-.friend2 AS friend2", "none_direct_dst($-.__VAR_3) AS friendTeam"]} |
| 14 | Traverse | 12 | |
| 12 | Traverse | 11 | |
| 11 | Argument | | |
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ Feature: Push Filter down Traverse rule
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 15 | Project | 15 | | |
| 30 | AppendVertices | 14 | | |
| 15 | Project | 14 | | |
| 14 | Traverse | 12 | | |
| 12 | Traverse | 11 | | |
| 11 | Argument | | | |
Expand Down