Skip to content

Commit

Permalink
remove useless project and dedup before getdstBysrc
Browse files Browse the repository at this point in the history
  • Loading branch information
jievince committed Aug 20, 2022
1 parent d0e289c commit f27ef3a
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 8 deletions.
1 change: 1 addition & 0 deletions src/graph/executor/query/MinusExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ folly::Future<Status> MinusExecutor::execute() {
auto right = getRightInputData();

std::unordered_set<const Row*> hashSet;
hashSet.reserve(right.iterRef()->size());
for (; right.iterRef()->valid(); right.iterRef()->next()) {
hashSet.insert(right.iterRef()->row());
// TODO: should test duplicate rows
Expand Down
1 change: 1 addition & 0 deletions src/graph/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ nebula_add_library(
rule/PushTopNDownIndexScanRule.cpp
rule/EliminateAppendVerticesRule.cpp
rule/PushLimitDownScanEdgesRule.cpp
rule/RemoveProjectDedupBeforeGetDstBySrcRule.cpp
)

nebula_add_subdirectory(test)
5 changes: 2 additions & 3 deletions src/graph/optimizer/OptGroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,8 @@ Status OptGroup::explore(const OptRule *rule) {
auto resStatus = rule->transform(ctx_, matched);
NG_RETURN_IF_ERROR(resStatus);
auto result = std::move(resStatus).value();
DCHECK(result.checkDataFlow(boundary))
<< "Plan of transfromed result should keep input variable same with dependencies in rule "
<< rule->toString();
// In some cases, we can apply optimization rules even if the control flow and data flow are
// inconsistent. For now, let the optimization rules themselves guarantee correctness.
if (result.eraseAll) {
for (auto gnode : groupNodes_) {
gnode->node()->releaseSymbols();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License.
*/

#include "graph/optimizer/rule/RemoveProjectDedupBeforeGetDstBySrcRule.h"

#include "graph/optimizer/OptContext.h"
#include "graph/optimizer/OptGroup.h"
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"

using nebula::graph::PlanNode;
using nebula::graph::QueryContext;

namespace nebula {
namespace opt {

std::unique_ptr<OptRule> RemoveProjectDedupBeforeGetDstBySrcRule::kInstance =
std::unique_ptr<RemoveProjectDedupBeforeGetDstBySrcRule>(
new RemoveProjectDedupBeforeGetDstBySrcRule());

// Adds this rule to the query rule set as soon as the instance is constructed.
RemoveProjectDedupBeforeGetDstBySrcRule::RemoveProjectDedupBeforeGetDstBySrcRule() {
  auto&& queryRules = RuleSet::QueryRules();
  queryRules.addRule(this);
}

// Returns the plan shape this rule looks for:
//   GetDstBySrc -> Dedup -> Project -> DataCollect
// The pattern is built once, on first use, from the innermost node outwards.
const Pattern& RemoveProjectDedupBeforeGetDstBySrcRule::pattern() const {
  using Kind = graph::PlanNode::Kind;
  static Pattern kPattern = [] {
    auto dataCollect = Pattern::create(Kind::kDataCollect);
    auto project = Pattern::create(Kind::kProject, {dataCollect});
    auto dedup = Pattern::create(Kind::kDedup, {project});
    return Pattern::create(Kind::kGetDstBySrc, {dedup});
  }();
  return kPattern;
}

// Rewrites a matched GetDstBySrc -> Dedup -> Project -> DataCollect chain so that GetDstBySrc
// consumes the Project's input directly, dropping the Project and Dedup nodes in between.
//
// @param octx     optimizer context used to create the new group node.
// @param matched  match result rooted at the GetDstBySrc group node.
// @return a TransformResult holding the single replacement GetDstBySrc group node, with
//         eraseAll set.
StatusOr<OptRule::TransformResult> RemoveProjectDedupBeforeGetDstBySrcRule::transform(
    OptContext* octx, const MatchedResult& matched) const {
  auto* getDstBySrcGroupNode = matched.node;
  auto* getDstBySrc = static_cast<const graph::GetDstBySrc*>(getDstBySrcGroupNode->node());
  // The first dependency of GetDstBySrc is the Dedup match; the first dependency of that is the
  // Project match.
  auto* projectGroupNode = matched.dependencies.front().dependencies.front().node;
  auto* project = static_cast<const graph::Project*>(projectGroupNode->node());

  auto* newGetDstBySrc = getDstBySrc->clone();
  // Keep the original output variable so consumers of GetDstBySrc are unaffected.
  newGetDstBySrc->setOutputVar(getDstBySrc->outputVar());
  // Read from whatever the Project used to read from, bypassing Project and Dedup.
  newGetDstBySrc->setInputVar(project->inputVar());
  // Copy the column names from the original node. The previous code assigned the clone's own
  // column names back to itself, which was a no-op.
  newGetDstBySrc->setColNames(getDstBySrc->colNames());

  auto newGetDstBySrcNode =
      OptGroupNode::create(octx, newGetDstBySrc, getDstBySrcGroupNode->group());
  // The new node inherits the Project's dependencies.
  for (auto dep : projectGroupNode->dependencies()) {
    newGetDstBySrcNode->dependsOn(dep);
  }

  TransformResult result;
  result.newGroupNodes.emplace_back(newGetDstBySrcNode);
  result.eraseAll = true;
  return result;
}

// Decides whether the rule applies to a plan fragment that already has the shape
// GetDstBySrc -> Dedup -> Project -> DataCollect.
//
// The rule applies only when all of the following hold:
//   - the GetDstBySrc source expression is an input property or a column expression;
//   - the Project has exactly one column;
//   - the DataCollect is of kind kMToN, is distinct, and has exactly one column name.
bool RemoveProjectDedupBeforeGetDstBySrcRule::match(OptContext*,
                                                    const MatchedResult& matched) const {
  const auto& dedupMatch = matched.dependencies.front();
  const auto& projectMatch = dedupMatch.dependencies.front();
  const auto& dataCollectMatch = projectMatch.dependencies.front();

  auto* getDst = static_cast<const graph::GetDstBySrc*>(matched.node->node());
  auto* proj = static_cast<const graph::Project*>(projectMatch.node->node());
  auto* collect = static_cast<const graph::DataCollect*>(dataCollectMatch.node->node());

  const auto srcKind = getDst->src()->kind();
  const bool srcIsSupported =
      srcKind == Expression::Kind::kInputProperty || srcKind == Expression::Kind::kColumn;

  return srcIsSupported && proj->columns()->size() == 1 &&
         collect->kind() == graph::DataCollect::DCKind::kMToN && collect->distinct() &&
         collect->colNames().size() == 1;
}

// Returns the name of this rule.
std::string RemoveProjectDedupBeforeGetDstBySrcRule::toString() const {
  static const char* const kRuleName = "RemoveProjectDedupBeforeGetDstBySrcRule";
  return std::string(kRuleName);
}

} // namespace opt
} // namespace nebula
54 changes: 54 additions & 0 deletions src/graph/optimizer/rule/RemoveProjectDedupBeforeGetDstBySrcRule.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License.
*/

#ifndef GRAPH_OPTIMIZER_RULE_REMOVEPROJECTDEDUPBEFOREGETDSTBYSRCRULE_H_
#define GRAPH_OPTIMIZER_RULE_REMOVEPROJECTDEDUPBEFOREGETDSTBYSRCRULE_H_

#include <initializer_list>

#include "graph/optimizer/OptRule.h"

namespace nebula {
namespace opt {

// DataCollect contains only one column that is a deduped vid column which GetDstBySrc needs,
// so the following Project and Dedup are useless.
//
// Transformation:
// Before:
//
// +---------+---------+
// | GetDstBySrc |
// +---------+---------+
// | Dedup |
// +---------+---------+
// | Project |
// +---------+---------+
// | DataCollect |
// +---------+---------+
//
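// After:
// Remove the Project and Dedup nodes, so that GetDstBySrc reads the output of DataCollect
// directly:
//
// +---------+---------+
// | GetDstBySrc |
// +---------+---------+
// | DataCollect |
// +---------+---------+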

// Optimizer rule that drops the Project and Dedup nodes sitting between a GetDstBySrc node and
// a DataCollect node.
class RemoveProjectDedupBeforeGetDstBySrcRule final : public OptRule {
public:
// Returns the plan pattern this rule matches:
// GetDstBySrc -> Dedup -> Project -> DataCollect.
const Pattern &pattern() const override;

// Builds a replacement GetDstBySrc node that takes the Project's input variable and
// dependencies, and returns it as the transform result.
StatusOr<TransformResult> transform(OptContext *ctx, const MatchedResult &matched) const override;

// Returns true only if the GetDstBySrc source is an input-property or column expression, the
// Project has exactly one column, and the DataCollect is a distinct kMToN collect with exactly
// one column name.
bool match(OptContext *ctx, const MatchedResult &matched) const override;

// Returns the name of the rule.
std::string toString() const override;

private:
// Private: the only instance is kInstance. The constructor adds the rule to the query rule set.
RemoveProjectDedupBeforeGetDstBySrcRule();

// The single instance of this rule.
static std::unique_ptr<OptRule> kInstance;
};

} // namespace opt
} // namespace nebula

#endif // GRAPH_OPTIMIZER_RULE_REMOVEPROJECTDEDUPBEFOREGETDSTBYSRCRULE_H_
1 change: 1 addition & 0 deletions src/graph/planner/plan/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ void Dedup::cloneMembers(const Dedup& l) {
std::unique_ptr<PlanNodeDescription> DataCollect::explain() const {
auto desc = VariableDependencyNode::explain();
addDescription("inputVar", folly::toJson(util::toJson(inputVars_)), desc.get());
addDescription("distinct", distinct_ ? "true" : "false", desc.get());
switch (kind_) {
case DCKind::kSubgraph: {
addDescription("kind", "SUBGRAPH", desc.get());
Expand Down
3 changes: 2 additions & 1 deletion src/graph/util/PlannerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ SubPlan PlannerUtil::buildRuntimeInput(QueryContext* qctx, Starts& starts) {
if (starts.fromType == kVariable) {
project->setInputVar(starts.userDefinedVarName);
}
starts.src = InputPropertyExpression::make(pool, kVid);
// If possible, use column numbers in preference to column names.
starts.src = ColumnExpression::make(pool, 0);

auto* dedup = Dedup::make(qctx, project);

Expand Down
7 changes: 3 additions & 4 deletions tests/tck/features/go/SimpleCase.feature
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2021 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
@czp
Feature: Simple case

Background:
Expand Down Expand Up @@ -72,10 +73,8 @@ Feature: Simple case
| 12 | Minus | 10,11 | |
| 10 | Project | 13 | |
| 13 | PassThrough | 9 | |
| 9 | Dedup | 8 | |
| 8 | GetDstBySrc | 7 | |
| 7 | Dedup | 6 | |
| 6 | Project | 5 | |
| 9 | Dedup | 15 | |
| 15 | GetDstBySrc | 5 | |
| 5 | DataCollect | 4 | |
| 4 | Loop | 0 | {"loopBody": "3"} |
| 3 | Dedup | 2 | |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2021 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
@czp
Feature: Push Filter down LeftJoin rule

Background:
Expand Down

0 comments on commit f27ef3a

Please sign in to comment.