Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support lookup indexScan using IN expression as filter #2906

Merged
merged 8 commits into from
Sep 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion src/graph/optimizer/OptimizerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,12 @@ StatusOr<ScoredColumnHint> selectRelExprIndex(const ColumnDef& field,
}

auto right = expr->right();
DCHECK(right->kind() == Expression::Kind::kConstant);
if (expr->kind() == Expression::Kind::kRelIn) { // container expressions
DCHECK(right->isContainerExpr());
} else { // other expressions
DCHECK(right->kind() == Expression::Kind::kConstant);
}

const auto& value = static_cast<const ConstantExpression*>(right)->value();

ScoredColumnHint hint;
Expand Down Expand Up @@ -912,6 +917,32 @@ bool OptimizerUtils::findOptimalIndex(const Expression* condition,
return true;
}

// Check if the relational expression has a valid index
// The left operand should either be a kEdgeProperty or kTagProperty expr
bool OptimizerUtils::relExprHasIndex(
const Expression* expr,
const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems) {
DCHECK(expr->isRelExpr());

for (auto& index : indexItems) {
const auto& fields = index->get_fields();
if (fields.empty()) {
return false;
}

auto left = static_cast<const RelationalExpression*>(expr)->left();
DCHECK(left->kind() == Expression::Kind::kEdgeProperty ||
left->kind() == Expression::Kind::kTagProperty);

auto propExpr = static_cast<const PropertyExpression*>(left);
if (propExpr->prop() == fields[0].get_name()) {
return true;
}
}

return false;
}

Comment on lines +922 to +945
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add test case as below:
tag t1 (c1, c2)
tag t2(c1, c2)
index i1 on t1 (c1, c2)
index i2 on t2 (c3, c4)
lookup on t2 where c1 in (.....) ----> this logic will return true if the guess is correct, Because index column name can be the same from different indexes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we check the schema type in LookupValidator

(root@nebula) [nba]> lookup on player where team.name == "Tim Duncan"
[ERROR (-1009)]: SemanticError: Schema name error: team

void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from,
nebula::graph::IndexScan* to) {
to->setEmptyResultSet(from->isEmptyResultSet());
Expand Down
4 changes: 4 additions & 0 deletions src/graph/optimizer/OptimizerUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class OptimizerUtils {
bool* isPrefixScan,
nebula::storage::cpp2::IndexQueryContext* ictx);

static bool relExprHasIndex(
const Expression* expr,
const std::vector<std::shared_ptr<nebula::meta::cpp2::IndexItem>>& indexItems);

static void copyIndexScanData(const nebula::graph::IndexScan* from, nebula::graph::IndexScan* to);
};

Expand Down
55 changes: 50 additions & 5 deletions src/graph/optimizer/rule/OptimizeTagIndexScanByFilterRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const {
return pattern;
}

// Match 2 kinds of expressions:
//
// 1. Relational expr. If it is an IN expr, its list MUST have only 1 element, so it could always be
// transformed to an relEQ expr. i.g. A in [B] => A == B
// It the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule.
//
// 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute
// in the IN expr SHOULD NOT have a valid index, otherwise the expression should be matched with
// UnionAllIndexScanBaseRule.
bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
Expand All @@ -58,16 +67,23 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul
}
}
auto condition = filter->condition();

// Case1: relational expr
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
// If the container in the IN expr has only 1 element, it will be converted to an relEQ
// expr. If more than 1 element found in the container, UnionAllIndexScanBaseRule will be
// applied.
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return ContainerOperands.size() == 1;
}
return relExpr->left()->kind() == ExprKind::kTagProperty &&
relExpr->right()->kind() == ExprKind::kConstant;
}
if (condition->isLogicalExpr()) {
return condition->kind() == Expression::Kind::kLogicalAnd;
}

return false;
// Case2: logical AND expr
return condition->kind() == ExprKind::kLogicalAnd;
}

TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) {
Expand All @@ -94,9 +110,38 @@ StatusOr<TransformResult> OptimizeTagIndexScanByFilterRule::transform(

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

// Stand alone IN expr with only 1 element in the list, no need to check index
if (conditionType == ExprKind::kRelIn) {
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
DCHECK(transformedExpr->kind() == ExprKind::kRelEQ);
}

// case2: logical AND expr
if (condition->kind() == ExprKind::kLogicalAnd) {
for (auto& operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (operand->kind() == ExprKind::kRelIn) {
auto inExpr = static_cast<RelationalExpression*>(operand);
// Do not apply this rule if the IN expr has a valid index or it has only 1 element in the
// list
if (static_cast<ListExpression*>(inExpr->right())->size() > 1) {
return TransformResult::noTransform();
} else {
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
}
if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) {
return TransformResult::noTransform();
}
}
}
}

Comment on lines +124 to +141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IN this case, we can't determine that this expression is introduced by IN, for example:
where in (....) ---> correct
where a AND (b OR c) AND ... ---> incorrect

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is true.
This rule is to match AND expr containing IN expr as operand. i.g. a AND (b IN [1,2,3])

Actually, I don't really want to expand all AND expr to OR expr, since we don't know whether the property in the OR expression has a valid index or not unless we iterate all its operands. However, for IN expr a IN [b,c,d,....], we only need to check the index of the left operand a.

IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) {
if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) {
return TransformResult::noTransform();
}

Expand Down
119 changes: 105 additions & 14 deletions src/graph/optimizer/rule/UnionAllIndexScanBaseRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"
#include "graph/planner/plan/Scan.h"
#include "graph/util/ExpressionUtils.h"
#include "interface/gen-cpp2/storage_types.h"

using nebula::graph::Filter;
Expand All @@ -24,25 +25,58 @@ using nebula::graph::TagIndexFullScan;
using nebula::storage::cpp2::IndexQueryContext;

using Kind = nebula::graph::PlanNode::Kind;
using ExprKind = nebula::Expression::Kind;
using TransformResult = nebula::opt::OptRule::TransformResult;

namespace nebula {
namespace opt {

// The matched expression should be either a OR expression or an expression that could be
// rewrote to a OR expression. There are 3 senarios.
//
// 1. OR expr. If OR expr has an IN expr operand that has a valid index, expand it to OR expr.
//
// 2. AND expr such as A in [a, b] AND B when A has a valid index, because it can be transformed to
// (A==a AND B) OR (A==b AND B)
//
// 3. IN expr with its list size > 1, such as A in [a, b] since it can be transformed to (A==a) OR
// (A==b).
// If the list has a size of 1, the expr will be matched with OptimizeTagIndexScanByFilterRule.
bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
if (!OptRule::match(ctx, matched)) {
return false;
}
auto filter = static_cast<const Filter*>(matched.planNode());
auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
auto condition = filter->condition();
if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
return false;
auto conditionType = condition->kind();

if (condition->isLogicalExpr()) {
// Case1: OR Expr
if (conditionType == ExprKind::kLogicalOr) {
return true;
}
// Case2: AND Expr
if (conditionType == ExprKind::kLogicalAnd &&
graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition),
{ExprKind::kRelIn})) {
return true;
}
// Check logical operands
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr() || !operand->isLogicalExpr()) {
return false;
}
}
}

for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
if (!operand->isRelExpr()) {
return false;
// If the number of elements is less or equal than 1, the IN expr will be transformed into a
// relEQ expr by the OptimizeTagIndexScanByFilterRule.
if (condition->isRelExpr()) {
auto relExpr = static_cast<const RelationalExpression*>(condition);
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) {
auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right());
return operandsVec.size() > 1;
}
}

Expand All @@ -52,7 +86,7 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc
}
}

return true;
return false;
}

StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
Expand All @@ -62,20 +96,77 @@ StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
auto scan = static_cast<const IndexScan*>(node);

auto metaClient = ctx->qctx()->getMetaClient();
StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status;
if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) {
status = metaClient->getTagIndexesFromCache(scan->space());
} else {
status = metaClient->getEdgeIndexesFromCache(scan->space());
}
auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan
? metaClient->getTagIndexesFromCache(scan->space())
Copy link
Contributor

@bright-starry-sky bright-starry-sky Sep 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's about getTagIndexesByTagNameFromCache(spaceId, tagName) ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean getTagIndexByNameFromCache()? Sounds like something we could optimize.
The current approach is:

  1. get all indexItems in the space
  2. check if indexItems is empty
  3. OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)

which can be turned into:

  1. analyze the expression to get tag info
  2. get indexItems usinggetTagIndexByNameFromCache(spaceID, tagName)
  3. check if indexItems is empty
  4. OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)

I'll mark this as a TODO. WDYT?

: metaClient->getEdgeIndexesFromCache(scan->space());

NG_RETURN_IF_ERROR(status);
auto indexItems = std::move(status).value();

OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);

// Check whether the prop has index.
// Rewrite if the property in the IN expr has a valid index
if (indexItems.empty()) {
return TransformResult::noTransform();
}

auto condition = filter->condition();
auto conditionType = condition->kind();
Expression* transformedExpr = condition->clone();

switch (conditionType) {
// Stand alone IN expr
// If it has multiple elements in the list, check valid index before expanding to OR expr
case ExprKind::kRelIn: {
if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) {
return TransformResult::noTransform();
}
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition);
break;
}

// AND expr containing IN expr operand
case ExprKind::kLogicalAnd: {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}

// Reconstruct AND expr using distributive law
transformedExpr = graph::ExpressionUtils::rewriteLogicalAndToLogicalOr(transformedExpr);
break;
}

// OR expr
case ExprKind::kLogicalOr: {
// Iterate all operands and expand IN exprs if possible
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) {
if (expr->kind() == ExprKind::kRelIn) {
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) {
expr = graph::ExpressionUtils::rewriteInExpr(expr);
}
}
}
// Flatten OR exprs
graph::ExpressionUtils::pullOrs(transformedExpr);

break;
}
default:
LOG(FATAL) << "Invalid expression kind: " << static_cast<uint8_t>(conditionType);
break;
}

DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr ||
transformedExpr->kind() == ExprKind::kRelEQ);
std::vector<IndexQueryContext> idxCtxs;
auto condition = static_cast<const LogicalExpression*>(filter->condition());
for (auto operand : condition->operands()) {
auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr);
for (auto operand : logicalExpr->operands()) {
IndexQueryContext ictx;
bool isPrefixScan = false;
if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {
Expand Down
Loading