-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support lookup indexScan using IN expression as filter #2906
Changes from all commits
f0f7a88
2a6358e
219f604
9d26461
0d18f13
959d7c6
38a2280
a8ce12a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,15 @@ const Pattern& OptimizeTagIndexScanByFilterRule::pattern() const { | |
return pattern; | ||
} | ||
|
||
// Match 2 kinds of expressions: | ||
// | ||
// 1. Relational expr. If it is an IN expr, its list MUST have only 1 element, so it could always be | ||
// transformed to an relEQ expr. i.g. A in [B] => A == B | ||
// It the list has more than 1 element, the expr will be matched with UnionAllIndexScanBaseRule. | ||
// | ||
// 2. Logical AND expr. If the AND expr contains an operand that is an IN expr, the label attribute | ||
// in the IN expr SHOULD NOT have a valid index, otherwise the expression should be matched with | ||
// UnionAllIndexScanBaseRule. | ||
bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResult& matched) const { | ||
if (!OptRule::match(ctx, matched)) { | ||
return false; | ||
|
@@ -58,16 +67,23 @@ bool OptimizeTagIndexScanByFilterRule::match(OptContext* ctx, const MatchedResul | |
} | ||
} | ||
auto condition = filter->condition(); | ||
|
||
// Case1: relational expr | ||
if (condition->isRelExpr()) { | ||
auto relExpr = static_cast<const RelationalExpression*>(condition); | ||
// If the container in the IN expr has only 1 element, it will be converted to an relEQ | ||
// expr. If more than 1 element found in the container, UnionAllIndexScanBaseRule will be | ||
// applied. | ||
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { | ||
auto ContainerOperands = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); | ||
return ContainerOperands.size() == 1; | ||
} | ||
return relExpr->left()->kind() == ExprKind::kTagProperty && | ||
relExpr->right()->kind() == ExprKind::kConstant; | ||
} | ||
if (condition->isLogicalExpr()) { | ||
return condition->kind() == Expression::Kind::kLogicalAnd; | ||
} | ||
|
||
return false; | ||
// Case2: logical AND expr | ||
return condition->kind() == ExprKind::kLogicalAnd; | ||
} | ||
|
||
TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) { | ||
|
@@ -94,9 +110,38 @@ StatusOr<TransformResult> OptimizeTagIndexScanByFilterRule::transform( | |
|
||
OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems); | ||
|
||
auto condition = filter->condition(); | ||
auto conditionType = condition->kind(); | ||
Expression* transformedExpr = condition->clone(); | ||
|
||
// Stand alone IN expr with only 1 element in the list, no need to check index | ||
if (conditionType == ExprKind::kRelIn) { | ||
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); | ||
DCHECK(transformedExpr->kind() == ExprKind::kRelEQ); | ||
} | ||
|
||
// case2: logical AND expr | ||
if (condition->kind() == ExprKind::kLogicalAnd) { | ||
for (auto& operand : static_cast<const LogicalExpression*>(condition)->operands()) { | ||
if (operand->kind() == ExprKind::kRelIn) { | ||
auto inExpr = static_cast<RelationalExpression*>(operand); | ||
// Do not apply this rule if the IN expr has a valid index or it has only 1 element in the | ||
// list | ||
if (static_cast<ListExpression*>(inExpr->right())->size() > 1) { | ||
return TransformResult::noTransform(); | ||
} else { | ||
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); | ||
} | ||
if (OptimizerUtils::relExprHasIndex(inExpr, indexItems)) { | ||
return TransformResult::noTransform(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
Comment on lines
+124
to
+141
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IN this case, we can't determine that this expression is introduced by IN, for example: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is true. Actually, I don't really want to expand all AND expr to OR expr, since we don't know whether the property in the OR expression has a valid index or not unless we iterate all its operands. However, for IN expr |
||
IndexQueryContext ictx; | ||
bool isPrefixScan = false; | ||
if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) { | ||
if (!OptimizerUtils::findOptimalIndex(transformedExpr, indexItems, &isPrefixScan, &ictx)) { | ||
return TransformResult::noTransform(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
#include "graph/planner/plan/PlanNode.h" | ||
#include "graph/planner/plan/Query.h" | ||
#include "graph/planner/plan/Scan.h" | ||
#include "graph/util/ExpressionUtils.h" | ||
#include "interface/gen-cpp2/storage_types.h" | ||
|
||
using nebula::graph::Filter; | ||
|
@@ -24,25 +25,58 @@ using nebula::graph::TagIndexFullScan; | |
using nebula::storage::cpp2::IndexQueryContext; | ||
|
||
using Kind = nebula::graph::PlanNode::Kind; | ||
using ExprKind = nebula::Expression::Kind; | ||
using TransformResult = nebula::opt::OptRule::TransformResult; | ||
|
||
namespace nebula { | ||
namespace opt { | ||
|
||
// The matched expression should be either a OR expression or an expression that could be | ||
// rewrote to a OR expression. There are 3 senarios. | ||
// | ||
// 1. OR expr. If OR expr has an IN expr operand that has a valid index, expand it to OR expr. | ||
// | ||
// 2. AND expr such as A in [a, b] AND B when A has a valid index, because it can be transformed to | ||
// (A==a AND B) OR (A==b AND B) | ||
// | ||
// 3. IN expr with its list size > 1, such as A in [a, b] since it can be transformed to (A==a) OR | ||
// (A==b). | ||
// If the list has a size of 1, the expr will be matched with OptimizeTagIndexScanByFilterRule. | ||
bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const { | ||
if (!OptRule::match(ctx, matched)) { | ||
return false; | ||
} | ||
auto filter = static_cast<const Filter*>(matched.planNode()); | ||
auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0})); | ||
auto condition = filter->condition(); | ||
if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) { | ||
return false; | ||
auto conditionType = condition->kind(); | ||
|
||
if (condition->isLogicalExpr()) { | ||
// Case1: OR Expr | ||
if (conditionType == ExprKind::kLogicalOr) { | ||
return true; | ||
} | ||
// Case2: AND Expr | ||
if (conditionType == ExprKind::kLogicalAnd && | ||
graph::ExpressionUtils::findAny(static_cast<LogicalExpression*>(condition), | ||
{ExprKind::kRelIn})) { | ||
return true; | ||
} | ||
// Check logical operands | ||
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) { | ||
if (!operand->isRelExpr() || !operand->isLogicalExpr()) { | ||
return false; | ||
} | ||
} | ||
} | ||
|
||
for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) { | ||
if (!operand->isRelExpr()) { | ||
return false; | ||
// If the number of elements is less or equal than 1, the IN expr will be transformed into a | ||
// relEQ expr by the OptimizeTagIndexScanByFilterRule. | ||
if (condition->isRelExpr()) { | ||
auto relExpr = static_cast<const RelationalExpression*>(condition); | ||
if (relExpr->kind() == ExprKind::kRelIn && relExpr->right()->isContainerExpr()) { | ||
auto operandsVec = graph::ExpressionUtils::getContainerExprOperands(relExpr->right()); | ||
return operandsVec.size() > 1; | ||
} | ||
} | ||
|
||
|
@@ -52,7 +86,7 @@ bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matc | |
} | ||
} | ||
|
||
return true; | ||
return false; | ||
} | ||
|
||
StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx, | ||
|
@@ -62,20 +96,77 @@ StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx, | |
auto scan = static_cast<const IndexScan*>(node); | ||
|
||
auto metaClient = ctx->qctx()->getMetaClient(); | ||
StatusOr<std::vector<std::shared_ptr<meta::cpp2::IndexItem>>> status; | ||
if (node->kind() == graph::PlanNode::Kind::kTagIndexFullScan) { | ||
status = metaClient->getTagIndexesFromCache(scan->space()); | ||
} else { | ||
status = metaClient->getEdgeIndexesFromCache(scan->space()); | ||
} | ||
auto status = node->kind() == graph::PlanNode::Kind::kTagIndexFullScan | ||
? metaClient->getTagIndexesFromCache(scan->space()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's about getTagIndexesByTagNameFromCache(spaceId, tagName) ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you mean
which can be turned into:
I'll mark this as a TODO. WDYT? |
||
: metaClient->getEdgeIndexesFromCache(scan->space()); | ||
|
||
NG_RETURN_IF_ERROR(status); | ||
auto indexItems = std::move(status).value(); | ||
|
||
OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems); | ||
|
||
// Check whether the prop has index. | ||
// Rewrite if the property in the IN expr has a valid index | ||
if (indexItems.empty()) { | ||
return TransformResult::noTransform(); | ||
} | ||
|
||
auto condition = filter->condition(); | ||
auto conditionType = condition->kind(); | ||
Expression* transformedExpr = condition->clone(); | ||
|
||
switch (conditionType) { | ||
// Stand alone IN expr | ||
// If it has multiple elements in the list, check valid index before expanding to OR expr | ||
case ExprKind::kRelIn: { | ||
if (!OptimizerUtils::relExprHasIndex(condition, indexItems)) { | ||
return TransformResult::noTransform(); | ||
} | ||
transformedExpr = graph::ExpressionUtils::rewriteInExpr(condition); | ||
break; | ||
} | ||
|
||
// AND expr containing IN expr operand | ||
case ExprKind::kLogicalAnd: { | ||
// Iterate all operands and expand IN exprs if possible | ||
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) { | ||
if (expr->kind() == ExprKind::kRelIn) { | ||
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { | ||
expr = graph::ExpressionUtils::rewriteInExpr(expr); | ||
} | ||
} | ||
} | ||
|
||
// Reconstruct AND expr using distributive law | ||
transformedExpr = graph::ExpressionUtils::rewriteLogicalAndToLogicalOr(transformedExpr); | ||
break; | ||
} | ||
|
||
// OR expr | ||
case ExprKind::kLogicalOr: { | ||
// Iterate all operands and expand IN exprs if possible | ||
for (auto& expr : static_cast<LogicalExpression*>(transformedExpr)->operands()) { | ||
if (expr->kind() == ExprKind::kRelIn) { | ||
if (OptimizerUtils::relExprHasIndex(expr, indexItems)) { | ||
expr = graph::ExpressionUtils::rewriteInExpr(expr); | ||
} | ||
} | ||
} | ||
// Flatten OR exprs | ||
graph::ExpressionUtils::pullOrs(transformedExpr); | ||
|
||
break; | ||
} | ||
default: | ||
LOG(FATAL) << "Invalid expression kind: " << static_cast<uint8_t>(conditionType); | ||
break; | ||
} | ||
|
||
DCHECK(transformedExpr->kind() == ExprKind::kLogicalOr || | ||
transformedExpr->kind() == ExprKind::kRelEQ); | ||
std::vector<IndexQueryContext> idxCtxs; | ||
auto condition = static_cast<const LogicalExpression*>(filter->condition()); | ||
for (auto operand : condition->operands()) { | ||
auto logicalExpr = static_cast<const LogicalExpression*>(transformedExpr); | ||
for (auto operand : logicalExpr->operands()) { | ||
IndexQueryContext ictx; | ||
bool isPrefixScan = false; | ||
if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add test case as below:
tag t1 (c1, c2)
tag t2(c1, c2)
index i1 on t1 (c1, c2)
index i2 on t2 (c3, c4)
lookup on t2 where c1 in (.....) ----> this logic will return true if the guess is correct, Because index column name can be the same from different indexes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now we check the schema type in LookupValidator